diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 0e357e48f9..15784e7687 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2930 +1,2929 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *operation, xmlNode *msg); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time); static void lrm_connection_destroy(void) { if (pcmk_is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Connection to executor failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); } else { crm_info("Disconnected from executor"); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); controld_delete_resource_history(op->rsc_id, lrm_state->node_name, NULL, crmd_cib_smart_opt()); return; } if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_EXEC_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT, CRMD_ACTION_STATUS, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = pcmk__strkey_table(free, free); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return op->remote_nodename? op->remote_nodename : fsa_our_uname; } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } static void try_local_executor_connect(long long action, fsa_data_t *msg_data, lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; crm_debug("Connecting to the local executor"); // If we can connect, great rc = controld_connect_local_executor(lrm_state); if (rc == pcmk_rc_ok) { controld_set_fsa_input_flags(R_LRM_CONNECTED); crm_info("Connection to the local executor established"); return; } // Otherwise, if we can try again, set a timer to do so if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the local executor %d time%s " "(%d max): %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); controld_start_timer(wait_timer); crmd_fsa_stall(FALSE); return; } // Otherwise give up crm_err("Failed to connect to the executor the max allowed " "%d time%s: %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), pcmk_rc_str(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } controld_clear_fsa_input_flags(R_LRM_CONNECTED); crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { try_local_executor_connect(action, msg_data, lrm_state); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; active_op_t *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, pcmk__plural_s(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, pcmk__plural_s(counter), when); if ((cur_state == S_TERMINATE) || !pcmk_is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } /*! * \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * free() and the XML result with free_xml(). */ static char * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { char *list = NULL; size_t len = 0; *result = create_xml_node(NULL, XML_TAG_PARAMS); for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, &len, " "); } pcmk__add_word(&list, &len, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(*result, param->rap_name, v); } } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, &len, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { // Add parameters not marked reloadable to the "op-force-restart" list list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * "op-force-restart" list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *node_name, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; uint32_t metadata_source = controld_metadata_from_agent; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return TRUE; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return TRUE; } /* Getting meta-data from cache is OK unless this is a successful start * action -- always refresh from the agent for those, in case the * resource agent was updated. * * @TODO Only refresh the meta-data after starts if the agent actually * changed (using something like inotify, or a hash or modification time of * the agent executable). */ if ((op->op_status != PCMK_EXEC_DONE) || (op->rc != target_rc) || !pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) { metadata_source |= controld_metadata_from_cache; } metadata = controld_get_rsc_metadata(lrm_state, rsc, metadata_source); if (metadata == NULL) { return TRUE; } #if ENABLE_VERSIONED_ATTRS crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version); #endif crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __func__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __func__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __func__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __func__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } xmlNode * controld_query_executor_state(const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (!lrm_state) { crm_err("Could not find executor state for node %s", node_name); return NULL; } return do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); } /*! * \internal * \brief Map standard Pacemaker return code to operation status and OCF code * * \param[out] event Executor event whose status and return code should be set * \param[in] rc Standard Pacemaker return code */ void controld_rc2event(lrmd_event_data_t *event, int rc) { /* This is called for cleanup requests from controller peers/clients, not * for resource actions, so no exit reason is needed. */ switch (rc) { case pcmk_rc_ok: lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); break; case EACCES: lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, PCMK_EXEC_ERROR, NULL); break; default: lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, NULL); break; } } /*! * \internal * \brief Trigger a new transition after CIB status was deleted * * If a CIB status delete was not expected (as part of the transition graph), * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" * cluster property. * * \param[in] from_sys IPC name that requested the delete * \param[in] rsc_id Resource whose status was deleted (for logging only) */ void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; active_op_t *pending = value; if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) { g_hash_table_iter_remove(rsc_gIter); } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); } controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] lrm_state LRM state of the desired node * \param[in] op Operation whose history should be deleted */ static void erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Erase an LRM history entry from the CIB, given operation identifiers * * \param[in] lrm_state LRM state of the node to clear history for * \param[in] rsc_id Name of resource to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] orig_op If specified, delete only if it has this original op * \param[in] call_id If specified, delete entry only if it has this call ID */ static void erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id, const char *key, const char *orig_op, int call_id) { char *op_xpath = NULL; CRM_CHECK((rsc_id != NULL) && (key != NULL), return); if (call_id > 0) { op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name, rsc_id, key, call_id); } else if (orig_op) { op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name, rsc_id, key, orig_op); } else { op_xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name, rsc_id, key); } crm_debug("Erasing resource operation history for %s on %s (call=%d)", key, rsc_id, call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { char *op_key = NULL; char *orig_op_key = NULL; lrm_state_t *lrm_state = NULL; lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } /* Erase from CIB */ op_key = pcmk__op_key(rsc_id, "last_failure", 0); if (operation) { orig_op_key = pcmk__op_key(rsc_id, operation, interval_ms); } erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0); free(op_key); free(orig_op_key); /* Remove from memory */ if (lrm_state->resource_history) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; active_op_t *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { controld_set_active_op_flags(pending, active_op_remove); crm_debug("Scheduling %s for removal", key); } if (pcmk_is_set(pending->flags, active_op_cancelled)) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } controld_set_active_op_flags(pending, active_op_cancelled); } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; active_op_t *op = value; if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in] lrm_state LRM connection to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource with LRM if not already * \param[out] rsc_info Where to store resource information obtained from LRM * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s from executor for %s%s%s", id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Could not delete '%s' from executor for %s%s%s: %s " CRM_XS " rc=%d", id, sys, (user? " as " : ""), (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode, const char *exit_reason) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, op_exitcode, op_status, exit_reason); } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, cib_scope_local); - /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE - * would result in the scheduler sending us back here again - */ + // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false) update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use * \param[in] exit_reason Human-friendly detail, if error */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int op_status, enum ocf_exitcode rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); } crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = fsa_our_uname; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; if (user_name && !pcmk__is_privileged(user_name)) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_INSUFFICIENT_PRIV, "Unprivileged user cannot fail resources"); controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, "Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, "Cannot fail unknown resource"); } controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local resource history refresh: call=%d", rc); if (!pcmk__str_eq(CRM_SYSTEM_CRMD, from_sys, pcmk__str_casei)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } static void handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; guint interval_ms = 0; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { free(meta_key); return FALSE; } free(meta_key); op_key = pcmk__op_key(rsc->id, op_task, interval_ms); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); pcmk__scan_min_int(call_id, &call, 0); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { gboolean unregister = TRUE; int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, user_name, cib_dryrun|cib_sync_call); if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. */ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; target_node = lrm_op_target(input->xml); is_remote_node = !pcmk__str_eq(target_node, fsa_our_uname, pcmk__str_casei); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Local node has no connection to remote"); return; } CRM_ASSERT(lrm_state != NULL); user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL); crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("Executor %s command from %s as user %s", crm_op, from_sys, user_name); if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_casei)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { crm_rsc_delete = TRUE; // from crm_resource } operation = CRMD_ACTION_DELETE; } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_casei)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { handle_refresh_op(lrm_state, user_name, from_host, from_sys); } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) { handle_query_op(input->msg, lrm_state); + // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_casei)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_casei) || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_casei)) { handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_casei); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Not connected to remote executor"); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_NOT_CONFIGURED, // fatal error "Invalid resource definition"); return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_INVALID_PARAM, // hard error "Could not register resource with executor"); return; } if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_casei)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_casei)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. * Default to the OCF 1.1 name. */ struct ra_metadata_s *md = NULL; const char *reload_name = CRMD_ACTION_RELOAD_AGENT; md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); if ((md != NULL) && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { reload_name = CRMD_ACTION_RELOAD; } do_lrm_rsc_op(lrm_state, rsc, reload_name, input->xml); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #if ENABLE_VERSIONED_ATTRS static void resolve_versioned_parameters(lrm_state_t *lrm_state, const char *rsc_id, const xmlNode *rsc_op, GHashTable *params) { /* Resource info *should* already be cached, so we don't get * executor call */ lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0); struct ra_metadata_s *metadata; metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); if (metadata) { xmlNode *versioned_attrs = NULL; GHashTable *hash = NULL; char *key = NULL; char *value = NULL; GHashTableIter iter; versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_replace(params, crm_meta_name(key), strdup(value)); if (pcmk__str_eq(key, XML_ATTR_TIMEOUT, pcmk__str_casei)) { pcmk__scan_min_int(value, &op->timeout, 0); } else if (pcmk__str_eq(key, XML_OP_ATTR_START_DELAY, pcmk__str_casei)) { pcmk__scan_min_int(value, &op->start_delay, 0); } } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); } lrmd_free_rsc_info(rsc); } #endif static lrmd_event_data_t * construct_op(lrm_state_t *lrm_state, xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; GHashTable *params = NULL; xmlNode *primitive = NULL; const char *class = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = lrmd_new_event(rsc_id, operation, 0); op->type = lrmd_event_exec_complete; op->timeout = 0; op->start_delay = 0; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = pcmk__strkey_table(free, free); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); pcmk__scan_min_int(op_delay, &op->start_delay, 0); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); pcmk__scan_min_int(op_timeout, &op->timeout, 0); if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0, &(op->interval_ms)) != pcmk_rc_ok) { op->interval_ms = 0; } /* Use pcmk_monitor_timeout instead of meta timeout for stonith recurring monitor, if set */ primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE); class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS); if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (op_timeout != NULL) { op->timeout = crm_get_msec(op_timeout); } } #if ENABLE_VERSIONED_ATTRS if (lrm_state && !is_remote_lrmd_ra(NULL, NULL, rsc_id) && !pcmk__strcase_any_of(op_type, CRMD_ACTION_METADATA, CRMD_ACTION_DELETE, NULL)) { resolve_versioned_parameters(lrm_state, rsc_id, rsc_op, params); } #endif if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = pcmk__strkey_table(free, free); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } /*! * \internal * \brief Send a (synthesized) event result * * Reply with a synthesized event result directly, as opposed to going through * the executor. * * \param[in] to_host Host to send result to * \param[in] to_sys IPC name to send result to (NULL for transition engine) * \param[in] rsc Type information about resource the result is for * \param[in] op Event with result to send * \param[in] rsc_id ID of resource the result is for */ void controld_ack_event_directly(const char *to_host, const char *to_sys, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __func__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, fsa_our_uname, __func__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } controld_set_fsa_input_flags(R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; active_op_t *op = value; if ((op->interval_ms != 0) && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; active_op_t *op = value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return; } // defaults to true record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending && !crm_is_true(record_pending)) { return; } op->call_id = -1; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op " PCMK__OP_FMT " on %s in the CIB", op->rsc_id, op->op_type, op->interval_ms, node_name); do_update_resource(node_name, rsc, op, 0); } static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *operation, xmlNode *msg) { int rc; int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; const char *nack_reason = NULL; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && (op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if ((op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " PCMK__OP_FMT, removed, pcmk__plural_s(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_notice("Requesting local execution of %s operation for %s on %s " CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, transition, rsc->id, operation, op->interval_ms); if (pcmk_is_set(fsa_input_register, R_SHUTDOWN) && pcmk__str_eq(operation, RSC_START, pcmk__str_casei)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); nack_reason = "Not attempting start due to shutdown in progress"; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && !pcmk__str_eq(operation, CRMD_ACTION_STOP, pcmk__str_casei)) { nack_reason = "Controller cannot attempt actions at this time"; } if (nack_reason != NULL) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), pcmk__btoa(pcmk_is_set(fsa_input_register, R_SHUTDOWN))); lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, nack_reason); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, params, &call_id); if (rc == pcmk_rc_ok) { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); active_op_t *pending = NULL; pending = calloc(1, sizeof(active_op_t)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = op->user_data? strdup(op->user_data) : NULL; if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &(pending->lock_time)) != pcmk_ok) { pending->lock_time = 0; } g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = PCMK_OCF_OK; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } else if (lrm_state_is_local(lrm_state)) { crm_err("Could not initiate %s action for resource %s locally: %s " CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { crm_err("Could not initiate %s action for resource %s remotely on %s: " "%s " CRM_XS " rc=%d", operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); } free(op_id); lrmd_free_event(op); } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. This ensures the resource stays locked to the node * until it is active there again after the node comes back up. */ static bool should_preserve_lock(lrmd_event_data_t *op) { if (!controld_shutdown_lock_enabled) { return false; } if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; } static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (pcmk__str_eq(node_name, fsa_our_uname, pcmk__str_casei)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; pcmk__xe_set_bool_attr(iter, XML_NODE_IS_REMOTE, true); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __func__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, node_name, __func__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. */ if (!should_preserve_lock(op)) { lock_time = 0; } crm_xml_add_ll(iter, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) lock_time); } if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __func__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" that really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%u on %s", rc, op->op_type, op->interval_ms, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } /*! * \internal * \brief Log the result of an executor action (actual or synthesized) * * \param[in] op Executor action to log result for * \param[in] op_key Operation key for action * \param[in] node_name Name of node action was performed on, if known * \param[in] update_id Call ID for CIB update (or 0 if none) * \param[in] confirmed Whether to log that graph action was confirmed */ static void log_executor_event(lrmd_event_data_t *op, const char *op_key, const char *node_name, int update_id, gboolean confirmed) { int log_level = LOG_ERR; GString *str = g_string_sized_new(100); // reasonable starting size g_string_printf(str, "Result of %s operation for %s", crm_action_str(op->op_type, op->interval_ms), op->rsc_id); if (node_name != NULL) { g_string_append_printf(str, " on %s", node_name); } switch (op->op_status) { case PCMK_EXEC_DONE: log_level = LOG_NOTICE; g_string_append_printf(str, ": %s", services_ocf_exitcode_str(op->rc)); break; case PCMK_EXEC_TIMEOUT: g_string_append_printf(str, ": %s after %s", pcmk_exec_status_str(op->op_status), pcmk__readable_interval(op->timeout)); break; case PCMK_EXEC_CANCELLED: log_level = LOG_INFO; // Fall through default: g_string_append_printf(str, ": %s", pcmk_exec_status_str(op->op_status)); } if ((op->exit_reason != NULL) && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { g_string_append_printf(str, " (%s)", op->exit_reason); } g_string_append(str, " " CRM_XS); if (update_id != 0) { g_string_append_printf(str, " CIB update %d,", update_id); } g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", (confirmed? "" : "un"), op->call_id, op_key); if (op->op_status == PCMK_EXEC_DONE) { g_string_append_printf(str, " rc=%d", op->rc); } do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); /* The services library has already logged the output at info or debug * level, so just raise to notice if it looks like a failure. */ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", op->rsc_id, op->op_type, op->interval_ms, node_name); crm_log_output(LOG_NOTICE, prefix, op->output); free(prefix); } } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, active_op_t *pending, xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (compare_version(fsa_our_dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_EXEC_NOT_CONNECTED: lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, PCMK_EXEC_ERROR, op->exit_reason); break; case PCMK_EXEC_INVALID: lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, op->exit_reason); break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(xml, XML_ATTR_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } } if (op->op_status == PCMK_EXEC_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_EXEC_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_EXEC_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can update_id = do_update_resource(node_name, rsc, op, pending? pending->lock_time : 0); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pcmk_is_set(pending->flags, active_op_remove)) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { erase_lrm_history_by_op(lrm_state, op); } /* If the recurring operation had failed, the lrm_rsc_op is recorded as * "last_failure" which won't get erased from the cib given the logic on * purpose in erase_lrm_history_by_op(). So that the cancel action won't * have a chance to get confirmed by DC with process_op_deletion(). * Cluster transition would get stuck waiting for the remaining action * timer to time out. * * Directly acknowledge the cancel operation in this case. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_EXEC_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } } log_executor_event(op, op_key, node_name, update_id, removed); if (lrm_state) { if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c index 06e968b08a..90d15f92bb 100644 --- a/daemons/controld/controld_remote_ra.c +++ b/daemons/controld/controld_remote_ra.c @@ -1,1344 +1,1349 @@ /* * Copyright 2013-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #define REMOTE_LRMD_RA "remote" /* The max start timeout before cmd retry */ #define MAX_START_TIMEOUT_MS 10000 typedef struct remote_ra_cmd_s { /*! the local node the cmd is issued from */ char *owner; /*! the remote node the cmd is executed on */ char *rsc_id; /*! the action to execute */ char *action; /*! some string the client wants us to give it back */ char *userdata; /*! start delay in ms */ int start_delay; /*! timer id used for start delay. */ int delay_id; /*! timeout in ms for cmd */ int timeout; int remaining_timeout; /*! recurring interval in ms */ guint interval_ms; /*! interval timer id */ int interval_id; int reported_success; int monitor_timeout_id; int takeover_timeout_id; /*! action parameters */ lrmd_key_value_t *params; pcmk__action_result_t result; int call_id; time_t start_time; gboolean cancel; } remote_ra_cmd_t; enum remote_migration_status { expect_takeover = 1, takeover_complete, }; typedef struct remote_ra_data_s { crm_trigger_t *work; remote_ra_cmd_t *cur_cmd; GList *cmds; GList *recurring_cmds; enum remote_migration_status migrate_status; gboolean active; /* Maintenance mode is difficult to determine from the controller's context, * so we have it signalled back with the transition from the scheduler. */ gboolean is_maintenance; /* Similar for whether we are controlling a guest node or remote node. * Fortunately there is a meta-attribute in the transition already and * as the situation doesn't change over time we can use the * resource start for noting down the information for later use when * the attributes aren't at hand. */ gboolean controlling_guest; } remote_ra_data_t; static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms); static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd); static GList *fail_all_monitor_cmds(GList * list); static void free_cmd(gpointer user_data) { remote_ra_cmd_t *cmd = user_data; if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->interval_id) { g_source_remove(cmd->interval_id); } if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); } if (cmd->takeover_timeout_id) { g_source_remove(cmd->takeover_timeout_id); } free(cmd->owner); free(cmd->rsc_id); free(cmd->action); free(cmd->userdata); pcmk__reset_result(&(cmd->result)); lrmd_key_value_freeall(cmd->params); free(cmd); } static int generate_callid(void) { static int remote_ra_callid = 0; remote_ra_callid++; if (remote_ra_callid <= 0) { remote_ra_callid = 1; } return remote_ra_callid; } static gboolean recurring_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->interval_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd); ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); } return FALSE; } static gboolean start_delay_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->delay_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; mainloop_set_trigger(ra_data->work); } return FALSE; } /*! * \internal * \brief Handle cluster communication related to pacemaker_remote node joining * * \param[in] node_name Name of newly integrated pacemaker_remote node */ static void remote_node_up(const char *node_name) { int call_opt, call_id = 0; xmlNode *update, *state; crm_node_t *node; enum controld_section_e section = controld_section_all; CRM_CHECK(node_name != NULL, return); crm_info("Announcing Pacemaker Remote node %s", node_name); /* Clear node's entire state (resource history and transient attributes) * other than shutdown locks. The transient attributes should and normally * will be cleared when the node leaves, but since remote node state has a * number of corner cases, clear them here as well, to be sure. */ call_opt = crmd_cib_smart_opt(); if (controld_shutdown_lock_enabled) { section = controld_section_all_unlocked; } /* Purge node from attrd's memory */ update_attrd_remote_node_removed(node_name, NULL); controld_delete_node_state(node_name, section, call_opt); - /* Clear node's probed attribute */ + /* Delete node's probe_complete attribute. This serves two purposes: + * + * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it + * - deleting it (or any attribute for that matter) here ensures the + * attribute manager learns the node is remote + */ update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); /* Ensure node is in the remote peer cache with member status */ node = crm_remote_peer_get(node_name); CRM_CHECK(node != NULL, return); pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); /* pacemaker_remote nodes don't participate in the membership layer, * so cluster nodes don't automatically get notified when they come and go. * We send a cluster message to the DC, and update the CIB node state entry, * so the DC will get it sooner (via message) or later (via CIB refresh), * and any other interested parties can query the CIB. */ send_remote_state_message(node_name, TRUE); update = create_xml_node(NULL, XML_CIB_TAG_STATUS); state = create_node_state_update(node, node_update_cluster, update, __func__); /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever * needs to be fenced, this flag will allow various actions to determine * whether the fencing has happened yet. */ crm_xml_add(state, XML_NODE_IS_FENCED, "0"); /* TODO: If the remote connection drops, and this (async) CIB update either * failed or has not yet completed, later actions could mistakenly think the * node has already been fenced (if the XML_NODE_IS_FENCED attribute was * previously set, because it won't have been cleared). This could prevent * actual fencing or allow recurring monitor failures to be cleared too * soon. Ideally, we wouldn't rely on the CIB for the fenced status. */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL); if (call_id < 0) { crm_perror(LOG_WARNING, "%s CIB node state setup", node_name); } free_xml(update); } enum down_opts { DOWN_KEEP_LRM, DOWN_ERASE_LRM }; /*! * \internal * \brief Handle cluster communication related to pacemaker_remote node leaving * * \param[in] node_name Name of lost node * \param[in] opts Whether to keep or erase LRM history */ static void remote_node_down(const char *node_name, const enum down_opts opts) { xmlNode *update; int call_id = 0; int call_opt = crmd_cib_smart_opt(); crm_node_t *node; /* Purge node from attrd's memory */ update_attrd_remote_node_removed(node_name, NULL); /* Normally, only node attributes should be erased, and the resource history * should be kept until the node comes back up. However, after a successful * fence, we want to clear the history as well, so we don't think resources * are still running on the node. */ if (opts == DOWN_ERASE_LRM) { controld_delete_node_state(node_name, controld_section_all, call_opt); } else { controld_delete_node_state(node_name, controld_section_attrs, call_opt); } /* Ensure node is in the remote peer cache with lost state */ node = crm_remote_peer_get(node_name); CRM_CHECK(node != NULL, return); pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0); /* Notify DC */ send_remote_state_message(node_name, FALSE); /* Update CIB node state */ update = create_xml_node(NULL, XML_CIB_TAG_STATUS); create_node_state_update(node, node_update_cluster, update, __func__); fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL); if (call_id < 0) { crm_perror(LOG_ERR, "%s CIB node state update", node_name); } free_xml(update); } /*! * \internal * \brief Handle effects of a remote RA command on node state * * \param[in] cmd Completed remote RA command */ static void check_remote_node_state(remote_ra_cmd_t *cmd) { /* Only successful actions can change node state */ if (!pcmk__result_ok(&(cmd->result))) { return; } if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { remote_node_up(cmd->rsc_id); } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) { /* After a successful migration, we don't need to do remote_node_up() * because the DC already knows the node is up, and we don't want to * clear LRM history etc. We do need to add the remote node to this * host's remote peer cache, because (unless it happens to be DC) * it hasn't been tracking the remote node, and other code relies on * the cache to distinguish remote nodes from unseen cluster nodes. */ crm_node_t *node = crm_remote_peer_get(cmd->rsc_id); CRM_CHECK(node != NULL, return); pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id); remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL; if (ra_data) { if (ra_data->migrate_status != takeover_complete) { /* Stop means down if we didn't successfully migrate elsewhere */ remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM); } else if (AM_I_DC == FALSE) { /* Only the connection host and DC track node state, * so if the connection migrated elsewhere and we aren't DC, * un-cache the node, so we don't have stale info */ crm_remote_peer_cache_remove(cmd->rsc_id); } } } /* We don't do anything for successful monitors, which is correct for * routine recurring monitors, and for monitors on nodes where the * connection isn't supposed to be (the cluster will stop the connection in * that case). However, if the initial probe finds the connection already * active on the node where we want it, we probably should do * remote_node_up(). Unfortunately, we can't distinguish that case here. * Given that connections have to be initiated by the cluster, the chance of * that should be close to zero. */ } static void report_remote_ra_result(remote_ra_cmd_t * cmd) { lrmd_event_data_t op = { 0, }; check_remote_node_state(cmd); op.type = lrmd_event_exec_complete; op.rsc_id = cmd->rsc_id; op.op_type = cmd->action; op.user_data = cmd->userdata; op.timeout = cmd->timeout; op.interval_ms = cmd->interval_ms; op.t_run = (unsigned int) cmd->start_time; op.t_rcchange = (unsigned int) cmd->start_time; lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status, cmd->result.exit_reason); if (cmd->reported_success && !pcmk__result_ok(&(cmd->result))) { op.t_rcchange = (unsigned int) time(NULL); /* This edge case will likely never ever occur, but if it does the * result is that a failure will not be processed correctly. This is only * remotely possible because we are able to detect a connection resource's tcp * connection has failed at any moment after start has completed. The actual * recurring operation is just a connectivity ping. * * basically, we are not guaranteed that the first successful monitor op and * a subsequent failed monitor op will not occur in the same timestamp. We have to * make it look like the operations occurred at separate times though. */ if (op.t_rcchange == op.t_run) { op.t_rcchange++; } } if (cmd->params) { lrmd_key_value_t *tmp; op.params = pcmk__strkey_table(free, free); for (tmp = cmd->params; tmp; tmp = tmp->next) { g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value)); } } op.call_id = cmd->call_id; op.remote_nodename = cmd->owner; lrm_op_callback(&op); if (op.params) { g_hash_table_destroy(op.params); } lrmd__reset_result(&op); } static void update_remaining_timeout(remote_ra_cmd_t * cmd) { cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000; } static gboolean retry_start_cmd_cb(gpointer data) { lrm_state_t *lrm_state = data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd = NULL; int rc = ETIME; if (!ra_data || !ra_data->cur_cmd) { return FALSE; } cmd = ra_data->cur_cmd; if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) { return FALSE; } update_remaining_timeout(cmd); if (cmd->remaining_timeout > 0) { rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout); } else { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Not enough time remains to retry remote connection"); } if (rc != pcmk_rc_ok) { report_remote_ra_result(cmd); if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } ra_data->cur_cmd = NULL; free_cmd(cmd); } else { /* wait for connection event */ } return FALSE; } static gboolean connection_takeover_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; crm_info("takeover event timed out for node %s", cmd->rsc_id); cmd->takeover_timeout_id = 0; lrm_state = lrm_state_find(cmd->rsc_id); handle_remote_ra_stop(lrm_state, cmd); free_cmd(cmd); return FALSE; } static gboolean monitor_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; lrm_state = lrm_state_find(cmd->rsc_id); crm_info("Timed out waiting for remote poke response from %s%s", cmd->rsc_id, (lrm_state? "" : " (no LRM state)")); cmd->monitor_timeout_id = 0; pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Remote executor did not respond"); if (lrm_state && lrm_state->remote_ra_data) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (ra_data->cur_cmd == cmd) { ra_data->cur_cmd = NULL; } if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } } report_remote_ra_result(cmd); free_cmd(cmd); if(lrm_state) { lrm_state_disconnect(lrm_state); } return FALSE; } static void synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type) { lrmd_event_data_t op = { 0, }; if (lrm_state == NULL) { /* if lrm_state not given assume local */ lrm_state = lrm_state_find(fsa_our_uname); } CRM_ASSERT(lrm_state != NULL); op.type = lrmd_event_exec_complete; op.rsc_id = rsc_id; op.op_type = op_type; op.t_run = (unsigned int) time(NULL); op.t_rcchange = op.t_run; op.call_id = generate_callid(); lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); process_lrm_event(lrm_state, &op, NULL, NULL); } void remote_lrm_op_callback(lrmd_event_data_t * op) { gboolean cmd_handled = FALSE; lrm_state_t *lrm_state = NULL; remote_ra_data_t *ra_data = NULL; remote_ra_cmd_t *cmd = NULL; crm_debug("Processing '%s%s%s' event on remote connection to %s: %s " "(%d) status=%s (%d)", (op->op_type? op->op_type : ""), (op->op_type? " " : ""), lrmd_event_type2str(op->type), op->remote_nodename, services_ocf_exitcode_str(op->rc), op->rc, pcmk_exec_status_str(op->op_status), op->op_status); lrm_state = lrm_state_find(op->remote_nodename); if (!lrm_state || !lrm_state->remote_ra_data) { crm_debug("No state information found for remote connection event"); return; } ra_data = lrm_state->remote_ra_data; if (op->type == lrmd_event_new_client) { // Another client has connected to the remote daemon if (ra_data->migrate_status == expect_takeover) { // Great, we knew this was coming ra_data->migrate_status = takeover_complete; } else { crm_err("Disconnecting from Pacemaker Remote node %s due to " "unexpected client takeover", op->remote_nodename); /* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */ /* Do not free lrm_state->conn yet. */ /* It'll be freed in the following stop action. */ lrm_state_disconnect_only(lrm_state); } return; } /* filter all EXEC events up */ if (op->type == lrmd_event_exec_complete) { if (ra_data->migrate_status == takeover_complete) { crm_debug("ignoring event, this connection is taken over by another node"); } else { lrm_op_callback(op); } return; } if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) { if (ra_data->active == FALSE) { crm_debug("Disconnection from Pacemaker Remote node %s complete", lrm_state->node_name); } else if (!remote_ra_is_in_maintenance(lrm_state)) { crm_err("Lost connection to Pacemaker Remote node %s", lrm_state->node_name); ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); } else { crm_notice("Unmanaged Pacemaker Remote node %s disconnected", lrm_state->node_name); /* Do roughly what a 'stop' on the remote-resource would do */ handle_remote_ra_stop(lrm_state, NULL); remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM); /* now fake the reply of a successful 'stop' */ synthesize_lrmd_success(NULL, lrm_state->node_name, "stop"); } return; } if (!ra_data->cur_cmd) { crm_debug("no event to match"); return; } cmd = ra_data->cur_cmd; /* Start actions and migrate from actions complete after connection * comes back to us. */ if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) { if (op->connection_rc < 0) { update_remaining_timeout(cmd); if (op->connection_rc == -ENOKEY) { // Hard error, don't retry pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM, PCMK_EXEC_ERROR, "Authentication key not readable"); } else if (cmd->remaining_timeout > 3000) { crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout); g_timeout_add(1000, retry_start_cmd_cb, lrm_state); return; } else { crm_trace("can't reschedule start, remaining timeout too small %d", cmd->remaining_timeout); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, pcmk_strerror(op->connection_rc)); } } else { lrm_state_reset_tables(lrm_state, TRUE); pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); ra_data->active = TRUE; } crm_debug("Remote connection event matched %s action", cmd->action); report_remote_ra_result(cmd); cmd_handled = TRUE; } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); cmd->monitor_timeout_id = 0; } /* Only report success the first time, after that only worry about failures. * For this function, if we get the poke pack, it is always a success. Pokes * only fail if the send fails, or the response times out. */ if (!cmd->reported_success) { pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); cmd->reported_success = 1; } crm_debug("Remote poke event matched %s action", cmd->action); /* success, keep rescheduling if interval is present. */ if (cmd->interval_ms && (cmd->cancel == FALSE)) { ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd); cmd->interval_id = g_timeout_add(cmd->interval_ms, recurring_helper, cmd); cmd = NULL; /* prevent free */ } cmd_handled = TRUE; } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { if (ra_data->active == TRUE && (cmd->cancel == FALSE)) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Remote connection unexpectedly dropped " "during monitor"); report_remote_ra_result(cmd); crm_err("Remote connection to %s unexpectedly dropped during monitor", lrm_state->node_name); } cmd_handled = TRUE; } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { handle_remote_ra_stop(lrm_state, cmd); cmd_handled = TRUE; } else { crm_debug("Event did not match %s action", ra_data->cur_cmd->action); } if (cmd_handled) { ra_data->cur_cmd = NULL; if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } free_cmd(cmd); } } static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd) { remote_ra_data_t *ra_data = NULL; CRM_ASSERT(lrm_state); ra_data = lrm_state->remote_ra_data; if (ra_data->migrate_status != takeover_complete) { /* delete pending ops when ever the remote connection is intentionally stopped */ g_hash_table_remove_all(lrm_state->pending_ops); } else { /* we no longer hold the history if this connection has been migrated, * however, we keep metadata cache for future use */ lrm_state_reset_tables(lrm_state, FALSE); } ra_data->active = FALSE; lrm_state_disconnect(lrm_state); if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } ra_data->cmds = NULL; ra_data->recurring_cmds = NULL; ra_data->cur_cmd = NULL; if (cmd) { pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); } } // \return Standard Pacemaker return code static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms) { const char *server = NULL; lrmd_key_value_t *tmp = NULL; int port = 0; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms; int rc = pcmk_rc_ok; for (tmp = cmd->params; tmp; tmp = tmp->next) { if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR, XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) { server = tmp->value; } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) { port = atoi(tmp->value); } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) { ra_data->controlling_guest = TRUE; } } rc = controld_connect_remote_executor(lrm_state, server, port, timeout_used); if (rc != pcmk_rc_ok) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, pcmk_rc_str(rc)); } return rc; } static gboolean handle_remote_ra_exec(gpointer user_data) { int rc = 0; lrm_state_t *lrm_state = user_data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd; GList *first = NULL; if (ra_data->cur_cmd) { /* still waiting on previous cmd */ return TRUE; } while (ra_data->cmds) { first = ra_data->cmds; cmd = first->data; if (cmd->delay_id) { /* still waiting for start delay timer to trip */ return TRUE; } ra_data->cmds = g_list_remove_link(ra_data->cmds, first); g_list_free_1(first); if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) { ra_data->migrate_status = 0; if (handle_remote_ra_start(lrm_state, cmd, cmd->timeout) == pcmk_rc_ok) { /* take care of this later when we get async connection result */ crm_debug("Initiated async remote connection, %s action will complete after connect event", cmd->action); ra_data->cur_cmd = cmd; return TRUE; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, "monitor")) { if (lrm_state_is_connected(lrm_state) == TRUE) { rc = lrm_state_poke_connection(lrm_state); if (rc < 0) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, pcmk_strerror(rc)); } } else { rc = -1; pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, "Remote connection inactive"); } if (rc == 0) { crm_debug("Poked Pacemaker Remote at node %s, waiting for async response", cmd->rsc_id); ra_data->cur_cmd = cmd; cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd); return TRUE; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, "stop")) { if (ra_data->migrate_status == expect_takeover) { /* briefly wait on stop for the takeover event to occur. If the * takeover event does not occur during the wait period, that's fine. * It just means that the remote-node's lrm_status section is going to get * cleared which will require all the resources running in the remote-node * to be explicitly re-detected via probe actions. If the takeover does occur * successfully, then we can leave the status section intact. */ cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd); ra_data->cur_cmd = cmd; return TRUE; } handle_remote_ra_stop(lrm_state, cmd); } else if (!strcmp(cmd->action, "migrate_to")) { ra_data->migrate_status = expect_takeover; pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT, NULL)) { /* Currently the only reloadable parameter is reconnect_interval, * which is only used by the scheduler via the CIB, so reloads are a * no-op. * * @COMPAT DC <2.1.0: We only need to check for "reload" in case * we're in a rolling upgrade with a DC scheduling "reload" instead * of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway, * so this would work for that purpose as well. */ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); } free_cmd(cmd); } return TRUE; } static void remote_ra_data_init(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = NULL; if (lrm_state->remote_ra_data) { return; } ra_data = calloc(1, sizeof(remote_ra_data_t)); ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state); lrm_state->remote_ra_data = ra_data; } void remote_ra_cleanup(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (!ra_data) { return; } if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } mainloop_destroy_trigger(ra_data->work); free(ra_data); lrm_state->remote_ra_data = NULL; } gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id) { if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) { return TRUE; } if (id && lrm_state_find(id) && !pcmk__str_eq(id, fsa_our_uname, pcmk__str_casei)) { return TRUE; } return FALSE; } lrmd_rsc_info_t * remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id) { lrmd_rsc_info_t *info = NULL; if ((lrm_state_find(rsc_id))) { info = calloc(1, sizeof(lrmd_rsc_info_t)); info->id = strdup(rsc_id); info->type = strdup(REMOTE_LRMD_RA); info->standard = strdup(PCMK_RESOURCE_CLASS_OCF); info->provider = strdup("pacemaker"); } return info; } static gboolean is_remote_ra_supported_action(const char *action) { return pcmk__str_any_of(action, CRMD_ACTION_START, CRMD_ACTION_STOP, CRMD_ACTION_STATUS, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, CRMD_ACTION_RELOAD_AGENT, CRMD_ACTION_RELOAD, NULL); } static GList * fail_all_monitor_cmds(GList * list) { GList *rm_list = NULL; remote_ra_cmd_t *cmd = NULL; GList *gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { rm_list = g_list_append(rm_list, cmd); } } for (gIter = rm_list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Lost connection to remote executor"); crm_trace("Pre-emptively failing %s %s (interval=%u, %s)", cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata); report_remote_ra_result(cmd); list = g_list_remove(list, cmd); free_cmd(cmd); } /* frees only the list data, not the cmds */ g_list_free(rm_list); return list; } static GList * remove_cmd(GList * list, const char *action, guint interval_ms) { remote_ra_cmd_t *cmd = NULL; GList *gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) { break; } cmd = NULL; } if (cmd) { list = g_list_remove(list, cmd); free_cmd(cmd); } return list; } int remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, guint interval_ms) { lrm_state_t *connection_rsc = NULL; remote_ra_data_t *ra_data = NULL; connection_rsc = lrm_state_find(rsc_id); if (!connection_rsc || !connection_rsc->remote_ra_data) { return -EINVAL; } ra_data = connection_rsc->remote_ra_data; ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms); ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval_ms); if (ra_data->cur_cmd && (ra_data->cur_cmd->interval_ms == interval_ms) && (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) { ra_data->cur_cmd->cancel = TRUE; } return 0; } static remote_ra_cmd_t * handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, const char *userdata) { GList *gIter = NULL; remote_ra_cmd_t *cmd = NULL; /* there are 3 places a potential duplicate monitor operation * could exist. * 1. recurring_cmds list. where the op is waiting for its next interval * 2. cmds list, where the op is queued to get executed immediately * 3. cur_cmd, which means the monitor op is in flight right now. */ if (interval_ms == 0) { return NULL; } if (ra_data->cur_cmd && ra_data->cur_cmd->cancel == FALSE && (ra_data->cur_cmd->interval_ms == interval_ms) && pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) { cmd = ra_data->cur_cmd; goto handle_dup; } for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { goto handle_dup; } } for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { goto handle_dup; } } return NULL; handle_dup: crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT, cmd->rsc_id, "monitor", interval_ms); /* update the userdata */ if (userdata) { free(cmd->userdata); cmd->userdata = strdup(userdata); } /* if we've already reported success, generate a new call id */ if (cmd->reported_success) { cmd->start_time = time(NULL); cmd->call_id = generate_callid(); cmd->reported_success = 0; } /* if we have an interval_id set, that means we are in the process of * waiting for this cmd's next interval. instead of waiting, cancel * the timer and execute the action immediately */ if (cmd->interval_id) { g_source_remove(cmd->interval_id); cmd->interval_id = 0; recurring_helper(cmd); } return cmd; } /*! * \internal * \brief Execute an action using the (internal) ocf:pacemaker:remote agent * * \param[in] lrm_state Executor state object for remote connection * \param[in] rsc_id Connection resource ID * \param[in] action Action to execute * \param[in] userdata String to copy and pass to execution callback * \param[in] interval_ms Action interval (in milliseconds) * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] start_delay_ms Delay (in milliseconds) before initiating action * \param[in] params Connection resource parameters * \param[out] call_id Where to store call ID on success * * \return Standard Pacemaker return code * \note This takes ownership of \p params, which should not be used or freed * after calling this function. */ int controld_execute_remote_agent(lrm_state_t *lrm_state, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout_ms, int start_delay_ms, lrmd_key_value_t *params, int *call_id) { lrm_state_t *connection_rsc = NULL; remote_ra_cmd_t *cmd = NULL; remote_ra_data_t *ra_data = NULL; *call_id = 0; CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL) && (userdata != NULL) && (call_id != NULL), lrmd_key_value_freeall(params); return EINVAL); if (!is_remote_ra_supported_action(action)) { lrmd_key_value_freeall(params); return EOPNOTSUPP; } connection_rsc = lrm_state_find(rsc_id); if (connection_rsc == NULL) { lrmd_key_value_freeall(params); return ENOTCONN; } remote_ra_data_init(connection_rsc); ra_data = connection_rsc->remote_ra_data; cmd = handle_dup_monitor(ra_data, interval_ms, userdata); if (cmd) { *call_id = cmd->call_id; lrmd_key_value_freeall(params); return pcmk_rc_ok; } cmd = calloc(1, sizeof(remote_ra_cmd_t)); if (cmd == NULL) { lrmd_key_value_freeall(params); return ENOMEM; } cmd->owner = strdup(lrm_state->node_name); cmd->rsc_id = strdup(rsc_id); cmd->action = strdup(action); cmd->userdata = strdup(userdata); if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL) || (cmd->userdata == NULL)) { free_cmd(cmd); lrmd_key_value_freeall(params); return ENOMEM; } cmd->interval_ms = interval_ms; cmd->timeout = timeout_ms; cmd->start_delay = start_delay_ms; cmd->params = params; cmd->start_time = time(NULL); cmd->call_id = generate_callid(); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); *call_id = cmd->call_id; return pcmk_rc_ok; } /*! * \internal * \brief Immediately fail all monitors of a remote node, if proxied here * * \param[in] node_name Name of pacemaker_remote node */ void remote_ra_fail(const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (lrm_state && lrm_state_is_connected(lrm_state)) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; crm_info("Failing monitors on Pacemaker Remote node %s", node_name); ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); } } /* A guest node fencing implied by host fencing looks like: * * * * * * * */ #define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \ "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \ "/" XML_CIB_TAG_NODE /*! * \internal * \brief Check a pseudo-action for Pacemaker Remote node side effects * * \param[in] xml XML of pseudo-action to check */ void remote_ra_process_pseudo(xmlNode *xml) { xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE); if (numXpathResults(search) == 1) { xmlNode *result = getXpathResult(search, 0); /* Normally, we handle the necessary side effects of a guest node stop * action when reporting the remote agent's result. However, if the stop * is implied due to fencing, it will be a fencing pseudo-event, and * there won't be a result to report. Handle that case here. * * This will result in a duplicate call to remote_node_down() if the * guest stop was real instead of implied, but that shouldn't hurt. * * There is still one corner case that isn't handled: if a guest node * isn't running any resources when its host is fenced, it will appear * to be cleanly stopped, so there will be no pseudo-fence, and our * peer cache state will be incorrect unless and until the guest is * recovered. */ if (result) { const char *remote = ID(result); if (remote) { remote_node_down(remote, DOWN_ERASE_LRM); } } } freeXpathObject(search); } static void remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; xmlNode *update, *state; int call_opt, call_id = 0; crm_node_t *node; call_opt = crmd_cib_smart_opt(); node = crm_remote_peer_get(lrm_state->node_name); CRM_CHECK(node != NULL, return); update = create_xml_node(NULL, XML_CIB_TAG_STATUS); state = create_node_state_update(node, node_update_none, update, __func__); crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0"); fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL); if (call_id < 0) { crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name); } else { /* TODO: still not 100% sure that async update will succeed ... */ ra_data->is_maintenance = maintenance; } free_xml(update); } #define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \ "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \ XML_GRAPH_TAG_MAINTENANCE /*! * \internal * \brief Check a pseudo-action holding updates for maintenance state * * \param[in] xml XML of pseudo-action to check */ void remote_ra_process_maintenance_nodes(xmlNode *xml) { xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE); if (numXpathResults(search) == 1) { xmlNode *node; int cnt = 0, cnt_remote = 0; for (node = first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE); node != NULL; node = pcmk__xml_next(node)) { lrm_state_t *lrm_state = lrm_state_find(ID(node)); cnt++; if (lrm_state && lrm_state->remote_ra_data && ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) { int is_maint; cnt_remote++; pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE), &is_maint, 0); remote_ra_maintenance(lrm_state, is_maint); } } crm_trace("Action holds %d nodes (%d remotes found) " "adjusting maintenance-mode", cnt, cnt_remote); } freeXpathObject(search); } gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; return ra_data->is_maintenance; } gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; return ra_data->controlling_guest; } diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c index 5ceb324653..7ed75fab83 100644 --- a/daemons/controld/controld_te_callbacks.c +++ b/daemons/controld/controld_te_callbacks.c @@ -1,698 +1,677 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include /* For ONLINESTATUS etc */ #include void te_update_confirm(const char *event, xmlNode * msg); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; crm_trigger_t *transition_trigger = NULL; /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" // An explicit shutdown-lock of 0 means the lock has been cleared static bool shutdown_lock_cleared(xmlNode *lrm_resource) { time_t shutdown_lock = 0; return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &shutdown_lock) == pcmk_ok) && (shutdown_lock == 0); } static void te_update_diff_v1(const char *event, xmlNode *diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; CRM_CHECK(diff != NULL, return); xml_log_patchset(LOG_TRACE, __func__, diff); if (cib_config_changed(NULL, NULL, &diff)) { abort_transition(INFINITY, tg_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); goto bail; } freeXpathObject(xpathObj); /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); - /* Transient Attributes - Added/Updated */ - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" - XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); - max = numXpathResults(xpathObj); - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *attr = getXpathResult(xpathObj, lpc); - const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); - - if (pcmk__str_eq(CRM_OP_PROBED, name, pcmk__str_casei) && - !pcmk__xe_attr_is_true(attr, XML_NVPAIR_ATTR_VALUE)) { - abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); - crm_log_xml_trace(attr, "Abort"); - goto bail; - } - } - - freeXpathObject(xpathObj); - /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); // Check for lrm_resource entries xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); max = numXpathResults(xpathObj); /* * Updates by, or in response to, graph actions will never affect more than * one resource at a time, so such updates indicate an LRM refresh. In that * case, start a new transition rather than check each result individually, * which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && (max > 1)) { crm_debug("Ignoring resource operation updates due to history refresh of %d resources", max); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); goto bail; } if (max == 1) { xmlNode *lrm_resource = getXpathResult(xpathObj, 0); if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", lrm_resource); // Still process results, so we stop timers and update failcounts } } freeXpathObject(xpathObj); /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); if (max > 0) { int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { int path_max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; op_id = ID(match); path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1; rsc_op_xpath = calloc(1, path_max); snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id); op_match = xpath_search(diff, rsc_op_xpath); if (numXpathResults(op_match) == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); freeXpathObject(op_match); free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } freeXpathObject(op_match); free(rsc_op_xpath); } bail: freeXpathObject(xpathObj); } static void process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) { for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL; rsc_op = pcmk__xml_next(rsc_op)) { process_graph_event(rsc_op, node); } if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", lrm_resource); } } static void process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { xmlNode *rsc = NULL; if (xml == NULL) { return; } if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); CRM_CHECK(xml != NULL, return); } CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. * * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && xml->children && xml->children->next) { crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); return; } for (rsc = pcmk__xml_first_child(xml); rsc != NULL; rsc = pcmk__xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); process_lrm_resource_diff(rsc, node); } } static char *extract_node_uuid(const char *xpath) { char *mutable_path = strdup(xpath); char *node_uuid = NULL; char *search = NULL; char *match = NULL; match = strstr(mutable_path, "node_state[@id=\'"); if (match == NULL) { free(mutable_path); return NULL; } match += strlen("node_state[@id=\'"); search = strchr(match, '\''); if (search == NULL) { free(mutable_path); return NULL; } search[0] = 0; node_uuid = strdup(match); free(mutable_path); return node_uuid; } static void abort_unless_down(const char *xpath, const char *op, xmlNode *change, const char *reason) { char *node_uuid = NULL; crm_action_t *down = NULL; if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) { abort_transition(INFINITY, tg_restart, reason, change); return; } node_uuid = extract_node_uuid(xpath); if(node_uuid == NULL) { crm_err("Could not extract node ID from %s", xpath); abort_transition(INFINITY, tg_restart, reason, change); return; } down = match_down_event(node_uuid); if (down == NULL) { crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); abort_transition(INFINITY, tg_restart, reason, change); } else { crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); } free(node_uuid); } static void process_op_deletion(const char *xpath, xmlNode *change) { char *mutable_key = strdup(xpath); char *key; char *node_uuid; // Extract the part of xpath between last pair of single quotes key = strrchr(mutable_key, '\''); if (key != NULL) { *key = '\0'; key = strrchr(mutable_key, '\''); } if (key == NULL) { crm_warn("Ignoring malformed CIB update (resource deletion of %s)", xpath); free(mutable_key); return; } ++key; node_uuid = extract_node_uuid(xpath); if (confirm_cancel_action(key, node_uuid) == FALSE) { abort_transition(INFINITY, tg_restart, "Resource operation removal", change); } free(mutable_key); free(node_uuid); } static void process_delete_diff(const char *xpath, const char *op, xmlNode *change) { if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { process_op_deletion(xpath, change); } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { abort_unless_down(xpath, op, change, "Resource state removal"); } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) { abort_unless_down(xpath, op, change, "Node state removal"); } else { crm_trace("Ignoring delete of %s", xpath); } } static void process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, const char *xpath) { xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); process_resource_updates(ID(state), lrm, change, op, xpath); } static void process_status_diff(xmlNode *status, xmlNode *change, const char *op, const char *xpath) { for (xmlNode *state = pcmk__xml_first_child(status); state != NULL; state = pcmk__xml_next(state)) { process_node_state_diff(state, change, op, xpath); } } static void process_cib_diff(xmlNode *cib, xmlNode *change, const char *op, const char *xpath) { xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); if (status) { process_status_diff(status, change, op, xpath); } if (config) { abort_transition(INFINITY, tg_restart, "Non-status-only change", change); } } static void te_update_diff_v2(xmlNode *diff) { crm_log_xml_trace(diff, "Patch:Raw"); for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { xmlNode *match = NULL; const char *name = NULL; const char *xpath = crm_element_value(change, XML_DIFF_PATH); // Possible ops: create, modify, delete, move const char *op = crm_element_value(change, XML_DIFF_OP); // Ignore uninteresting updates if (op == NULL) { continue; } else if (xpath == NULL) { crm_trace("Ignoring %s change for version field", op); continue; } else if (strcmp(op, "move") == 0) { crm_trace("Ignoring move change at %s", xpath); continue; } // Find the result of create/modify ops if (strcmp(op, "create") == 0) { match = change->children; } else if (strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } else if (strcmp(op, "delete") != 0) { crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", op, xpath); continue; } if (match) { if (match->type == XML_COMMENT_NODE) { crm_trace("Ignoring %s operation for comment at %s", op, xpath); continue; } name = (const char *)match->name; } crm_trace("Handling %s operation for %s%s%s", op, (xpath? xpath : "CIB"), (name? " matched by " : ""), (name? name : "")); if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { abort_transition(INFINITY, tg_restart, "Configuration change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { abort_transition(INFINITY, tg_restart, "Ticket attribute change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, pcmk__str_casei)) { abort_unless_down(xpath, op, change, "Transient attribute change"); break; // Won't be packaged with operation results we may be waiting for } else if (strcmp(op, "delete") == 0) { process_delete_diff(xpath, op, change); } else if (name == NULL) { crm_warn("Ignoring malformed CIB update (%s at %s has no result)", op, xpath); } else if (strcmp(name, XML_TAG_CIB) == 0) { process_cib_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { process_status_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { process_node_state_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { process_resource_updates(ID(match), match, change, op, xpath); } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_resource_updates(local_node, match, change, op, xpath); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_lrm_resource_diff(match, local_node); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_graph_event(match, local_node); free(local_node); } else { crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", op, xpath, name); } } } void te_update_diff(const char *event, xmlNode * msg) { xmlNode *diff = NULL; const char *op = NULL; int rc = -EINVAL; int format = 1; int p_add[] = { 0, 0, 0 }; int p_del[] = { 0, 0, 0 }; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (transition_graph == NULL) { crm_trace("No graph"); return; } else if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } else if (transition_graph->complete && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_patch_versions(diff, p_add, p_del); crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], fsa_state2string(fsa_state)); crm_element_value_int(diff, "format", &format); switch (format) { case 1: te_update_diff_v1(event, diff); break; case 2: te_update_diff_v2(diff); break; default: crm_warn("Ignoring malformed CIB update (unknown patch format %d)", format); } } gboolean process_te_message(xmlNode * msg, xmlNode * xml_data) { const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, F_CRM_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_trace("Processing %s (%s) message", op, ref); crm_log_xml_trace(msg, "ipc"); if (op == NULL) { /* error */ } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_trace("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if (pcmk__str_eq(op, CRM_OP_INVOKE_LRM, pcmk__str_casei) && pcmk__str_eq(sys_from, CRM_SYSTEM_LRMD, pcmk__str_casei) /* && pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_casei) */ ) { xmlXPathObject *xpathObj = NULL; crm_log_xml_trace(msg, "Processing (N)ACK"); crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from); xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); if (numXpathResults(xpathObj)) { int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } freeXpathObject(xpathObj); } else { crm_log_xml_err(msg, "Invalid (N)ACK"); freeXpathObject(xpathObj); return FALSE; } } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_trace("finished processing message"); return TRUE; } void cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc < pcmk_ok) { crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc)); } } /*! * \brief Handle a timeout in node-to-node communication * * \param[in] data Pointer to action timer * * \return FALSE (indicating that source should be not be re-added) */ gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; const char *task = NULL; const char *on_node = NULL; const char *via_node = NULL; CRM_CHECK(data != NULL, return FALSE); timer = (crm_action_timer_t *) data; stop_te_timer(timer); CRM_CHECK(timer->action != NULL, return FALSE); task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET); via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE); if (transition_graph->complete) { crm_notice("Node %s did not send %s result (via %s) within %dms " "(ignoring because transition not in progress)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout); } else { /* fail the action */ crm_err("Node %s did not send %s result (via %s) within %dms " "(action timeout plus cluster-delay)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout + transition_graph->network_delay); pcmk__log_graph_action(LOG_ERR, timer->action); crm__set_graph_action_flags(timer->action, pcmk__graph_action_failed); te_action_confirmed(timer->action, transition_graph); abort_transition(INFINITY, tg_restart, "Action lost", NULL); // Record timeout in the CIB if appropriate if ((timer->action->type == action_type_rsc) && controld_action_is_recordable(task)) { controld_record_action_timeout(timer->action); } } return FALSE; } diff --git a/doc/sphinx/Pacemaker_Development/components.rst b/doc/sphinx/Pacemaker_Development/components.rst index 0d29fa0343..a51220cac9 100644 --- a/doc/sphinx/Pacemaker_Development/components.rst +++ b/doc/sphinx/Pacemaker_Development/components.rst @@ -1,367 +1,368 @@ Coding Particular Pacemaker Components -------------------------------------- The Pacemaker code can be intricate and difficult to follow. This chapter has some high-level descriptions of how individual components work. .. index:: single: fencer single: pacemaker-fenced Fencer ###### ``pacemaker-fenced`` is the Pacemaker daemon that handles fencing requests. In the broadest terms, fencing works like this: #. The initiator (an external program such as ``stonith_admin``, or the cluster itself via the controller) asks the local fencer, "Hey, could you please fence this node?" #. The local fencer asks all the fencers in the cluster (including itself), "Hey, what fencing devices do you have access to that can fence this node?" #. Each fencer in the cluster replies with a list of available devices that it knows about. #. Once the original fencer gets all the replies, it asks the most appropriate fencer peer to actually carry out the fencing. It may send out more than one such request if the target node must be fenced with multiple devices. #. The chosen fencer(s) call the appropriate fencing resource agent(s) to do the fencing, then reply to the original fencer with the result. #. The original fencer broadcasts the result to all fencers. #. Each fencer sends the result to each of its local clients (including, at some point, the initiator). A more detailed description follows. .. index:: single: libstonithd Initiating a fencing request ____________________________ A fencing request can be initiated by the cluster or externally, using the libstonithd API. * The cluster always initiates fencing via ``daemons/controld/controld_fencing.c:te_fence_node()`` (which calls the ``fence()`` API method). This occurs when a transition graph synapse contains a ``CRM_OP_FENCE`` XML operation. * The main external clients are ``stonith_admin`` and ``cts-fence-helper``. The ``DLM`` project also uses Pacemaker for fencing. Highlights of the fencing API: * ``stonith_api_new()`` creates and returns a new ``stonith_t`` object, whose ``cmds`` member has methods for connect, disconnect, fence, etc. * the ``fence()`` method creates and sends a ``STONITH_OP_FENCE XML`` request with the desired action and target node. Callers do not have to choose or even have any knowledge about particular fencing devices. Fencing queries _______________ The function calls for a fencing request go something like this: The local fencer receives the client's request via an IPC or messaging layer callback, which calls * ``stonith_command()``, which (for requests) calls * ``handle_request()``, which (for ``STONITH_OP_FENCE`` from a client) calls * ``initiate_remote_stonith_op()``, which creates a ``STONITH_OP_QUERY`` XML request with the target, desired action, timeout, etc. then broadcasts the operation to the cluster group (i.e. all fencer instances) and starts a timer. The query is broadcast because (1) location constraints might prevent the local node from accessing the stonith device directly, and (2) even if the local node does have direct access, another node might be preferred to carry out the fencing. Each fencer receives the original fencer's ``STONITH_OP_QUERY`` broadcast request via IPC or messaging layer callback, which calls: * ``stonith_command()``, which (for requests) calls * ``handle_request()``, which (for ``STONITH_OP_QUERY`` from a peer) calls * ``stonith_query()``, which calls * ``get_capable_devices()`` with ``stonith_query_capable_device_db()`` to add device information to an XML reply and send it. (A message is considered a reply if it contains ``T_STONITH_REPLY``, which is only set by fencer peers, not clients.) The original fencer receives all peers' ``STONITH_OP_QUERY`` replies via IPC or messaging layer callback, which calls: * ``stonith_command()``, which (for replies) calls * ``handle_reply()`` which (for ``STONITH_OP_QUERY``) calls * ``process_remote_stonith_query()``, which allocates a new query result structure, parses device information into it, and adds it to the operation object. It increments the number of replies received for this operation, and compares it against the expected number of replies (i.e. the number of active peers), and if this is the last expected reply, calls * ``call_remote_stonith()``, which calculates the timeout and sends ``STONITH_OP_FENCE`` request(s) to carry out the fencing. If the target node has a fencing "topology" (which allows specifications such as "this node can be fenced either with device A, or devices B and C in combination"), it will choose the device(s), and send out as many requests as needed. If it chooses a device, it will choose the peer; a peer is preferred if it has "verified" access to the desired device, meaning that it has the device "running" on it and thus has a monitor operation ensuring reachability. Fencing operations __________________ Each ``STONITH_OP_FENCE`` request goes something like this: The chosen peer fencer receives the ``STONITH_OP_FENCE`` request via IPC or messaging layer callback, which calls: * ``stonith_command()``, which (for requests) calls * ``handle_request()``, which (for ``STONITH_OP_FENCE`` from a peer) calls * ``stonith_fence()``, which calls * ``schedule_stonith_command()`` (using supplied device if ``F_STONITH_DEVICE`` was set, otherwise the highest-priority capable device obtained via ``get_capable_devices()`` with ``stonith_fence_get_devices_cb()``), which adds the operation to the device's pending operations list and triggers processing. The chosen peer fencer's mainloop is triggered and calls * ``stonith_device_dispatch()``, which calls * ``stonith_device_execute()``, which pops off the next item from the device's pending operations list. If acting as the (internally implemented) watchdog agent, it panics the node, otherwise it calls * ``stonith_action_create()`` and ``stonith_action_execute_async()`` to call the fencing agent. The chosen peer fencer's mainloop is triggered again once the fencing agent returns, and calls * ``stonith_action_async_done()`` which adds the results to an action object then calls its * done callback (``st_child_done()``), which calls ``schedule_stonith_command()`` for a new device if there are further required actions to execute or if the original action failed, then builds and sends an XML reply to the original fencer (via ``stonith_send_async_reply()``), then checks whether any pending actions are the same as the one just executed and merges them if so. Fencing replies _______________ The original fencer receives the ``STONITH_OP_FENCE`` reply via IPC or messaging layer callback, which calls: * ``stonith_command()``, which (for replies) calls * ``handle_reply()``, which calls * ``process_remote_stonith_exec()``, which calls either ``call_remote_stonith()`` (to retry a failed operation, or try the next device in a topology is appropriate, which issues a new ``STONITH_OP_FENCE`` request, proceeding as before) or ``remote_op_done()`` (if the operation is definitively failed or successful). * remote_op_done() broadcasts the result to all peers. Finally, all peers receive the broadcast result and call * ``remote_op_done()``, which sends the result to all local clients. .. index:: single: scheduler single: pacemaker-schedulerd single: libpe_status single: libpe_rules single: libpacemaker Scheduler ######### ``pacemaker-schedulerd`` is the Pacemaker daemon that runs the Pacemaker scheduler for the controller, but "the scheduler" in general refers to related library code in ``libpe_status`` and ``libpe_rules`` (``lib/pengine/*.c``), and some of ``libpacemaker`` (``lib/pacemaker/pcmk_sched_*.c``). The purpose of the scheduler is to take a CIB as input and generate a transition graph (list of actions that need to be taken) as output. The controller invokes the scheduler by contacting the scheduler daemon via local IPC. Tools such as ``crm_simulate``, ``crm_mon``, and ``crm_resource`` can also invoke the scheduler, but do so by calling the library functions directly. This allows them to run using a ``CIB_file`` without the cluster needing to be active. The main entry point for the scheduler code is ``lib/pacemaker/pcmk_sched_messages.c:pcmk__schedule_actions()``. It sets -defaults and calls a bunch of "stage *N*" functions. Yes, there is a stage 0 -and no stage 1. :) The code has evolved over time to where splitting the stages -up differently and renumbering them would make sense. +defaults and calls a series of functions for each "stage" of the scheduling. +(Some of the functions are named like ``stageN()`` but the code has evolved +over time to where the numbers no longer make sense. A project is in progress +to reorganize and rename them.) * ``stage0()`` "unpacks" most of the CIB XML into data structures, and determines the current cluster status. It also creates implicit location constraints for the node health feature. * ``stage2()`` applies factors that make resources prefer certain nodes (such as shutdown locks, location constraints, and stickiness). -* ``stage3()`` creates internal constraints (such as the implicit ordering for - group members, or start actions being implicitly ordered before promote - actions). +* ``pcmk__create_internal_constraints()`` creates internal constraints (such as + the implicit ordering for group members, or start actions being implicitly + ordered before promote actions). * ``stage4()`` "checks actions", which means processing resource history entries in the CIB status section. This is used to decide whether certain actions need to be done, such as deleting orphan resources, forcing a restart when a resource definition changes, etc. * ``stage5()`` allocates resources to nodes and creates actions (which might or might not end up in the final graph). * ``stage6()`` creates implicit ordering constraints for resources running across remote connections, and schedules fencing actions and shutdowns. * ``stage7()`` "updates actions", which means applying ordering constraints in order to modify action attributes such as optional or required. -* ``stage8()`` creates the transition graph. +* ``pcmk__create_graph()`` creates the transition graph. Challenges __________ Working with the scheduler is difficult. Challenges include: * It is far too much code to keep more than a small portion in your head at one time. * Small changes can have large (and unexpected) effects. This is why we have a large number of regression tests (``cts/cts-scheduler``), which should be run after making code changes. * It produces an insane amount of log messages at debug and trace levels. You can put resource ID(s) in the ``PCMK_trace_tags`` environment variable to enable trace-level messages only when related to specific resources. * Different parts of the main ``pe_working_set_t`` structure are finalized at different points in the scheduling process, so you have to keep in mind whether information you're using at one point of the code can possibly change later. For example, data unpacked from the CIB can safely be used anytime - after stage0(), but actions may become optional or required anytime before - stage8(). There's no easy way to deal with this. + after ``stage0(),`` but actions may become optional or required anytime + before ``pcmk__create_graph()``. There's no easy way to deal with this. * Many names of struct members, functions, etc., are suboptimal, but are part of the public API and cannot be changed until an API backward compatibility break. .. index:: single: pe_working_set_t Cluster Working Set ___________________ The main data object for the scheduler is ``pe_working_set_t``, which contains all information needed about nodes, resources, constraints, etc., both as the raw CIB XML and parsed into more usable data structures, plus the resulting transition graph XML. The variable name is usually ``data_set``. .. index:: single: pe_resource_t Resources _________ ``pe_resource_t`` is the data object representing cluster resources. A resource has a variant: primitive (a.k.a. native), group, clone, or bundle. The resource object has members for two sets of methods, ``resource_object_functions_t`` from the ``libpe_status`` public API, and ``resource_alloc_functions_t`` whose implementation is internal to ``libpacemaker``. The actual functions vary by variant. The object functions have basic capabilities such as unpacking the resource XML, and determining the current or planned location of the resource. The allocation functions have more obscure capabilities needed for scheduling, such as processing location and ordering constraints. For example, ``stage3()``, which creates internal constraints, simply calls the ``internal_constraints()`` method for each top-level resource in the working set. .. index:: single: pe_node_t Nodes _____ Allocation of resources to nodes is done by choosing the node with the highest score for a given resource. The scheduler does a bunch of processing to generate the scores, then the actual allocation is straightforward. Node lists are frequently used. For example, ``pe_working_set_t`` has a ``nodes`` member which is a list of all nodes in the cluster, and ``pe_resource_t`` has a ``running_on`` member which is a list of all nodes on which the resource is (or might be) active. These are lists of ``pe_node_t`` objects. The ``pe_node_t`` object contains a ``struct pe_node_shared_s *details`` member with all node information that is independent of resource allocation (the node name, etc.). The working set's ``nodes`` member contains the original of this information. All other node lists contain copies of ``pe_node_t`` where only the ``details`` member points to the originals in the working set's ``nodes`` list. In this way, the other members of ``pe_node_t`` (such as ``weight``, which is the node score) may vary by node list, while the common details are shared. .. index:: single: pe_action_t single: pe_action_flags Actions _______ ``pe_action_t`` is the data object representing actions that might need to be taken. These could be resource actions, cluster-wide actions such as fencing a node, or "pseudo-actions" which are abstractions used as convenient points for ordering other actions against. It has a ``flags`` member which is a bitmask of ``enum pe_action_flags``. The most important of these are ``pe_action_runnable`` (if not set, the action is "blocked" and cannot be added to the transition graph) and ``pe_action_optional`` (actions with this set will not be added to the transition graph; actions often start out as optional, and may become required later). .. index:: single: pe__ordering_t single: pe_ordering Orderings _________ Ordering constraints are simple in concept, but they are one of the most important, powerful, and difficult to follow aspects of the scheduler code. ``pe__ordering_t`` is the data object representing an ordering, better thought of as a relationship between two actions, since the relation can be more complex than just "this one runs after that one". For an ordering "A then B", the code generally refers to A as "first" or "before", and B as "then" or "after". Much of the power comes from ``enum pe_ordering``, which are flags that determine how an ordering behaves. There are many obscure flags with big effects. A few examples: * ``pe_order_none`` means the ordering is disabled and will be ignored. It's 0, meaning no flags set, so it must be compared with equality rather than ``pcmk_is_set()``. * ``pe_order_optional`` means the ordering does not make either action required, so it only applies if they both become required for other reasons. * ``pe_order_implies_first`` means that if action B becomes required for any reason, then action A will become required as well. diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h index babc1caf59..f6e50c0bba 100644 --- a/include/crm/pengine/pe_types.h +++ b/include/crm/pengine/pe_types.h @@ -1,545 +1,545 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_TYPES__H # define PE_TYPES__H #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief Data types for cluster status * \ingroup pengine */ # include // bool # include // time_t # include // xmlNode # include // gboolean, guint, GList, GHashTable # include # include typedef struct pe_node_s pe_node_t; typedef struct pe_action_s pe_action_t; typedef struct pe_resource_s pe_resource_t; typedef struct pe_working_set_s pe_working_set_t; enum pe_obj_types { pe_unknown = -1, pe_native = 0, pe_group = 1, pe_clone = 2, pe_container = 3, }; typedef struct resource_object_functions_s { gboolean (*unpack) (pe_resource_t*, pe_working_set_t*); pe_resource_t *(*find_rsc) (pe_resource_t *parent, const char *search, const pe_node_t *node, int flags); /* parameter result must be free'd */ char *(*parameter) (pe_resource_t*, pe_node_t*, gboolean, const char*, pe_working_set_t*); //! \deprecated will be removed in a future release void (*print) (pe_resource_t*, const char*, long, void*); gboolean (*active) (pe_resource_t*, gboolean); enum rsc_role_e (*state) (const pe_resource_t*, gboolean); pe_node_t *(*location) (const pe_resource_t*, GList**, int); void (*free) (pe_resource_t*); void (*count) (pe_resource_t*); gboolean (*is_filtered) (pe_resource_t*, GList *, gboolean); } resource_object_functions_t; typedef struct resource_alloc_functions_s resource_alloc_functions_t; enum pe_quorum_policy { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide, no_quorum_demote }; enum node_type { node_ping, node_member, node_remote }; //! \deprecated will be removed in a future release enum pe_restart { pe_restart_restart, //! \deprecated will be removed in a future release pe_restart_ignore //! \deprecated will be removed in a future release }; //! Determine behavior of pe_find_resource_with_flags() enum pe_find { pe_find_renamed = 0x001, //!< match resource ID or LRM history ID pe_find_anon = 0x002, //!< match base name of anonymous clone instances pe_find_clone = 0x004, //!< match only clone instances pe_find_current = 0x008, //!< match resource active on specified node pe_find_inactive = 0x010, //!< match resource not running anywhere pe_find_any = 0x020, //!< match base name of any clone instance }; // @TODO Make these an enum # define pe_flag_have_quorum 0x00000001ULL # define pe_flag_symmetric_cluster 0x00000002ULL # define pe_flag_maintenance_mode 0x00000008ULL # define pe_flag_stonith_enabled 0x00000010ULL # define pe_flag_have_stonith_resource 0x00000020ULL # define pe_flag_enable_unfencing 0x00000040ULL # define pe_flag_concurrent_fencing 0x00000080ULL # define pe_flag_stop_rsc_orphans 0x00000100ULL # define pe_flag_stop_action_orphans 0x00000200ULL # define pe_flag_stop_everything 0x00000400ULL # define pe_flag_start_failure_fatal 0x00001000ULL //! \deprecated # define pe_flag_remove_after_stop 0x00002000ULL # define pe_flag_startup_fencing 0x00004000ULL # define pe_flag_shutdown_lock 0x00008000ULL # define pe_flag_startup_probes 0x00010000ULL # define pe_flag_have_status 0x00020000ULL # define pe_flag_have_remote_nodes 0x00040000ULL # define pe_flag_quick_location 0x00100000ULL # define pe_flag_sanitized 0x00200000ULL //! \deprecated # define pe_flag_stdout 0x00400000ULL //! Don't count total, disabled and blocked resource instances # define pe_flag_no_counts 0x00800000ULL /*! Skip deprecated code that is kept solely for backward API compatibility. * (Internal code should always set this.) */ # define pe_flag_no_compat 0x01000000ULL # define pe_flag_show_scores 0x02000000ULL # define pe_flag_show_utilization 0x04000000ULL struct pe_working_set_s { xmlNode *input; crm_time_t *now; /* options extracted from the input */ char *dc_uuid; pe_node_t *dc_node; const char *stonith_action; const char *placement_strategy; unsigned long long flags; int stonith_timeout; enum pe_quorum_policy no_quorum_policy; GHashTable *config_hash; GHashTable *tickets; // Actions for which there can be only one (e.g. fence nodeX) GHashTable *singletons; GList *nodes; GList *resources; GList *placement_constraints; GList *ordering_constraints; GList *colocation_constraints; GList *ticket_constraints; GList *actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; //! Deprecated (will be removed in a future release) int order_id; int action_id; /* final output */ xmlNode *graph; GHashTable *template_rsc_sets; const char *localhost; GHashTable *tags; int blocked_resources; int disabled_resources; GList *param_check; // History entries that need to be checked GList *stop_needed; // Containers that need stop actions time_t recheck_by; // Hint to controller to re-run scheduler by this time int ninstances; // Total number of resource instances guint shutdown_lock;// How long (seconds) to lock resources to shutdown node int priority_fencing_delay; // Priority fencing delay void *priv; }; enum pe_check_parameters { /* Clear fail count if parameters changed for un-expired start or monitor * last_failure. */ pe_check_last_failure, /* Clear fail count if parameters changed for start, monitor, promote, or * migrate_from actions for active resources. */ pe_check_active, }; struct pe_node_shared_s { const char *id; const char *uname; enum node_type type; /* @TODO convert these flags into a bitfield */ gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean unseen; gboolean shutdown; gboolean expected_up; gboolean is_dc; gboolean maintenance; gboolean rsc_discovery_enabled; gboolean remote_requires_reset; gboolean remote_was_fenced; gboolean remote_maintenance; /* what the remote-rsc is thinking */ gboolean unpacked; int num_resources; pe_resource_t *remote_rsc; GList *running_rsc; /* pe_resource_t* */ GList *allocated_rsc; /* pe_resource_t* */ GHashTable *attrs; /* char* => char* */ GHashTable *utilization; GHashTable *digest_cache; //!< cache of calculated resource digests int priority; // calculated based on the priority of resources running on the node }; struct pe_node_s { int weight; gboolean fixed; int count; struct pe_node_shared_s *details; int rsc_discover_mode; }; # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL # define pe_rsc_block 0x00000004ULL # define pe_rsc_orphan_container_filler 0x00000008ULL # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL # define pe_rsc_fence_device 0x00000040ULL # define pe_rsc_promotable 0x00000080ULL # define pe_rsc_provisional 0x00000100ULL # define pe_rsc_allocating 0x00000200ULL # define pe_rsc_merging 0x00000400ULL # define pe_rsc_stop 0x00001000ULL # define pe_rsc_reload 0x00002000ULL # define pe_rsc_allow_remote_remotes 0x00004000ULL # define pe_rsc_critical 0x00008000ULL # define pe_rsc_failed 0x00010000ULL # define pe_rsc_runnable 0x00040000ULL # define pe_rsc_start_pending 0x00080000ULL # define pe_rsc_starting 0x00100000ULL # define pe_rsc_stopping 0x00200000ULL # define pe_rsc_allow_migrate 0x00800000ULL # define pe_rsc_failure_ignored 0x01000000ULL # define pe_rsc_maintenance 0x04000000ULL # define pe_rsc_is_container 0x08000000ULL # define pe_rsc_needs_quorum 0x10000000ULL # define pe_rsc_needs_fencing 0x20000000ULL # define pe_rsc_needs_unfencing 0x40000000ULL enum pe_graph_flags { pe_graph_none = 0x00000, pe_graph_updated_first = 0x00001, pe_graph_updated_then = 0x00002, pe_graph_disable = 0x00004, }; /* *INDENT-OFF* */ enum pe_action_flags { pe_action_pseudo = 0x00001, pe_action_runnable = 0x00002, pe_action_optional = 0x00004, pe_action_print_always = 0x00008, pe_action_have_node_attrs = 0x00010, pe_action_implied_by_stonith = 0x00040, pe_action_migrate_runnable = 0x00080, pe_action_dumped = 0x00100, pe_action_processed = 0x00200, #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) pe_action_clear = 0x00400, //! \deprecated Unused #endif pe_action_dangle = 0x00800, /* This action requires one or more of its dependencies to be runnable. * We use this to clear the runnable flag before checking dependencies. */ pe_action_requires_any = 0x01000, pe_action_reschedule = 0x02000, pe_action_tracking = 0x04000, pe_action_dedup = 0x08000, //! Internal state tracking when creating graph pe_action_dc = 0x10000, //! Action may run on DC instead of target }; /* *INDENT-ON* */ struct pe_resource_s { char *id; char *clone_name; xmlNode *xml; xmlNode *orig_xml; xmlNode *ops_xml; pe_working_set_t *cluster; pe_resource_t *parent; enum pe_obj_types variant; void *variant_opaque; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; //!< \deprecated will be removed in future release int priority; int stickiness; int sort_index; int failure_timeout; int migration_threshold; guint remote_reconnect_ms; char *pending_task; unsigned long long flags; // @TODO merge these into flags gboolean is_remote_node; gboolean exclusive_discover; //!@{ //! This field should be treated as internal to Pacemaker GList *rsc_cons_lhs; // List of pcmk__colocation_t* GList *rsc_cons; // List of pcmk__colocation_t* GList *rsc_location; // List of pe__location_t* GList *actions; // List of pe_action_t* GList *rsc_tickets; // List of rsc_ticket* //!@} pe_node_t *allocated_to; pe_node_t *partial_migration_target; pe_node_t *partial_migration_source; GList *running_on; /* pe_node_t* */ GHashTable *known_on; /* pe_node_t* */ GHashTable *allowed_nodes; /* pe_node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; //! \deprecated Use pe_rsc_params() instead GHashTable *utilization; GList *children; /* pe_resource_t* */ GList *dangling_migrations; /* pe_node_t* */ pe_resource_t *container; GList *fillers; pe_node_t *pending_node; // Node on which pending_task is happening pe_node_t *lock_node; // Resource is shutdown-locked to this node time_t lock_time; // When shutdown lock started /* Resource parameters may have node-attribute-based rules, which means the * values can vary by node. This table is a cache of parameter name/value * tables for each node (as needed). Use pe_rsc_params() to get the table * for a given node. */ GHashTable *parameter_cache; // Key = node name, value = parameters table #if ENABLE_VERSIONED_ATTRS xmlNode *versioned_parameters; #endif }; #if ENABLE_VERSIONED_ATTRS // Used as action->action_details if action->rsc is not NULL typedef struct pe_rsc_action_details_s { xmlNode *versioned_parameters; xmlNode *versioned_meta; } pe_rsc_action_details_t; #endif struct pe_action_s { int id; int priority; pe_resource_t *rsc; pe_node_t *node; xmlNode *op_entry; char *task; char *uuid; char *cancel_task; char *reason; enum pe_action_flags flags; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; GHashTable *meta; GHashTable *extra; /* * These two varables are associated with the constraint logic * that involves first having one or more actions runnable before * then allowing this action to execute. * * These varables are used with features such as 'clone-min' which * requires at minimum X number of cloned instances to be running * before an order dependency can run. Another option that uses * this is 'require-all=false' in ordering constrants. This option * says "only require one instance of a resource to start before * allowing dependencies to start" -- basically, require-all=false is * the same as clone-min=1. */ /* current number of known runnable actions in the before list. */ int runnable_before; /* the number of "before" runnable actions required for this action * to be considered runnable */ int required_runnable_before; GList *actions_before; /* pe_action_wrapper_t* */ GList *actions_after; /* pe_action_wrapper_t* */ /* Some of the above fields could be moved to the details, * except for API backward compatibility. */ void *action_details; // varies by type of action }; typedef struct pe_ticket_s { char *id; gboolean granted; time_t last_granted; gboolean standby; GHashTable *state; } pe_ticket_t; typedef struct pe_tag_s { char *id; GList *refs; } pe_tag_t; //! Internal tracking for transition graph creation enum pe_link_state { pe_link_not_dumped, //! Internal tracking for transition graph creation pe_link_dumped, //! Internal tracking for transition graph creation pe_link_dup, //! \deprecated No longer used by Pacemaker }; enum pe_discover_e { pe_discover_always = 0, pe_discover_never, pe_discover_exclusive, }; /* *INDENT-OFF* */ enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_optional = 0x1, /* pure ordering, nothing implied */ pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */ pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */ pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */ pe_order_promoted_implies_first = 0x40, /* If 'then' is required and then's rsc is promoted, ensure 'first' becomes required too */ /* first requires then to be both runnable and migrate runnable. */ pe_order_implies_first_migratable = 0x80, pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */ pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX', * ensure instances of 'then' on 'nodeX' are too. * Only really useful if 'then' is a clone and 'first' is not */ pe_order_probe = 0x800, /* If 'first->rsc' is * - running but about to stop, ignore the constraint * - otherwise, behave as runnable_left */ pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ - pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */ + pe_order_stonith_stop = 0x2000, // #endif #ifdef __cplusplus } #endif #endif // PE_TYPES__H diff --git a/include/pcmki/pcmki_scheduler.h b/include/pcmki/pcmki_scheduler.h index bcf2717731..1712cd46bc 100644 --- a/include/pcmki/pcmki_scheduler.h +++ b/include/pcmki/pcmki_scheduler.h @@ -1,104 +1,102 @@ /* * Copyright 2014-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PENGINE__H # define PENGINE__H typedef struct rsc_ticket_s rsc_ticket_t; # include # include # include # include # include # include # include enum pe_weights { pe_weights_none = 0x0, pe_weights_init = 0x1, pe_weights_forward = 0x4, pe_weights_positive = 0x8, pe_weights_rollback = 0x10, }; typedef struct { const char *id; const char *node_attribute; pe_resource_t *dependent; // The resource being colocated pe_resource_t *primary; // The resource the dependent is colocated with int dependent_role; // Colocation applies only if dependent has this role int primary_role; // Colocation applies only if primary has this role int score; bool influence; // Whether dependent influences active primary placement } pcmk__colocation_t; enum loss_ticket_policy_e { loss_ticket_stop, loss_ticket_demote, loss_ticket_fence, loss_ticket_freeze }; struct rsc_ticket_s { const char *id; pe_resource_t *rsc_lh; pe_ticket_t *ticket; enum loss_ticket_policy_e loss_policy; int role_lh; }; extern gboolean stage0(pe_working_set_t * data_set); extern gboolean probe_resources(pe_working_set_t * data_set); extern gboolean stage2(pe_working_set_t * data_set); extern gboolean stage4(pe_working_set_t * data_set); extern gboolean stage5(pe_working_set_t * data_set); extern gboolean stage6(pe_working_set_t * data_set); -extern gboolean stage8(pe_working_set_t * data_set); void pcmk__unpack_constraints(pe_working_set_t *data_set); -extern void graph_element_from_action(pe_action_t * action, pe_working_set_t * data_set); extern void add_maintenance_update(pe_working_set_t *data_set); xmlNode *pcmk__schedule_actions(pe_working_set_t *data_set, xmlNode *xml_input, crm_time_t *now); extern const char *transition_idle_timeout; /*! * \internal * \brief Check whether colocation's left-hand preferences should be considered * * \param[in] colocation Colocation constraint * \param[in] rsc Right-hand instance (normally this will be * colocation->primary, which NULL will be treated as, * but for clones or bundles with multiple instances * this can be a particular instance) * * \return true if colocation influence should be effective, otherwise false */ static inline bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, const pe_resource_t *rsc) { if (rsc == NULL) { rsc = colocation->primary; } /* The left hand of a colocation influences the right hand's location * if the influence option is true, or the right hand is not yet active. */ return colocation->influence || (rsc->running_on == NULL); } #endif diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h index 94ffbfee14..fcd98cad58 100644 --- a/lib/pacemaker/libpacemaker_private.h +++ b/lib/pacemaker/libpacemaker_private.h @@ -1,255 +1,266 @@ /* * Copyright 2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__LIBPACEMAKER_PRIVATE__H # define PCMK__LIBPACEMAKER_PRIVATE__H /* This header is for the sole use of libpacemaker, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // pe_action_t, pe_node_t, pe_working_set_t // Actions G_GNUC_INTERNAL void pcmk__update_action_for_orderings(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details); G_GNUC_INTERNAL pe_action_t *pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional, bool runnable); G_GNUC_INTERNAL pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node); G_GNUC_INTERNAL pe_action_t *pcmk__new_shutdown_action(pe_node_t *node, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__action_locks_rsc_to_node(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__deduplicate_action_inputs(pe_action_t *action); G_GNUC_INTERNAL void pcmk__output_actions(pe_working_set_t *data_set); +// Producing transition graphs (pcmk_graph_producer.c) + G_GNUC_INTERNAL bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input); +G_GNUC_INTERNAL +void pcmk__add_action_to_graph(pe_action_t *action, pe_working_set_t *data_set); + +G_GNUC_INTERNAL +void pcmk__create_graph(pe_working_set_t *data_set); + + +// Fencing (pcmk_sched_fencing.c) + G_GNUC_INTERNAL void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__node_unfenced(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__is_unfence_device(const pe_resource_t *rsc, const pe_working_set_t *data_set); G_GNUC_INTERNAL pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id); G_GNUC_INTERNAL xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag); G_GNUC_INTERNAL bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_internal_constraints(pe_working_set_t *data_set); // Location constraints G_GNUC_INTERNAL void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_locations(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc); // Colocation constraints enum pcmk__coloc_affects { pcmk__coloc_affects_nothing = 0, pcmk__coloc_affects_location, pcmk__coloc_affects_role, }; G_GNUC_INTERNAL enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, bool preview); G_GNUC_INTERNAL void pcmk__apply_coloc_to_weights(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, bool influence, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__block_colocated_starts(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__disable_invalid_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_orderings(pe_working_set_t *data_set); /*! * \internal * \brief Create a new ordering between two resource actions * * \param[in] lh_rsc Resource for 'first' action * \param[in] rh_rsc Resource for 'then' action * \param[in] lh_task Action key for 'first' action * \param[in] rh_task Action key for 'then' action * \param[in] flags Bitmask of enum pe_ordering flags * \param[in] data_set Cluster working set to add ordering to */ #define pcmk__order_resource_actions(lh_rsc, lh_task, rh_rsc, rh_task, \ flags, data_set) \ pcmk__new_ordering((lh_rsc), pcmk__op_key((lh_rsc)->id, (lh_task), 0), \ NULL, \ (rh_rsc), pcmk__op_key((rh_rsc)->id, (rh_task), 0), \ NULL, (flags), (data_set)) #define pcmk__order_starts(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \ (rsc2), CRMD_ACTION_START, (type), (data_set)) #define pcmk__order_stops(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \ (rsc2), CRMD_ACTION_STOP, (type), (data_set)) G_GNUC_INTERNAL void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_probes(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_remote_connection_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL pe_node_t *pcmk__connection_host_for_action(pe_action_t *action); G_GNUC_INTERNAL void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action); // Groups (pcmk_sched_group.c) G_GNUC_INTERNAL GList *pcmk__group_colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); // Bundles (pcmk_sched_bundle.c) G_GNUC_INTERNAL void pcmk__output_bundle_actions(pe_resource_t *rsc); // Functions applying to more than one variant (pcmk_sched_resource.c) G_GNUC_INTERNAL GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__output_resource_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force); G_GNUC_INTERNAL bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); G_GNUC_INTERNAL void pcmk__unassign_resource(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set, pe_resource_t **failed); #endif // PCMK__LIBPACEMAKER_PRIVATE__H diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index ecd36214c6..4c1b5a619e 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,906 +1,1089 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pe_action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->details->uname) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = create_xml_node(xml, XML_CIB_TAG_NODE); crm_xml_add(node_xml, XML_ATTR_UUID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pe_node_t *node, void *xml) { add_node_to_xml_by_id(node->details->id, (xmlNode *) xml); } /*! * \internal * \brief Add XML with nodes that need an update of their maintenance state * * \param[in,out] xml Parent XML tag to add to * \param[in] data_set Working set for cluster */ static int add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set) { GList *gIter = NULL; xmlNode *maintenance = xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL; int count = 0; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; struct pe_node_shared_s *details = node->details; if (!pe__is_guest_or_remote_node(node)) { continue; /* just remote nodes need to know atm */ } if (details->maintenance != details->remote_maintenance) { if (maintenance) { crm_xml_add( add_node_to_xml_by_id(node->details->id, maintenance), XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0"); } count++; } } crm_trace("%s %d nodes to adjust maintenance-mode " "to transition", maintenance?"Added":"Counted", count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] data_set Working set for cluster */ void add_maintenance_update(pe_working_set_t *data_set) { pe_action_t *action = NULL; if (add_maintenance_nodes(NULL, data_set)) { crm_trace("adding maintenance state update pseudo action"); action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set); pe__set_action_flags(action, pe_action_print_always); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes * \param[in] data_set Working set for cluster */ static void add_downed_nodes(xmlNode *xml, const pe_action_t *action, const pe_working_set_t *data_set) { CRM_CHECK(xml && action && action->node && data_set, return); if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { /* Shutdown makes the action's node down */ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed); } } else if (action->rsc && action->rsc->is_remote_node && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pe_action_t *input; gboolean migrating = FALSE; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pe_action_wrapper_t *) iter->data)->action; if (input->rsc && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_casei) && pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { migrating = TRUE; break; } } if (!migrating) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } -static xmlNode * -action2xml(pe_action_t * action, gboolean as_input, pe_working_set_t *data_set) +/*! + * \internal + * \brief Create a transition graph operation key for a clone action + * + * \param[in] action Clone action + * \param[in] interval_ms Action interval in milliseconds + * + * \return Newly allocated string with transition graph operation key + */ +static char * +clone_op_key(pe_action_t *action, guint interval_ms) { - gboolean needs_node_info = TRUE; - gboolean needs_maintenance_info = FALSE; - xmlNode *action_xml = NULL; - xmlNode *args_xml = NULL; -#if ENABLE_VERSIONED_ATTRS - pe_rsc_action_details_t *rsc_details = NULL; -#endif - - if (action == NULL) { - return NULL; - } - - if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { - /* All fences need node info; guest node fences are pseudo-events */ - action_xml = create_xml_node(NULL, - pcmk_is_set(action->flags, pe_action_pseudo)? - XML_GRAPH_TAG_PSEUDO_EVENT : - XML_GRAPH_TAG_CRM_EVENT); - - } else if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); - - } else if (pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); - - } else if (pcmk__str_eq(action->task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); - - } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { - // CIB-only clean-up for shutdown locks - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); - crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); - -/* } else if(pcmk__str_eq(action->task, RSC_PROBED, pcmk__str_casei)) { */ -/* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ - - } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { - if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) { - needs_maintenance_info = TRUE; - } - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT); - needs_node_info = FALSE; - - } else { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); - -#if ENABLE_VERSIONED_ATTRS - rsc_details = pe_rsc_action_details(action); -#endif - } + if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_none)) { + const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); + const char *n_task = g_hash_table_lookup(action->meta, + "notify_operation"); - crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); - crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); - if (action->rsc != NULL && action->rsc->clone_name != NULL) { - char *clone_key = NULL; - guint interval_ms; - - if (pcmk__guint_from_hash(action->meta, - XML_LRM_ATTR_INTERVAL_MS, 0, - &interval_ms) != pcmk_rc_ok) { - interval_ms = 0; - } - - if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_casei)) { - const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); - const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); - - CRM_CHECK(n_type != NULL, crm_err("No notify type value found for %s", action->uuid)); - CRM_CHECK(n_task != NULL, - crm_err("No notify operation value found for %s", action->uuid)); - clone_key = pcmk__notify_key(action->rsc->clone_name, - n_type, n_task); - - } else if(action->cancel_task) { - clone_key = pcmk__op_key(action->rsc->clone_name, - action->cancel_task, interval_ms); - } else { - clone_key = pcmk__op_key(action->rsc->clone_name, - action->task, interval_ms); - } - - CRM_CHECK(clone_key != NULL, crm_err("Could not generate a key for %s", action->uuid)); - crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); - crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); - free(clone_key); + CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL)); + return pcmk__notify_key(action->rsc->clone_name, n_type, n_task); + } else if (action->cancel_task != NULL) { + return pcmk__op_key(action->rsc->clone_name, action->cancel_task, + interval_ms); } else { - crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); + return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms); } +} - if (needs_node_info && action->node != NULL) { - pe_node_t *router_node = pcmk__connection_host_for_action(action); - - crm_xml_add(action_xml, XML_LRM_ATTR_TARGET, action->node->details->uname); - crm_xml_add(action_xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); - if (router_node) { - crm_xml_add(action_xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); - } +/*! + * \internal + * \brief Add node details to transition graph action XML + * + * \param[in] action Scheduled action + * \param[in] xml Transition graph action XML for \p action + */ +static void +add_node_details(pe_action_t *action, xmlNode *xml) +{ + pe_node_t *router_node = pcmk__connection_host_for_action(action); - g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), strdup(action->node->details->uname)); - g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), strdup(action->node->details->id)); + crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname); + crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); + if (router_node != NULL) { + crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); } +} - /* No details if this action is only being listed in the inputs section */ - if (as_input) { - return action_xml; +/*! + * \internal + * \brief Add resource details to transition graph action XML + * + * \param[in] action Scheduled action + * \param[in] action_xml Transition graph action XML for \p action + */ +static void +add_resource_details(pe_action_t *action, xmlNode *action_xml) +{ + xmlNode *rsc_xml = NULL; + const char *attr_list[] = { + XML_AGENT_ATTR_CLASS, + XML_AGENT_ATTR_PROVIDER, + XML_ATTR_TYPE + }; + + /* If a resource is locked to a node via shutdown-lock, mark its actions + * so the controller can preserve the lock when the action completes. + */ + if (pcmk__action_locks_rsc_to_node(action)) { + crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + (long long) action->rsc->lock_time); } - if (action->rsc && !pcmk_is_set(action->flags, pe_action_pseudo)) { - int lpc = 0; - xmlNode *rsc_xml = NULL; - const char *attr_list[] = { - XML_AGENT_ATTR_CLASS, - XML_AGENT_ATTR_PROVIDER, - XML_ATTR_TYPE - }; + // List affected resource - /* If a resource is locked to a node via shutdown-lock, mark its actions - * so the controller can preserve the lock when the action completes. + rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); + if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan) + && (action->rsc->clone_name != NULL)) { + /* Use the numbered instance name here, because if there is more + * than one instance on a node, we need to make sure the command + * goes to the right one. + * + * This is important even for anonymous clones, because the clone's + * unique meta-attribute might have just been toggled from on to + * off. */ - if (pcmk__action_locks_rsc_to_node(action)) { - crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, - (long long) action->rsc->lock_time); - } + crm_debug("Using orphan clone name %s instead of %s", + action->rsc->id, action->rsc->clone_name); + crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); + crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); - // List affected resource - - rsc_xml = create_xml_node(action_xml, - crm_element_name(action->rsc->xml)); - if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan) - && action->rsc->clone_name) { - /* Do not use the 'instance free' name here as that - * might interfere with the instance we plan to keep. - * Ie. if there are more than two named /anonymous/ - * instances on a given node, we need to make sure the - * command goes to the right one. - * - * Keep this block, even when everyone is using - * 'instance free' anonymous clone names - it means - * we'll do the right thing if anyone toggles the - * unique flag to 'off' - */ - crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, - action->rsc->clone_name); - crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); - crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); + } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) { + const char *xml_id = ID(action->rsc->xml); - } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) { - const char *xml_id = ID(action->rsc->xml); - - crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id, - action->rsc->clone_name); - - /* ID is what we'd like client to use - * ID_LONG is what they might know it as instead - * - * ID_LONG is only strictly needed /here/ during the - * transition period until all nodes in the cluster - * are running the new software /and/ have rebooted - * once (meaning that they've only ever spoken to a DC - * supporting this feature). - * - * If anyone toggles the unique flag to 'on', the - * 'instance free' name will correspond to an orphan - * and fall into the clause above instead - */ - crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); - if (action->rsc->clone_name && !pcmk__str_eq(xml_id, action->rsc->clone_name, pcmk__str_casei)) { - crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); - } else { - crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); - } + crm_debug("Using anonymous clone name %s for %s (aka %s)", + xml_id, action->rsc->id, action->rsc->clone_name); + /* ID is what we'd like client to use + * ID_LONG is what they might know it as instead + * + * ID_LONG is only strictly needed /here/ during the + * transition period until all nodes in the cluster + * are running the new software /and/ have rebooted + * once (meaning that they've only ever spoken to a DC + * supporting this feature). + * + * If anyone toggles the unique flag to 'on', the + * 'instance free' name will correspond to an orphan + * and fall into the clause above instead + */ + crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); + if ((action->rsc->clone_name != NULL) + && !pcmk__str_eq(xml_id, action->rsc->clone_name, + pcmk__str_none)) { + crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); } else { - CRM_ASSERT(action->rsc->clone_name == NULL); - crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); + crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } - for (lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { - crm_xml_add(rsc_xml, attr_list[lpc], - g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); - } + } else { + CRM_ASSERT(action->rsc->clone_name == NULL); + crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); } - /* List any attributes in effect */ + for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { + crm_xml_add(rsc_xml, attr_list[lpc], + g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); + } +} + +/*! + * \internal + * \brief Add action attributes to transition graph action XML + * + * \param[in] action Scheduled action + * \param[in] action_xml Transition graph action XML for \p action + */ +static void +add_action_attributes(pe_action_t *action, xmlNode *action_xml) +{ + xmlNode *args_xml = NULL; + + /* We create free-standing XML to start, so we can sort the attributes + * before adding it to action_xml, which keeps the scheduler regression + * test graphs comparable. + */ args_xml = create_xml_node(NULL, XML_TAG_ATTRS); - crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); - if (action->rsc != NULL && action->node) { + + if ((action->rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node - GHashTable *params = pe_rsc_params(action->rsc, action->node, data_set); + GHashTable *params = pe_rsc_params(action->rsc, action->node, + action->rsc->cluster); - pcmk__substitute_remote_addr(action->rsc, params, data_set); + pcmk__substitute_remote_addr(action->rsc, params, action->rsc->cluster); g_hash_table_foreach(params, hash2smartfield, args_xml); #if ENABLE_VERSIONED_ATTRS { xmlNode *versioned_parameters = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS); pe_get_versioned_attributes(versioned_parameters, action->rsc, - action->node, data_set); + action->node, action->rsc->cluster); if (xml_has_children(versioned_parameters)) { add_node_copy(action_xml, versioned_parameters); } free_xml(versioned_parameters); } #endif - } else if(action->rsc && action->rsc->variant <= pe_native) { - GHashTable *params = pe_rsc_params(action->rsc, NULL, data_set); + } else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) { + GHashTable *params = pe_rsc_params(action->rsc, NULL, + action->rsc->cluster); g_hash_table_foreach(params, hash2smartfield, args_xml); #if ENABLE_VERSIONED_ATTRS if (xml_has_children(action->rsc->versioned_parameters)) { add_node_copy(action_xml, action->rsc->versioned_parameters); } #endif } #if ENABLE_VERSIONED_ATTRS - if (rsc_details) { + if (rsc_details != NULL) { if (xml_has_children(rsc_details->versioned_parameters)) { add_node_copy(action_xml, rsc_details->versioned_parameters); } - if (xml_has_children(rsc_details->versioned_meta)) { add_node_copy(action_xml, rsc_details->versioned_meta); } } #endif g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { - const char *value = g_hash_table_lookup(action->rsc->meta, "external-ip"); + const char *value = g_hash_table_lookup(action->rsc->meta, + "external-ip"); pe_resource_t *parent = action->rsc; while (parent != NULL) { parent->cmds->append_meta(parent, args_xml); parent = parent->parent; } - if(value) { - hash2smartfield((gpointer)"pcmk_external_ip", (gpointer)value, (gpointer)args_xml); + if (value != NULL) { + hash2smartfield((gpointer) "pcmk_external_ip", (gpointer) value, + (gpointer) args_xml); } pcmk__add_bundle_meta_to_xml(args_xml, action); - } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei) && action->node) { + } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none) + && (action->node != NULL)) { /* Pass the node's attributes as meta-attributes. * * @TODO: Determine whether it is still necessary to do this. It was * added in 33d99707, probably for the libfence-based implementation in * c9a90bd, which is no longer used. */ g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); free_xml(args_xml); +} + +/*! + * \internal + * \brief Create the transition graph XML for a scheduled action + * + * \param[in] parent Parent XML element to add action to + * \param[in] action Scheduled action + * \param[in] skip_details If false, add action details as sub-elements + * \param[in] data_set Cluster working set + */ +static void +create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, + pe_working_set_t *data_set) +{ + bool needs_node_info = true; + bool needs_maintenance_info = false; + xmlNode *action_xml = NULL; +#if ENABLE_VERSIONED_ATTRS + pe_rsc_action_details_t *rsc_details = NULL; +#endif + + if ((action == NULL) || (data_set == NULL)) { + return; + } + + // Create the top-level element based on task + + if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { + /* All fences need node info; guest node fences are pseudo-events */ + action_xml = create_xml_node(parent, + pcmk_is_set(action->flags, pe_action_pseudo)? + XML_GRAPH_TAG_PSEUDO_EVENT : + XML_GRAPH_TAG_CRM_EVENT); + + } else if (pcmk__str_any_of(action->task, + CRM_OP_SHUTDOWN, + CRM_OP_CLEAR_FAILCOUNT, + CRM_OP_LRM_REFRESH, NULL)) { + action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); + + } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) { + // CIB-only clean-up for shutdown locks + action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); + crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); + + } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { + if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES, + pcmk__str_none)) { + needs_maintenance_info = true; + } + action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); + needs_node_info = false; + + } else { + action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP); +#if ENABLE_VERSIONED_ATTRS + rsc_details = pe_rsc_action_details(action); +#endif + } + + crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); + crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); + + if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) { + char *clone_key = NULL; + guint interval_ms; + + if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, + &interval_ms) != pcmk_rc_ok) { + interval_ms = 0; + } + clone_key = clone_op_key(action, interval_ms); + crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); + crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); + free(clone_key); + } else { + crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); + } + + if (needs_node_info && (action->node != NULL)) { + add_node_details(action, action_xml); + g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), + strdup(action->node->details->uname)); + g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), + strdup(action->node->details->id)); + } + + if (skip_details) { + return; + } + + if ((action->rsc != NULL) + && !pcmk_is_set(action->flags, pe_action_pseudo)) { + + // This is a real resource action, so add resource details + add_resource_details(action, action_xml); + } + + /* List any attributes in effect */ + add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action, data_set); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, data_set); } - - crm_log_xml_trace(action_xml, "dumped action"); - return action_xml; } +/*! + * \internal + * \brief Check whether an action should be added to the transition graph + * + * \param[in] action Action to check + * + * \return true if action should be added to graph, otherwise false + */ static bool -should_dump_action(pe_action_t *action) +should_add_action_to_graph(pe_action_t *action) { - CRM_CHECK(action != NULL, return false); - - if (pcmk_is_set(action->flags, pe_action_dumped)) { - crm_trace("Action %s (%d) already dumped", action->uuid, action->id); - return false; - - } else if (pcmk_is_set(action->flags, pe_action_pseudo) - && pcmk__str_eq(action->task, CRM_OP_PROBED, pcmk__str_casei)) { - GList *lpc = NULL; - - /* This is a horrible but convenient hack - * - * It mimimizes the number of actions with unsatisfied inputs - * (i.e. not included in the graph) - * - * This in turn, means we can be more concise when printing - * aborted/incomplete graphs. - * - * It also makes it obvious which node is preventing - * probe_complete from running (presumably because it is only - * partially up) - * - * For these reasons we tolerate such perversions - */ - - for (lpc = action->actions_after; lpc != NULL; lpc = lpc->next) { - pe_action_wrapper_t *wrapper = (pe_action_wrapper_t *) lpc->data; - - if (!pcmk_is_set(wrapper->action->flags, pe_action_runnable)) { - /* Only interested in runnable operations */ - } else if (!pcmk__str_eq(wrapper->action->task, RSC_START, pcmk__str_casei)) { - /* Only interested in start operations */ - } else if (pcmk_is_set(wrapper->action->flags, pe_action_dumped) - || should_dump_action(wrapper->action)) { - crm_trace("Action %s (%d) should be dumped: " - "dependency of %s (%d)", - action->uuid, action->id, - wrapper->action->uuid, wrapper->action->id); - return true; - } - } - } - if (!pcmk_is_set(action->flags, pe_action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; + } - } else if (pcmk_is_set(action->flags, pe_action_optional) - && !pcmk_is_set(action->flags, pe_action_print_always)) { + if (pcmk_is_set(action->flags, pe_action_optional) + && !pcmk_is_set(action->flags, pe_action_print_always)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; + } - // Monitors should be dumped even for unmanaged resources - } else if (action->rsc && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) - && !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei)) { - - const char *interval_ms_s = g_hash_table_lookup(action->meta, - XML_LRM_ATTR_INTERVAL_MS); - - // Cancellation of recurring monitors should still be dumped + /* Actions for unmanaged resources should be excluded from the graph, + * with the exception of monitors and cancellation of recurring monitors. + */ + if ((action->rsc != NULL) + && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) + && !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_none)) { + const char *interval_ms_s; + + /* A cancellation of a recurring monitor will get here because the task + * is cancel rather than monitor, but the interval can still be used to + * recognize it. The interval has been normalized to milliseconds by + * this point, so a string comparison is sufficient. + */ + interval_ms_s = g_hash_table_lookup(action->meta, + XML_LRM_ATTR_INTERVAL_MS); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } - if (pcmk_is_set(action->flags, pe_action_pseudo) || - pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN, NULL)) { - /* skip the next checks */ + /* Always add pseudo-actions, fence actions, and shutdown actions (already + * determined to be required and runnable by this point) + */ + if (pcmk_is_set(action->flags, pe_action_pseudo) + || pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN, + NULL)) { return true; } if (action->node == NULL) { pe_err("Skipping action %s (%d) " "because it was not allocated to a node (bug?)", action->uuid, action->id); - pcmk__log_action("Unallocated action", action, false); + pcmk__log_action("Unallocated", action, false); return false; + } - } else if (pcmk_is_set(action->flags, pe_action_dc)) { + if (pcmk_is_set(action->flags, pe_action_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, action->node->details->uname); } else if (pe__is_guest_node(action->node) && !action->node->details->remote_requires_reset) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest node %s", action->uuid, action->id, action->node->details->uname); - } else if (action->node->details->online == false) { + } else if (!action->node->details->online) { pe_err("Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); - pcmk__log_action("Action for offline node", action, false); + pcmk__log_action("Offline node", action, false); return false; -#if 0 - /* but this would also affect resources that can be safely - * migrated before a fencing op - */ - } else if (action->node->details->unclean == false) { + + } else if (action->node->details->unclean) { pe_err("Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); - pcmk__log_action("Action for unclean node", action, false); + pcmk__log_action("Unclean node", action, false); return false; -#endif } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(pe_action_wrapper_t *ordering) { return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed |pe_order_implies_then_printed |pe_order_optional)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool -check_dump_input(pe_action_t *action, pe_action_wrapper_t *input) +should_add_input_to_graph(pe_action_t *action, pe_action_wrapper_t *input) { if (input->state == pe_link_dumped) { return true; } if (input->type == pe_order_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) - && !ordering_can_change_actions(input) - && !pcmk__str_eq(input->action->uuid, CRM_OP_PROBED, pcmk__str_casei)) { + && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && pcmk_is_set(input->type, pe_order_one_or_more)) { crm_trace("Ignoring %s (%d) input %s (%d): " "one-or-more and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; - } else if (pcmk_is_set(action->flags, pe_action_pseudo) - && pcmk_is_set(input->type, pe_order_stonith_stop)) { - crm_trace("Ignoring %s (%d) input %s (%d): " - "stonith stop but action is pseudo", - action->uuid, action->id, - input->action->uuid, input->action->id); - return false; - } else if (pcmk_is_set(input->type, pe_order_implies_first_migratable) && !pcmk_is_set(input->action->flags, pe_action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "implies input migratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "only if input unmigratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->type == pe_order_optional) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->type == pe_order_load) { pe_node_t *input_node = input->action->node; // load orderings are relevant only if actions are for same node if (action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)) { pe_node_t *allocated = action->rsc->allocated_to; /* For load_stopped -> migrate_to orderings, we care about where it * has been allocated to, not where it will be executed. */ if ((input_node == NULL) || (allocated == NULL) || (input_node->details != allocated->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (allocated? allocated->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } } else if ((input_node == NULL) || (action->node == NULL) || (input_node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->type == pe_order_anti_colocation) { if (input->action->node && action->node && (input->action->node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, action->node->details->uname, input->action->node->details->uname); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pe_rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && action->rsc && pe_rsc_is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional) && !pcmk_any_flags_set(input->action->flags, pe_action_print_always|pe_action_dumped) - && !should_dump_action(input->action)) { + && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->type); return true; } +/*! + * \internal + * \brief Check whether an ordering creates an ordering loop + * + * \param[in] init_action "First" action in ordering + * \param[in] action Callers should always set this the same as + * \p init_action (this function may use a different + * value for recursive calls) + * \param[in] input Action wrapper for "then" action in ordering + * + * \return true if the ordering creates a loop, otherwise false + */ bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pe_action_tracking)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); return false; } // Don't need to check inputs that won't be used - if (!check_dump_input(action, input)) { + if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->details->uname : "", init_action->uuid, init_action->node? init_action->node->details->uname : ""); return true; } pe__set_action_flags(input->action, pe_action_tracking); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->details->uname : "", input->action->uuid, input->action->node? input->action->node->details->uname : "", input->type, init_action->uuid, init_action->node? init_action->node->details->uname : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pe_action_wrapper_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; - goto done; + break; } } -done: pe__clear_action_flags(input->action, pe_action_tracking); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); } return has_loop; } +/*! + * \internal + * \brief Create a synapse XML element for a transition graph + * + * \param[in] action Action that synapse is for + * \param[in] data_set Cluster working set containing graph + * + * \return Newly added XML element for new graph synapse + */ +static xmlNode * +create_graph_synapse(pe_action_t *action, pe_working_set_t *data_set) +{ + int synapse_priority = 0; + xmlNode *syn = create_xml_node(data_set->graph, "synapse"); + + crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); + data_set->num_synapse++; + + if (action->rsc != NULL) { + synapse_priority = action->rsc->priority; + } + if (action->priority > synapse_priority) { + synapse_priority = action->priority; + } + if (synapse_priority > 0) { + crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); + } + return syn; +} + /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in] action Action to possibly add * \param[in] data_set Cluster working set * * \note This will de-duplicate the action inputs, meaning that the * pe_action_wrapper_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before - * stage8().) + * pcmk__create_graph().) */ void -graph_element_from_action(pe_action_t *action, pe_working_set_t *data_set) +pcmk__add_action_to_graph(pe_action_t *action, pe_working_set_t *data_set) { - GList *lpc = NULL; - int synapse_priority = 0; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; - xmlNode *xml_action = NULL; - pe_action_wrapper_t *input = NULL; - /* If we haven't already, de-duplicate inputs -- even if we won't be dumping - * the action, so that crm_simulate dot graphs don't have duplicates. + /* If we haven't already, de-duplicate inputs (even if we won't be adding + * the action to the graph, so that crm_simulate's dot graphs don't have + * duplicates). */ if (!pcmk_is_set(action->flags, pe_action_dedup)) { pcmk__deduplicate_action_inputs(action); pe__set_action_flags(action, pe_action_dedup); } - if (should_dump_action(action) == FALSE) { + if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or + || !should_add_action_to_graph(action)) { // shouldn't be added return; } - pe__set_action_flags(action, pe_action_dumped); - syn = create_xml_node(data_set->graph, "synapse"); + syn = create_graph_synapse(action, data_set); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); - crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); - data_set->num_synapse++; + create_graph_action(set, action, false, data_set); - if (action->rsc != NULL) { - synapse_priority = action->rsc->priority; + for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { + pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data; + + if (should_add_input_to_graph(action, input)) { + xmlNode *input_xml = create_xml_node(in, "trigger"); + + input->state = pe_link_dumped; + create_graph_action(input_xml, input->action, true, data_set); + } } - if (action->priority > synapse_priority) { - synapse_priority = action->priority; +} + +static int transition_id = -1; + +/*! + * \internal + * \brief Log a message after calculating a transition + * + * \param[in] filename Where transition input is stored + */ +void +pcmk__log_transition_summary(const char *filename) +{ + if (was_processing_error) { + crm_err("Calculated transition %d (with errors)%s%s", + transition_id, + (filename == NULL)? "" : ", saving inputs in ", + (filename == NULL)? "" : filename); + + } else if (was_processing_warning) { + crm_warn("Calculated transition %d (with warnings)%s%s", + transition_id, + (filename == NULL)? "" : ", saving inputs in ", + (filename == NULL)? "" : filename); + + } else { + crm_notice("Calculated transition %d%s%s", + transition_id, + (filename == NULL)? "" : ", saving inputs in ", + (filename == NULL)? "" : filename); } - if (synapse_priority > 0) { - crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); + if (crm_config_error) { + crm_notice("Configuration errors found during scheduler processing," + " please run \"crm_verify -L\" to identify issues"); } +} + +/*! + * \internal + * \brief Create a transition graph with all cluster actions needed + * + * \param[in] data_set Cluster working set + */ +void +pcmk__create_graph(pe_working_set_t *data_set) +{ + GList *iter = NULL; + const char *value = NULL; + long long limit = 0LL; - xml_action = action2xml(action, FALSE, data_set); - add_node_nocopy(set, crm_element_name(xml_action), xml_action); + transition_id++; + crm_trace("Creating transition graph %d", transition_id); - for (lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { - input = (pe_action_wrapper_t *) lpc->data; - if (check_dump_input(action, input)) { - xmlNode *input_xml = create_xml_node(in, "trigger"); + data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); - input->state = pe_link_dumped; - xml_action = action2xml(input->action, TRUE, data_set); - add_node_nocopy(input_xml, crm_element_name(xml_action), xml_action); + value = pe_pref(data_set->config_hash, "cluster-delay"); + crm_xml_add(data_set->graph, "cluster-delay", value); + + value = pe_pref(data_set->config_hash, "stonith-timeout"); + crm_xml_add(data_set->graph, "stonith-timeout", value); + + crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); + + if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { + crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); + } else { + crm_xml_add(data_set->graph, "failed-start-offset", "1"); + } + + value = pe_pref(data_set->config_hash, "batch-limit"); + crm_xml_add(data_set->graph, "batch-limit", value); + + crm_xml_add_int(data_set->graph, "transition_id", transition_id); + + value = pe_pref(data_set->config_hash, "migration-limit"); + if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { + crm_xml_add(data_set->graph, "migration-limit", value); + } + + if (data_set->recheck_by > 0) { + char *recheck_epoch = NULL; + + recheck_epoch = crm_strdup_printf("%llu", + (long long) data_set->recheck_by); + crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); + free(recheck_epoch); + } + + /* The following code will de-duplicate action inputs, so nothing past this + * should rely on the action input type flags retaining their original + * values. + */ + + // Add resource actions to graph + for (iter = data_set->resources; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + pe_rsc_trace(rsc, "Processing actions for %s", rsc->id); + rsc->cmds->expand(rsc, data_set); + } + + // Add pseudo-action for list of nodes with maintenance state update + add_maintenance_update(data_set); + + // Add non-resource (node) actions + for (iter = data_set->actions; iter != NULL; iter = iter->next) { + pe_action_t *action = (pe_action_t *) iter->data; + + if ((action->rsc != NULL) + && (action->node != NULL) + && action->node->details->shutdown + && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) + && !pcmk_any_flags_set(action->flags, + pe_action_optional|pe_action_runnable) + && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)) { + /* Eventually we should just ignore the 'fence' case, but for now + * it's the best way to detect (in CTS) when CIB resource updates + * are being lost. + */ + if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) + || (data_set->no_quorum_policy == no_quorum_ignore)) { + crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)", + action->node->details->unclean? "fence" : "shut down", + action->node->details->uname, action->rsc->id, + pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged", + pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "", + action->uuid); + } } + + pcmk__add_action_to_graph(action, data_set); } + + crm_log_xml_trace(data_set->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c index 26f2546cf9..8a1542c6d5 100644 --- a/lib/pacemaker/pcmk_sched_allocate.c +++ b/lib/pacemaker/pcmk_sched_allocate.c @@ -1,2109 +1,1973 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" CRM_TRACE_INIT_DATA(pacemaker); extern bool pcmk__is_daemon; void set_alloc_actions(pe_working_set_t * data_set); extern void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set); extern gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); resource_alloc_functions_t resource_class_alloc_functions[] = { { pcmk__native_merge_weights, pcmk__native_allocate, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, pcmk__colocated_resources, native_rsc_location, native_action_flags, native_update_actions, pcmk__output_resource_actions, native_expand, native_append_meta, }, { pcmk__group_merge_weights, pcmk__group_allocate, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, pcmk__group_colocated_resources, group_rsc_location, group_action_flags, group_update_actions, pcmk__output_resource_actions, group_expand, group_append_meta, }, { pcmk__native_merge_weights, pcmk__clone_allocate, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, pcmk__colocated_resources, clone_rsc_location, clone_action_flags, pcmk__multi_update_actions, pcmk__output_resource_actions, clone_expand, clone_append_meta, }, { pcmk__native_merge_weights, pcmk__bundle_allocate, pcmk__bundle_create_actions, pcmk__bundle_create_probe, pcmk__bundle_internal_constraints, pcmk__bundle_rsc_colocation_lh, pcmk__bundle_rsc_colocation_rh, pcmk__colocated_resources, pcmk__bundle_rsc_location, pcmk__bundle_action_flags, pcmk__multi_update_actions, pcmk__output_bundle_actions, pcmk__bundle_expand, pcmk__bundle_append_meta, } }; static gboolean check_rsc_parameters(pe_resource_t * rsc, pe_node_t * node, xmlNode * rsc_entry, gboolean active_here, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; gboolean changed = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < PCMK__NELEM(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* i.e. NULL */ || pcmk__str_eq(value, old_value, pcmk__str_none)) { continue; } changed = TRUE; trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set); if (active_here) { force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); pe__set_resource_flags(rsc, pe_rsc_start_pending); delete_resource = TRUE; } else if (changed) { delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * active_node, const char *reason, pe_working_set_t * data_set) { guint interval_ms = 0; pe_action_t *cancel = NULL; const char *task = NULL; const char *call_id = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); crm_info("Action " PCMK__OP_FMT " on %s will be stopped: %s", rsc->id, task, interval_ms, active_node->details->uname, (reason? reason : "unknown")); cancel = pcmk__new_cancel_action(rsc, task, interval_ms, active_node); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); } static gboolean check_action_definition(pe_resource_t * rsc, pe_node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; guint interval_ms = 0; const op_digest_cache_t *digest_data = NULL; gboolean did_change = FALSE; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_secure = NULL; CRM_CHECK(active_node != NULL, return FALSE); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms > 0) { xmlNode *op_match = NULL; /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if ((op_match == NULL) && pcmk_is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } free(key); key = NULL; } crm_trace("Testing " PCMK__OP_FMT " on %s", rsc->id, task, interval_ms, active_node->details->uname); if ((interval_ms == 0) && pcmk__str_eq(task, RSC_STATUS, pcmk__str_casei)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_MIGRATED, pcmk__str_casei)) { /* Reload based on the start action not a migrate */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_PROMOTE, pcmk__str_casei)) { /* Reload based on the start action not a promote */ task = RSC_START; } digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set); if (pcmk_is_set(data_set->flags, pe_flag_sanitized)) { digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST); } if(digest_data->rc != RSC_DIGEST_MATCH && digest_secure && digest_data->digest_secure_calc && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) { if (!pcmk__is_daemon && data_set->priv != NULL) { pcmk__output_t *out = data_set->priv; out->info(out, "Only 'private' parameters to " PCMK__OP_FMT " on %s changed: %s", rsc->id, task, interval_ms, active_node->details->uname, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); } } else if (digest_data->rc == RSC_DIGEST_RESTART) { /* Changes that force a restart */ pe_action_t *required = NULL; did_change = TRUE; key = pcmk__op_key(rsc->id, task, interval_ms); crm_log_xml_info(digest_data->params_restart, "params:restart"); required = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); pe_action_set_reason(required, "resource definition change", true); trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set); } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) { // Changes that can potentially be handled by an agent reload const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); did_change = TRUE; trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set); crm_log_xml_info(digest_data->params_all, "params:reload"); key = pcmk__op_key(rsc->id, task, interval_ms); if (interval_ms > 0) { pe_action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ ReloadRsc(rsc, active_node, data_set); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set); pe__set_action_flags(op, pe_action_reschedule); #endif } else if (digest_restart) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Reload this resource */ ReloadRsc(rsc, active_node, data_set); free(key); } else { pe_action_t *required = NULL; pe_rsc_trace(rsc, "Resource %s doesn't support agent reloads", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independently */ required = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); pe_action_set_reason(required, "resource definition change", true); } } return did_change; } /*! * \internal * \brief Do deferred action checks after allocation * * \param[in] data_set Working set for cluster */ static void check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op, enum pe_check_parameters check, pe_working_set_t *data_set) { const char *reason = NULL; op_digest_cache_t *digest_data = NULL; switch (check) { case pe_check_active: if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { reason = "action definition changed"; } break; case pe_check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s has no digest to compare", rsc->id, ID(rsc_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: reason = "resource parameters have changed"; break; } break; } if (reason) { pe__clear_failcount(rsc, node, reason, data_set); } } static void check_actions_for(xmlNode * rsc_entry, pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { GList *gIter = NULL; int offset = -1; int stop_index = 0; int start_index = 0; const char *task = NULL; xmlNode *rsc_op = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; CRM_CHECK(node != NULL, return); if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_resource_t *parent = uber_parent(rsc); if(parent == NULL || pe_rsc_is_clone(parent) == FALSE || pcmk_is_set(parent->flags, pe_rsc_unique)) { pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); DeleteRsc(rsc, node, FALSE, data_set); } else { pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); } return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms > 0) && (pcmk_is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START, RSC_PROMOTE, RSC_MIGRATED, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. */ if (pe__bundle_needs_remote_name(rsc, data_set)) { /* We haven't allocated resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pe__add_param_check(rsc_op, rsc, node, pe_check_active, data_set); } else if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe__clear_failcount(rsc, node, "action definition changed", data_set); } } } g_list_free(sorted_op_list); } static GList * find_rsc_list(GList *result, pe_resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GList *gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } if (rsc == NULL) { if (data_set == NULL) { return NULL; } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; pe_node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = pcmk_find_cib_element(data_set->input, XML_CIB_TAG_STATUS); xmlNode *node_state = NULL; for (node_state = pcmk__xe_first_child(status); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; /* Still need to check actions for a maintenance node to cancel existing monitor operations */ } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) { crm_trace("Skipping param check for %s: can't run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = pcmk__xe_first_child(lrm_rscs); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { if (xml_has_children(rsc_entry)) { GList *gIter = NULL; GList *result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->variant != pe_native) { continue; } check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static gboolean failcount_clear_action_exists(pe_node_t * node, pe_resource_t * rsc) { gboolean rc = FALSE; GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE); if (list) { rc = TRUE; } g_list_free(list); return rc; } static void common_apply_stickiness(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { if (rsc->children) { GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (pcmk_is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) { pe_node_t *current = pe_find_node_id(rsc->running_on, node->details->id); pe_node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if ((match != NULL) || pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) { pe_resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; pe_node_t *nIter = NULL; pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } /* Check the migration threshold only if a failcount clear action * has not already been placed for this resource on the node. * There is no sense in potentially forcing the resource from this * node if the failcount is being reset anyway. * * @TODO A clear_failcount operation can be scheduled in stage4() via * check_actions_for(), or in stage5() via check_params(). This runs in * stage2(), so it cannot detect those, meaning we might check the migration * threshold when we shouldn't -- worst case, we stop or move the resource, * then move it back next transition. */ if (failcount_clear_action_exists(node, rsc) == FALSE) { pe_resource_t *failed = NULL; if (pcmk__threshold_reached(rsc, node, data_set, &failed)) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); } } } void complex_set_cmds(pe_resource_t * rsc) { GList *gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GList *gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } if (pcmk__starts_with(key, "#health")) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = pe__add_scores(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GList *gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); int base_health = 0; if (pcmk__str_eq(health_strategy, "none", pcmk__str_null_matches | pcmk__str_casei)) { /* Prevent any accidental health -> score translation */ pcmk__score_red = 0; pcmk__score_yellow = 0; pcmk__score_green = 0; return TRUE; } else if (pcmk__str_eq(health_strategy, "migrate-on-red", pcmk__str_casei)) { /* Resources on nodes which have health values of red are * weighted away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = 0; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "only-green", pcmk__str_casei)) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = -INFINITY; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "progressive", pcmk__str_casei)) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table * Also, custom health "base score" can be used */ base_health = char2score(pe_pref(data_set->config_hash, "node-health-base")); } else if (pcmk__str_eq(health_strategy, "custom", pcmk__str_casei)) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existence of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = base_health; pe_node_t *node = (pe_node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new location constraint so * that the weight will be added later on. */ if (system_health != 0) { GList *gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; pcmk__new_location(health_strategy, rsc, system_health, NULL, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { if (data_set->input == NULL) { return FALSE; } if (!pcmk_is_set(data_set->flags, pe_flag_have_status)) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); pcmk__unpack_constraints(data_set); return TRUE; } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { - pe_action_t *probe_node_complete = NULL; - for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; - const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED); + const char *probed = NULL; if (node->details->online == FALSE) { if (pcmk__is_failed_remote_node(node)) { pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE); } continue; } else if (node->details->unclean) { continue; } else if (node->details->rsc_discovery_enabled == FALSE) { /* resource discovery is disabled for this node */ continue; } + /* This is no longer needed for live clusters, since the probe_complete + * node attribute will never be in the CIB. However this is still useful + * for processing old saved CIBs (< 1.1.14), including the + * reprobe-target_rc regression test. + */ + probed = pe_node_attribute_raw(node, CRM_OP_PROBED); if (probed != NULL && crm_is_true(probed) == FALSE) { pe_action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } for (GList *gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; - rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set); + rsc->cmds->create_probe(rsc, node, NULL, FALSE, data_set); } } return TRUE; } static void rsc_discover_filter(pe_resource_t *rsc, pe_node_t *node) { pe_resource_t *top = uber_parent(rsc); pe_node_t *match; if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) { return; } g_list_foreach(rsc->children, (GFunc) rsc_discover_filter, node); match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match && match->rsc_discover_mode != pe_discover_exclusive) { match->weight = -INFINITY; } } static time_t shutdown_time(pe_node_t *node, pe_working_set_t *data_set) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); time_t result = 0; if (shutdown) { long long result_ll; if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) { result = (time_t) result_ll; } } return result? result : get_effective_time(data_set); } static void apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) { const char *class; // Only primitives and (uncloned) groups may be locked if (rsc->variant == pe_group) { g_list_foreach(rsc->children, (GFunc) apply_shutdown_lock, data_set); } else if (rsc->variant != pe_native) { return; } // Fence devices and remote connections can't be locked class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) || pe__resource_is_remote_conn(rsc, data_set)) { return; } if (rsc->lock_node != NULL) { // The lock was obtained from resource history if (rsc->running_on != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. */ pe_rsc_info(rsc, "Cancelling shutdown lock because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->lock_node, data_set); rsc->lock_node = NULL; rsc->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->running_on)) { pe_node_t *node = rsc->running_on->data; if (node->details->shutdown) { if (node->details->unclean) { pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, node->details->uname); } else { rsc->lock_node = node; rsc->lock_time = shutdown_time(node, data_set); } } } if (rsc->lock_node == NULL) { // No lock needed return; } if (data_set->shutdown_lock > 0) { time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock; pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, rsc->lock_node->details->uname, (long long) lock_expiration); pe__update_recheck_time(++lock_expiration, data_set); } else { pe_rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, rsc->lock_node->details->uname); } // If resource is locked to one node, ban it from all other nodes for (GList *item = data_set->nodes; item != NULL; item = item->next) { pe_node_t *node = item->data; if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { resource_location(rsc, node, -CRM_SCORE_INFINITY, XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set); } } } /* * \internal * \brief Stage 2 of cluster status: apply node-specific criteria * * Count known nodes, and apply location constraints, stickiness, and exclusive * resource discovery. */ gboolean stage2(pe_working_set_t * data_set) { GList *gIter = NULL; if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { g_list_foreach(data_set->resources, (GFunc) apply_shutdown_lock, data_set); } if (!pcmk_is_set(data_set->flags, pe_flag_no_compat)) { // @COMPAT API backward compatibility for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node && (node->weight >= 0) && node->details->online && (node->details->type != node_ping)) { data_set->max_valid_nodes++; } } } pcmk__apply_locations(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GList *gIter2 = NULL; pe_node_t *node = (pe_node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); rsc_discover_filter(rsc, node); } } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existence"; GList *nodes = (GList *) data; const pe_resource_t *resource1 = a; const pe_resource_t *resource2 = b; pe_node_t *r1_node = NULL; pe_node_t *r2_node = NULL; GList *gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1), resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource1->id, r1_nodes, resource1->cluster); r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2), resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource2->id, r2_nodes, resource2->cluster); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { r1_node = pe__current_node(resource1); r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id); if (r1_node != NULL) { r1_weight = r1_node->weight; } } if (resource2->running_on) { r2_node = pe__current_node(resource2); r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id); if (r2_node != NULL) { r2_weight = r2_node->weight; } } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; r1_node = NULL; r2_node = NULL; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: crm_trace("%s (%d) on %s %c %s (%d) on %s: %s", resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a", rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason); if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } return rc; } static void allocate_resources(pe_working_set_t * data_set) { GList *gIter = NULL; if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Allocate remote connection resources first (which will also allocate * any colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == FALSE) { continue; } pe_rsc_trace(rsc, "Allocating remote connection resource '%s'", rsc->id); rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set); } } /* now do the rest of the resources */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == TRUE) { continue; } pe_rsc_trace(rsc, "Allocating %s resource '%s'", crm_element_name(rsc->xml), rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } // Clear fail counts for orphaned rsc on all online nodes static void cleanup_orphans(pe_resource_t * rsc, pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->online && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe_action_t *clear_op = NULL; clear_op = pe__clear_failcount(rsc, node, "it is orphaned", data_set); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, data_set); } } } gboolean stage5(pe_working_set_t * data_set) { pcmk__output_t *out = data_set->priv; GList *gIter = NULL; if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { GList *nodes = g_list_copy(data_set->nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { out->message(out, "node-capacity", node, "Original"); } } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ allocate_resources(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { out->message(out, "node-capacity", node, "Remaining"); } } // Process deferred action checks pe__foreach_param_check(data_set, check_params); pe__free_param_checks(data_set); if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Handle orphans"); if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* There's no need to recurse into rsc->children because those * should just be unallocated clone instances. */ if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { cleanup_orphans(rsc, data_set); } } } crm_trace("Creating actions"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const pe_resource_t * rsc) { GList *gIter = rsc->children; if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resources(pe_working_set_t * data_set) { GList *gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /* * Create dependencies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { pe_action_t *dc_down = NULL; pe_action_t *stonith_op = NULL; gboolean integrity_lost = FALSE; gboolean need_stonith = TRUE; GList *gIter; GList *stonith_ops = NULL; GList *shutdown_ops = NULL; /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we can mark nodes as needing fencing. */ pcmk__order_remote_connection_actions(data_set); crm_trace("Processing fencing and shutdown cases"); if (any_managed_resources(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } /* Check each node for stonith/shutdown */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pe__is_guest_node(node)) { if (node->details->remote_requires_reset && need_stonith && pe_can_fence(data_set, node)) { pcmk__fence_guest(node, data_set); } continue; } stonith_op = NULL; if (node->details->unclean && need_stonith && pe_can_fence(data_set, node)) { stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set); pe_warn("Scheduling Node %s for STONITH", node->details->uname); pcmk__order_vs_fence(stonith_op, data_set); if (node->details->is_dc) { // Remember if the DC is being fenced dc_down = stonith_op; } else { if (!pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing) && (stonith_ops != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pe_action_t *) stonith_ops->data, stonith_op, pe_order_optional); } // Remember all non-DC fencing actions in a separate list stonith_ops = g_list_prepend(stonith_ops, stonith_op); } } else if (node->details->online && node->details->shutdown && /* TODO define what a shutdown op means for a remote node. * For now we do not send shutdown operations for remote nodes, but * if we can come up with a good use for this in the future, we will. */ pe__is_guest_or_remote_node(node) == FALSE) { pe_action_t *down_op = pcmk__new_shutdown_action(node, data_set); if (node->details->is_dc) { // Remember if the DC is being shut down dc_down = down_op; } else { // Remember non-DC shutdowns for later ordering shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) { pe_action_t *node_stop = (pe_action_t *) gIter->data; crm_debug("Ordering shutdown on %s before %s on DC %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } } // Order any non-DC fencing before any DC fencing or shutdown if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) { order_actions((pe_action_t *) gIter->data, dc_down, pe_order_optional); } } else if (stonith_ops) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pe_action_t *) stonith_ops->data, dc_down, pe_order_optional); } } g_list_free(stonith_ops); g_list_free(shutdown_ops); return TRUE; } static gboolean order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action) { /* No need to probe the resource on the node that is being * unfenced. Otherwise it might introduce transition loop * since probe will be performed after the node is * unfenced. */ if (pcmk__str_eq(rh_action->task, CRM_OP_FENCE, pcmk__str_casei) && probe->node && rh_action->node && probe->node->details == rh_action->node->details) { const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action"); if (pcmk__str_eq(op, "on", pcmk__str_casei)) { return TRUE; } } // Shutdown waits for probe to complete only if it's on the same node if ((pcmk__str_eq(rh_action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) && probe->node && rh_action->node && probe->node->details != rh_action->node->details) { return TRUE; } return FALSE; } static void order_first_probes_imply_stops(pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; enum pe_ordering order_type = pe_order_optional; pe_resource_t *lh_rsc = order->lh_rsc; pe_resource_t *rh_rsc = order->rh_rsc; pe_action_t *lh_action = order->lh_action; pe_action_t *rh_action = order->rh_action; const char *lh_action_task = order->lh_action_task; const char *rh_action_task = order->rh_action_task; GList *probes = NULL; GList *rh_actions = NULL; GList *pIter = NULL; if (lh_rsc == NULL) { continue; } else if (rh_rsc && lh_rsc == rh_rsc) { continue; } if (lh_action == NULL && lh_action_task == NULL) { continue; } if (rh_action == NULL && rh_action_task == NULL) { continue; } /* Technically probe is expected to return "not running", which could be * the alternative of stop action if the status of the resource is * unknown yet. */ if (lh_action && !pcmk__str_eq(lh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (lh_action == NULL && lh_action_task && !pcmk__ends_with(lh_action_task, "_" RSC_STOP "_0")) { continue; } /* Do not probe the resource inside of a stopping container. Otherwise * it might introduce transition loop since probe will be performed * after the container starts again. */ if (rh_rsc && lh_rsc->container == rh_rsc) { if (rh_action && pcmk__str_eq(rh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (rh_action == NULL && rh_action_task && pcmk__ends_with(rh_action_task,"_" RSC_STOP "_0")) { continue; } } if (order->type == pe_order_none) { continue; } // Preserve the order options for future filtering if (pcmk_is_set(order->type, pe_order_apply_first_non_migratable)) { pe__set_order_flags(order_type, pe_order_apply_first_non_migratable); } if (pcmk_is_set(order->type, pe_order_same_node)) { pe__set_order_flags(order_type, pe_order_same_node); } // Keep the order types for future filtering if (order->type == pe_order_anti_colocation || order->type == pe_order_load) { order_type = order->type; } probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE); if (probes == NULL) { continue; } if (rh_action) { rh_actions = g_list_prepend(rh_actions, rh_action); } else if (rh_rsc && rh_action_task) { rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL); } if (rh_actions == NULL) { g_list_free(probes); continue; } crm_trace("Processing for LH probe based on ordering constraint %s -> %s" " (id=%d, type=%.6x)", lh_action ? lh_action->uuid : lh_action_task, rh_action ? rh_action->uuid : rh_action_task, order->id, order->type); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; GList *rIter = NULL; for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) rIter->data; if (order_first_probe_unneeded(probe, rh_action_iter)) { continue; } order_actions(probe, rh_action_iter, order_type); } } g_list_free(rh_actions); g_list_free(probes); } } static void order_first_probe_then_restart_repromote(pe_action_t * probe, pe_action_t * after, pe_working_set_t * data_set) { GList *gIter = NULL; bool interleave = FALSE; pe_resource_t *compatible_rsc = NULL; if (probe == NULL || probe->rsc == NULL || probe->rsc->variant != pe_native) { return; } if (after == NULL // Avoid running into any possible loop || pcmk_is_set(after->flags, pe_action_tracking)) { return; } if (!pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) { return; } pe__set_action_flags(after, pe_action_tracking); crm_trace("Processing based on %s %s -> %s %s", probe->uuid, probe->node ? probe->node->details->uname: "", after->uuid, after->node ? after->node->details->uname : ""); if (after->rsc /* Better not build a dependency directly with a clone/group. * We are going to proceed through the ordering chain and build * dependencies with its children. */ && after->rsc->variant == pe_native && probe->rsc != after->rsc) { GList *then_actions = NULL; enum pe_ordering probe_order_type = pe_order_optional; if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE); } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE); } for (gIter = then_actions; gIter != NULL; gIter = gIter->next) { pe_action_t *then = (pe_action_t *) gIter->data; // Skip any pseudo action which for example is implied by fencing if (pcmk_is_set(then->flags, pe_action_pseudo)) { continue; } order_actions(probe, then, probe_order_type); } g_list_free(then_actions); } if (after->rsc && after->rsc->variant > pe_group) { const char *interleave_s = g_hash_table_lookup(after->rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); if (interleave) { /* For an interleaved clone, we should build a dependency only * with the relevant clone child. */ compatible_rsc = find_compatible_child(probe->rsc, after->rsc, RSC_ROLE_UNKNOWN, FALSE, data_set); } } for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data; /* pe_order_implies_then is the reason why a required A.start * implies/enforces B.start to be required too, which is the cause of * B.restart/re-promote. * * Not sure about pe_order_implies_then_on_node though. It's now only * used for unfencing case, which tends to introduce transition * loops... */ if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) { /* The order type between a group/clone and its child such as * B.start-> B_child.start is: * pe_order_implies_first_printed | pe_order_runnable_left * * Proceed through the ordering chain and build dependencies with * its children. */ if (after->rsc == NULL || after->rsc->variant < pe_group || probe->rsc->parent == after->rsc || after_wrapper->action->rsc == NULL || after_wrapper->action->rsc->variant > pe_group || after->rsc != after_wrapper->action->rsc->parent) { continue; } /* Proceed to the children of a group or a non-interleaved clone. * For an interleaved clone, proceed only to the relevant child. */ if (after->rsc->variant > pe_group && interleave == TRUE && (compatible_rsc == NULL || compatible_rsc != after_wrapper->action->rsc)) { continue; } } crm_trace("Proceeding through %s %s -> %s %s (type=%#.6x)", after->uuid, after->node ? after->node->details->uname: "", after_wrapper->action->uuid, after_wrapper->action->node ? after_wrapper->action->node->details->uname : "", after_wrapper->type); order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); } } static void clear_actions_tracking_flag(pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (pcmk_is_set(action->flags, pe_action_tracking)) { pe__clear_action_flags(action, pe_action_tracking); } } } static void order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set) { GList *gIter = NULL; GList *probes = NULL; g_list_foreach(rsc->children, (GFunc) order_first_rsc_probes, data_set); if (rsc->variant != pe_native) { return; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (gIter = probes; gIter != NULL; gIter= gIter->next) { pe_action_t *probe = (pe_action_t *) gIter->data; GList *aIter = NULL; for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data; order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); clear_actions_tracking_flag(data_set); } } g_list_free(probes); } static void order_first_probes(pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; order_first_rsc_probes(rsc, data_set); } order_first_probes_imply_stops(data_set); } static void order_then_probes(pe_working_set_t * data_set) { #if 0 GList *gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* Given "A then B", we would prefer to wait for A to be * started before probing B. * * If A was a filesystem on which the binaries and data for B * lived, it would have been useful if the author of B's agent * could assume that A is running before B.monitor will be * called. * * However we can't _only_ probe once A is running, otherwise * we'd not detect the state of B if A could not be started * for some reason. * * In practice however, we cannot even do an opportunistic * version of this because B may be moving: * * B.probe -> B.start * B.probe -> B.stop * B.stop -> B.start * A.stop -> A.start * A.start -> B.probe * * So far so good, but if we add the result of this code: * * B.stop -> A.stop * * Then we get a loop: * * B.probe -> B.stop -> A.stop -> A.start -> B.probe * * We could kill the 'B.probe -> B.stop' dependency, but that * could mean stopping B "too" soon, because B.start must wait * for the probes to complete. * * Another option is to allow it only if A is a non-unique * clone with clone-max == node-max (since we'll never be * moving it). However, we could still be stopping one * instance at the same time as starting another. * The complexity of checking for allowed conditions combined * with the ever narrowing usecase suggests that this code * should remain disabled until someone gets smarter. */ pe_action_t *start = NULL; GList *actions = NULL; GList *probes = NULL; actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE); if (actions) { start = actions->data; g_list_free(actions); } if(start == NULL) { crm_err("No start action for %s", rsc->id); continue; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (actions = start->actions_before; actions != NULL; actions = actions->next) { pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data; GList *pIter = NULL; pe_action_t *first = before->action; pe_resource_t *first_rsc = first->rsc; if(first->required_runnable_before) { GList *clone_actions = NULL; for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) { before = (pe_action_wrapper_t *) clone_actions->data; crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid); CRM_ASSERT(before->action->rsc); first_rsc = before->action->rsc; break; } } else if(!pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) { crm_trace("Not a start op %s for %s", first->uuid, start->uuid); } if(first_rsc == NULL) { continue; } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) { crm_trace("Same parent %s for %s", first_rsc->id, start->uuid); continue; } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) { crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid); continue; } crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; crm_err("Ordering %s before %s", first->uuid, probe->uuid); order_actions(first, probe, pe_order_optional); } } } #endif } void pcmk__order_probes(pe_working_set_t *data_set) { order_first_probes(data_set); order_then_probes(data_set); } - -static int transition_id = -1; - -/*! - * \internal - * \brief Log a message after calculating a transition - * - * \param[in] filename Where transition input is stored - */ -void -pcmk__log_transition_summary(const char *filename) -{ - if (was_processing_error) { - crm_err("Calculated transition %d (with errors)%s%s", - transition_id, - (filename == NULL)? "" : ", saving inputs in ", - (filename == NULL)? "" : filename); - - } else if (was_processing_warning) { - crm_warn("Calculated transition %d (with warnings)%s%s", - transition_id, - (filename == NULL)? "" : ", saving inputs in ", - (filename == NULL)? "" : filename); - - } else { - crm_notice("Calculated transition %d%s%s", - transition_id, - (filename == NULL)? "" : ", saving inputs in ", - (filename == NULL)? "" : filename); - } - if (crm_config_error) { - crm_notice("Configuration errors found during scheduler processing," - " please run \"crm_verify -L\" to identify issues"); - } -} - -/* - * Create a dependency graph to send to the transitioner (via the controller) - */ -gboolean -stage8(pe_working_set_t * data_set) -{ - GList *gIter = NULL; - const char *value = NULL; - long long limit = 0LL; - - transition_id++; - crm_trace("Creating transition graph %d.", transition_id); - - data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); - - value = pe_pref(data_set->config_hash, "cluster-delay"); - crm_xml_add(data_set->graph, "cluster-delay", value); - - value = pe_pref(data_set->config_hash, "stonith-timeout"); - crm_xml_add(data_set->graph, "stonith-timeout", value); - - crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); - - if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { - crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); - } else { - crm_xml_add(data_set->graph, "failed-start-offset", "1"); - } - - value = pe_pref(data_set->config_hash, "batch-limit"); - crm_xml_add(data_set->graph, "batch-limit", value); - - crm_xml_add_int(data_set->graph, "transition_id", transition_id); - - value = pe_pref(data_set->config_hash, "migration-limit"); - if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { - crm_xml_add(data_set->graph, "migration-limit", value); - } - - if (data_set->recheck_by > 0) { - char *recheck_epoch = NULL; - - recheck_epoch = crm_strdup_printf("%llu", - (long long) data_set->recheck_by); - crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); - free(recheck_epoch); - } - - /* The following code will de-duplicate action inputs, so nothing past this - * should rely on the action input type flags retaining their original - * values. - */ - - gIter = data_set->resources; - for (; gIter != NULL; gIter = gIter->next) { - pe_resource_t *rsc = (pe_resource_t *) gIter->data; - - pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id); - rsc->cmds->expand(rsc, data_set); - } - - crm_log_xml_trace(data_set->graph, "created resource-driven action list"); - - /* pseudo action to distribute list of nodes with maintenance state update */ - add_maintenance_update(data_set); - - /* catch any non-resource specific actions */ - crm_trace("processing non-resource actions"); - - gIter = data_set->actions; - for (; gIter != NULL; gIter = gIter->next) { - pe_action_t *action = (pe_action_t *) gIter->data; - - if (action->rsc - && action->node - && action->node->details->shutdown - && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) - && !pcmk_any_flags_set(action->flags, - pe_action_optional|pe_action_runnable) - && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none) - ) { - /* Eventually we should just ignore the 'fence' case - * But for now it's the best way to detect (in CTS) when - * CIB resource updates are being lost - */ - if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) - || data_set->no_quorum_policy == no_quorum_ignore) { - crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)", - action->node->details->unclean ? "fence" : "shut down", - action->node->details->uname, action->rsc->id, - pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged", - pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "", - action->uuid); - } - } - - graph_element_from_action(action, data_set); - } - - crm_log_xml_trace(data_set->graph, "created generic action list"); - crm_trace("Created transition graph %d.", transition_id); - - return TRUE; -} diff --git a/lib/pacemaker/pcmk_sched_messages.c b/lib/pacemaker/pcmk_sched_messages.c index 316c273880..27892a5714 100644 --- a/lib/pacemaker/pcmk_sched_messages.c +++ b/lib/pacemaker/pcmk_sched_messages.c @@ -1,156 +1,156 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" extern bool pcmk__is_daemon; static void log_resource_details(pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; GList *all = NULL; /* We need a list of nodes that we are allowed to output information for. * This is necessary because out->message for all the resource-related * messages expects such a list, due to the `crm_mon --node=` feature. Here, * we just make it a list of all the nodes. */ all = g_list_prepend(all, (gpointer) "*"); for (GList *item = data_set->resources; item != NULL; item = item->next) { pe_resource_t *rsc = (pe_resource_t *) item->data; // Log all resources except inactive orphans if (!pcmk_is_set(rsc->flags, pe_rsc_orphan) || (rsc->role != RSC_ROLE_STOPPED)) { out->message(out, crm_map_element_name(rsc->xml), 0, rsc, all, all); } } g_list_free(all); } static void log_all_actions(pe_working_set_t *data_set) { /* This only ever outputs to the log, so ignore whatever output object was * previously set and just log instead. */ pcmk__output_t *prev_out = data_set->priv; pcmk__output_t *out = pcmk__new_logger(); if (out == NULL) { return; } pcmk__output_set_log_level(out, LOG_NOTICE); data_set->priv = out; out->begin_list(out, NULL, NULL, "Actions"); pcmk__output_actions(data_set); out->end_list(out); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); data_set->priv = prev_out; } /*! * \internal * \brief Run the scheduler for a given CIB * * \param[in,out] data_set Cluster working set * \param[in] xml_input CIB XML to use as scheduler input * \param[in] now Time to use for rule evaluation (or NULL for now) */ xmlNode * pcmk__schedule_actions(pe_working_set_t *data_set, xmlNode *xml_input, crm_time_t *now) { GList *gIter = NULL; CRM_ASSERT(xml_input || pcmk_is_set(data_set->flags, pe_flag_have_status)); if (!pcmk_is_set(data_set->flags, pe_flag_have_status)) { set_working_set_defaults(data_set); data_set->input = xml_input; data_set->now = now; } else { crm_trace("Already have status - reusing"); } if (data_set->now == NULL) { data_set->now = crm_time_new(NULL); } crm_trace("Calculate cluster status"); stage0(data_set); if (!pcmk_is_set(data_set->flags, pe_flag_quick_location) && pcmk__is_daemon) { log_resource_details(data_set); } crm_trace("Applying location constraints"); stage2(data_set); if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { return NULL; } pcmk__create_internal_constraints(data_set); crm_trace("Check actions"); stage4(data_set); crm_trace("Allocate resources"); stage5(data_set); crm_trace("Processing fencing and shutdown cases"); stage6(data_set); pcmk__apply_orderings(data_set); log_all_actions(data_set); crm_trace("Create transition graph"); - stage8(data_set); + pcmk__create_graph(data_set); crm_trace("=#=#=#=#= Summary =#=#=#=#="); crm_trace("\t========= Set %d (Un-runnable) =========", -1); if (get_crm_log_level() == LOG_TRACE) { gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (!pcmk_any_flags_set(action->flags, pe_action_optional |pe_action_runnable |pe_action_pseudo)) { pcmk__log_action("\t", action, true); } } } return data_set->graph; } diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c index f5973a0a19..1085a951d6 100644 --- a/lib/pacemaker/pcmk_sched_native.c +++ b/lib/pacemaker/pcmk_sched_native.c @@ -1,2512 +1,2495 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "libpacemaker_private.h" // The controller removes the resource from the CIB, making this redundant // #define DELETE_THEN_REFRESH 1 #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include extern bool pcmk__is_daemon; static void Recurring(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); static void Recurring_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set); gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); /* This array says what the *next* role should be when transitioning from one * role to another. For example going from Stopped to Promoted, the next role is * RSC_ROLE_UNPROMOTED, because the resource must be started before being promoted. * The current state then becomes Started, which is fed into this array again, * giving a next role of RSC_ROLE_PROMOTED. */ static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current state Next state*/ /* Unknown Stopped Started Unpromoted Promoted */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED }, /* Unpromoted */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED }, /* Promoted */ { RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED }, }; typedef gboolean (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *next, gboolean optional, pe_working_set_t *data_set); // This array picks the function needed to transition from one role to another static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current state Next state */ /* Unknown Stopped Started Unpromoted Promoted */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Unpromoted */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, /* Promoted */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, }, }; #define clear_node_weights_flags(nw_flags, nw_rsc, flags_to_clear) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Node weight", (nw_rsc)->id, (flags), \ (flags_to_clear), #flags_to_clear); \ } while (0) static bool native_choose_node(pe_resource_t * rsc, pe_node_t * prefer, pe_working_set_t * data_set) { GList *nodes = NULL; pe_node_t *chosen = NULL; pe_node_t *best = NULL; int multiple = 1; int length = 0; bool result = false; process_utilization(rsc, &prefer, data_set); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to != NULL; } // Sort allowed nodes by weight if (rsc->allowed_nodes) { length = g_hash_table_size(rsc->allowed_nodes); } if (length > 0) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, pe__current_node(rsc), data_set); // First node in sorted list has the best score best = g_list_nth_data(nodes, 0); } if (prefer && nodes) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen == NULL) { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); /* Favor the preferred node as long as its weight is at least as good as * the best allowed node's. * * An alternative would be to favor the preferred node even if the best * node is better, when the best node's weight is less than INFINITY. */ } else if ((chosen->weight < 0) || (chosen->weight < best->weight)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else if (!can_run_resources(chosen)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Chose preferred node %s for %s (ignoring %d candidates)", chosen->details->uname, rsc->id, length); } } if ((chosen == NULL) && nodes) { /* Either there is no preferred node, or the preferred node is not * available, but there are other nodes allowed to run the resource. */ chosen = best; pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "", rsc->id, length); if (!pe_rsc_is_unique_clone(rsc->parent) && chosen && (chosen->weight > 0) && can_run_resources(chosen)) { /* If the resource is already running on a node, prefer that node if * it is just as good as the chosen node. * * We don't do this for unique clone instances, because * distribute_children() has already assigned instances to their * running nodes when appropriate, and if we get here, we don't want * remaining unallocated instances to prefer a node that's already * running another instance. */ pe_node_t *running = pe__current_node(rsc); if (running && (can_run_resources(running) == FALSE)) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, running->details->uname); } else if (running) { for (GList *iter = nodes->next; iter; iter = iter->next) { pe_node_t *tmp = (pe_node_t *) iter->data; if (tmp->weight != chosen->weight) { // The nodes are sorted by weight, so no more are equal break; } if (tmp->details == running->details) { // Scores are equal, so prefer the current node chosen = tmp; } multiple++; } } } } if (multiple > 1) { static char score[33]; int log_level = (chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO; score2char_stack(chosen->weight, score, sizeof(score)); do_crm_log(log_level, "Chose node %s for %s from %d nodes with score %s", chosen->details->uname, rsc->id, multiple, score); } result = pcmk__assign_primitive(rsc, chosen, false); g_list_free(nodes); return result; } /*! * \internal * \brief Find score of highest-scored node that matches colocation attribute * * \param[in] rsc Resource whose allowed nodes should be searched * \param[in] attr Colocation attribute name (must not be NULL) * \param[in] value Colocation attribute value to require */ static int best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr, const char *value) { GHashTableIter iter; pe_node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; // Find best allowed node with matching attribute g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if ((node->weight > best_score) && can_run_resources(node) && pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) { best_score = node->weight; best_node = node->details->uname; } } if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_casei)) { if (best_node == NULL) { crm_info("No allowed node for %s matches node attribute %s=%s", rsc->id, attr, value); } else { crm_info("Allowed node %s for %s had best score (%d) " "of those matching node attribute %s=%s", best_node, rsc->id, best_score, attr, value); } } return best_score; } /*! * \internal * \brief Add resource's colocation matches to current node allocation scores * * For each node in a given table, if any of a given resource's allowed nodes * have a matching value for the colocation attribute, add the highest of those * nodes' scores to the node's score. * * \param[in,out] nodes Hash table of nodes with allocation scores so far * \param[in] rsc Resource whose allowed nodes should be compared * \param[in] attr Colocation attribute that must match (NULL for default) * \param[in] factor Factor by which to multiply scores being added * \param[in] only_positive Whether to add only positive scores */ static void add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc, const char *attr, float factor, bool only_positive) { GHashTableIter iter; pe_node_t *node = NULL; if (attr == NULL) { attr = CRM_ATTR_UNAME; } // Iterate through each node g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { float weight_f = 0; int weight = 0; int score = 0; int new_score = 0; score = best_node_score_matching_attr(rsc, attr, pe_node_attribute_raw(node, attr)); if ((factor < 0) && (score < 0)) { /* Negative preference for a node with a negative score * should not become a positive preference. * * @TODO Consider filtering only if weight is -INFINITY */ crm_trace("%s: Filtering %d + %f * %d (double negative disallowed)", node->details->uname, node->weight, factor, score); continue; } if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)", node->details->uname, node->weight, factor, score); continue; } weight_f = factor * score; // Round the number; see http://c-faq.com/fp/round.html weight = (int) ((weight_f < 0)? (weight_f - 0.5) : (weight_f + 0.5)); /* Small factors can obliterate the small scores that are often actually * used in configurations. If the score and factor are nonzero, ensure * that the result is nonzero as well. */ if ((weight == 0) && (score != 0)) { if (factor > 0.0) { weight = 1; } else if (factor < 0.0) { weight = -1; } } new_score = pe__add_scores(weight, node->weight); if (only_positive && (new_score < 0) && (node->weight > 0)) { crm_trace("%s: Filtering %d + %f * %d = %d " "(negative disallowed, marking node unusable)", node->details->uname, node->weight, factor, score, new_score); node->weight = INFINITY_HACK; continue; } if (only_positive && (new_score < 0) && (node->weight == 0)) { crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)", node->details->uname, node->weight, factor, score, new_score); continue; } crm_trace("%s: %d + %f * %d = %d", node->details->uname, node->weight, factor, score, new_score); node->weight = new_score; } } static inline bool is_nonempty_group(pe_resource_t *rsc) { return rsc && (rsc->variant == pe_group) && (rsc->children != NULL); } /*! * \internal * \brief Incorporate colocation constraint scores into node weights * * \param[in,out] rsc Resource being placed * \param[in] primary_id ID of primary resource in constraint * \param[in,out] nodes Nodes, with scores as of this point * \param[in] attr Colocation attribute (ID by default) * \param[in] factor Incorporate scores multiplied by this factor * \param[in] flags Bitmask of enum pe_weights values * * \return Nodes, with scores modified by this constraint * \note This function assumes ownership of the nodes argument. The caller * should free the returned copy rather than the original. */ GHashTable * pcmk__native_merge_weights(pe_resource_t *rsc, const char *primary_id, GHashTable *nodes, const char *attr, float factor, uint32_t flags) { GHashTable *work = NULL; // Avoid infinite recursion if (pcmk_is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", primary_id, rsc->id); return nodes; } pe__set_resource_flags(rsc, pe_rsc_merging); if (pcmk_is_set(flags, pe_weights_init)) { if (is_nonempty_group(rsc)) { GList *last = g_list_last(rsc->children); pe_resource_t *last_rsc = last->data; pe_rsc_trace(rsc, "%s: Merging scores from group %s " "using last member %s (at %.6f)", primary_id, rsc->id, last_rsc->id, factor); work = pcmk__native_merge_weights(last_rsc, primary_id, NULL, attr, factor, flags); } else { work = pcmk__copy_node_table(rsc->allowed_nodes); } clear_node_weights_flags(flags, rsc, pe_weights_init); } else if (is_nonempty_group(rsc)) { /* The first member of the group will recursively incorporate any * constraints involving other members (including the group internal * colocation). * * @TODO The indirect colocations from the dependent group's other * members will be incorporated at full strength rather than by * factor, so the group's combined stickiness will be treated as * (factor + (#members - 1)) * stickiness. It is questionable what * the right approach should be. */ pe_rsc_trace(rsc, "%s: Merging scores from first member of group %s " "(at %.6f)", primary_id, rsc->id, factor); work = pcmk__copy_node_table(nodes); work = pcmk__native_merge_weights(rsc->children->data, primary_id, work, attr, factor, flags); } else { pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)", primary_id, rsc->id, factor); work = pcmk__copy_node_table(nodes); add_node_scores_matching_attr(work, rsc, attr, factor, pcmk_is_set(flags, pe_weights_positive)); } if (can_run_any(work)) { GList *gIter = NULL; int multiplier = (factor < 0)? -1 : 1; if (pcmk_is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; pe_rsc_trace(rsc, "Checking additional %d optional '%s with' constraints", g_list_length(gIter), rsc->id); } else if (is_nonempty_group(rsc)) { pe_resource_t *last_rsc = g_list_last(rsc->children)->data; gIter = last_rsc->rsc_cons_lhs; pe_rsc_trace(rsc, "Checking additional %d optional 'with group %s' " "constraints using last member %s", g_list_length(gIter), rsc->id, last_rsc->id); } else { gIter = rsc->rsc_cons_lhs; pe_rsc_trace(rsc, "Checking additional %d optional 'with %s' constraints", g_list_length(gIter), rsc->id); } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *other = NULL; pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (pcmk_is_set(flags, pe_weights_forward)) { other = constraint->primary; } else if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } else { other = constraint->dependent; } pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint (%s with %s)", constraint->id, constraint->dependent->id, constraint->primary->id); work = pcmk__native_merge_weights(other, primary_id, work, constraint->node_attribute, multiplier * constraint->score / (float) INFINITY, flags|pe_weights_rollback); pe__show_node_weights(true, NULL, primary_id, work, rsc->cluster); } } else if (pcmk_is_set(flags, pe_weights_rollback)) { pe_rsc_info(rsc, "%s: Rolling back optional scores from %s", primary_id, rsc->id); g_hash_table_destroy(work); pe__clear_resource_flags(rsc, pe_rsc_merging); return nodes; } if (pcmk_is_set(flags, pe_weights_positive)) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } pe__clear_resource_flags(rsc, pe_rsc_merging); return work; } pe_node_t * pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GList *gIter = NULL; if (rsc->parent && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } pe__set_resource_flags(rsc, pe_rsc_allocating); pe__show_node_weights(true, rsc, "Pre-alloc", rsc->allowed_nodes, data_set); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; GHashTable *archive = NULL; pe_resource_t *primary = constraint->primary; if ((constraint->dependent_role >= RSC_ROLE_PROMOTED) || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = pcmk__copy_node_table(rsc->allowed_nodes); } pe_rsc_trace(rsc, "%s: Allocating %s first (constraint=%s score=%d role=%s)", rsc->id, primary->id, constraint->id, constraint->score, role2text(constraint->dependent_role)); primary->cmds->allocate(primary, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, primary, constraint, data_set); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, primary->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } pe__show_node_weights(true, rsc, "Post-coloc", rsc->allowed_nodes, data_set); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } pe_rsc_trace(rsc, "Merging score of '%s' constraint (%s with %s)", constraint->id, constraint->dependent->id, constraint->primary->id); rsc->allowed_nodes = constraint->dependent->cmds->merge_weights( constraint->dependent, rsc->id, rsc->allowed_nodes, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_rollback); } if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id); /* make sure it doesn't come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } else if(rsc->next_role > rsc->role && !pcmk_is_set(data_set->flags, pe_flag_have_quorum) && data_set->no_quorum_policy == no_quorum_freeze) { crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze"); } pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, data_set); if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) && !pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) { pe__clear_resource_flags(rsc, pe_rsc_managed); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; pe_node_t *assign_to = NULL; pe__set_next_role(rsc, rsc->role, "unmanaged"); assign_to = pe__current_node(rsc); if (assign_to == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_PROMOTED) { reason = "promoted"; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { reason = "failed"; } else { reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, (assign_to? assign_to->details->uname : "no node"), reason); pcmk__assign_primitive(rsc, assign_to, true); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id); pcmk__assign_primitive(rsc, NULL, true); } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } pe__clear_resource_flags(rsc, pe_rsc_allocating); if (rsc->is_remote_node) { pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); CRM_ASSERT(remote_node != NULL); if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) { crm_trace("Setting Pacemaker Remote node %s to ONLINE", remote_node->details->id); remote_node->details->online = TRUE; /* We shouldn't consider an unseen remote-node unclean if we are going * to try and connect to it. Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting Pacemaker Remote node %s to SHUTDOWN (next role %s, %sallocated)", remote_node->details->id, role2text(rsc->next_role), (rsc->allocated_to? "" : "un")); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(pe_resource_t *rsc, const char *name, guint interval_ms) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; guint interval2_ms = 0; CRM_ASSERT(rsc); for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { value = crm_element_value(operation, "name"); if (!pcmk__str_eq(value, name, pcmk__str_casei)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval2_ms = crm_parse_interval_spec(value); if (interval_ms != interval2_ms) { continue; } if (id == NULL) { id = ID(operation); } else { pcmk__config_err("Operation %s is duplicate of %s (do not use " "same name and interval combination more " "than once per resource)", ID(operation), id); dup = TRUE; } } } return dup; } static bool op_cannot_recur(const char *name) { return pcmk__strcase_any_of(name, RSC_STOP, RSC_START, RSC_DEMOTE, RSC_PROMOTE, NULL); } static void RecurringOp(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? node->details->uname : "n/a"; guint interval_ms = 0; pe_action_t *mon = NULL; gboolean is_optional = TRUE; GList *possible_matches = NULL; CRM_ASSERT(rsc); /* Only process for the operations without role="Stopped" */ role = crm_element_value(operation, "role"); if (role && text2role(role) == RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node_uname); if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, pcmk_is_set(start->flags, pe_action_optional)? "optional" : "mandatory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s mandatory: not active", key); } else { GList *gIter = NULL; for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; if (pcmk_is_set(op->flags, pe_action_reschedule)) { is_optional = FALSE; break; } } g_list_free(possible_matches); } if (((rsc->next_role == RSC_ROLE_PROMOTED) && (role == NULL)) || (role != NULL && text2role(role) != rsc->next_role)) { int log_level = LOG_TRACE; const char *result = "Ignoring"; if (is_optional) { char *after_key = NULL; pe_action_t *cancel_op = NULL; // It's running, so cancel it log_level = LOG_INFO; result = "Cancelling"; cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node); switch (rsc->role) { case RSC_ROLE_UNPROMOTED: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_PROMOTED) { after_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { after_key = stop_key(rsc); } break; case RSC_ROLE_PROMOTED: after_key = demote_key(rsc); break; default: break; } if (after_key) { pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL, pe_order_runnable_left, data_set); } } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, role ? role : role2text(RSC_ROLE_UNPROMOTED), role2text(rsc->next_role)); free(key); return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", node_uname, mon->uuid); } if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", node_uname, mon->uuid); pe__clear_action_flags(mon, pe_action_runnable); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", node_uname, mon->uuid); pe__clear_action_flags(mon, pe_action_runnable); } else if (!pcmk_is_set(mon->flags, pe_action_optional)) { pe_rsc_info(rsc, " Start recurring %s (%us) for %s on %s", mon->task, interval_ms / 1000, rsc->id, node_uname); } if (rsc->next_role == RSC_ROLE_PROMOTED) { char *running_promoted = pcmk__itoa(PCMK_OCF_RUNNING_PROMOTED); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_promoted); free(running_promoted); } if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__new_ordering(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then|pe_order_runnable_left, data_set); pcmk__new_ordering(rsc, reload_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then|pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_PROMOTED) { pcmk__new_ordering(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_PROMOTED) { pcmk__new_ordering(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } } } static void Recurring(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set) { if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { RecurringOp(rsc, start, node, operation, data_set); } } } } static void RecurringOp_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? node->details->uname : "n/a"; guint interval_ms = 0; GList *possible_matches = NULL; GList *gIter = NULL; /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } // @TODO add support if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { crm_notice("Ignoring %s (recurring monitors for Stopped role are " "not supported for anonymous clones)", ID(operation)); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on nodes where it should not be running", ID(operation), rsc->id, role2text(rsc->next_role)); /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { pe_action_t *cancel_op = NULL; g_list_free(possible_matches); cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node); if ((rsc->next_role == RSC_ROLE_STARTED) || (rsc->next_role == RSC_ROLE_UNPROMOTED)) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ pcmk__new_ordering(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), node_uname); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *stop_node = (pe_node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; pe_action_t *stopped_mon = NULL; char *rc_inactive = NULL; - GList *probe_complete_ops = NULL; GList *stop_ops = NULL; GList *local_gIter = NULL; if (node && pcmk__str_eq(stop_node_uname, node_uname, pcmk__str_casei)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = pcmk__itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { GList *probes = pe__resource_actions(rsc, stop_node, RSC_STATUS, FALSE); GList *pIter = NULL; for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; order_actions(probe, stopped_mon, pe_order_runnable_left); crm_trace("%s then %s on %s", probe->uuid, stopped_mon->uuid, stop_node->details->uname); } g_list_free(probes); } - if (probe_complete_ops) { - g_list_free(probe_complete_ops); - } - stop_ops = pe__resource_actions(rsc, stop_node, RSC_STOP, TRUE); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { pe_action_t *stop = (pe_action_t *) local_gIter->data; if (!pcmk_is_set(stop->flags, pe_action_optional)) { stop_is_optional = FALSE; } if (!pcmk_is_set(stop->flags, pe_action_runnable)) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); pe__clear_action_flags(stopped_mon, pe_action_runnable); } if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__new_ordering(rsc, stop_key(rsc), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then|pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } if (is_optional == FALSE && probe_is_optional && stop_is_optional && !pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); pe__set_action_flags(stopped_mon, pe_action_optional); } if (pcmk_is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); pe__clear_action_flags(stopped_mon, pe_action_runnable); } if (pcmk_is_set(stopped_mon->flags, pe_action_runnable) && !pcmk_is_set(stopped_mon->flags, pe_action_optional)) { crm_notice(" Start recurring %s (%us) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } static void Recurring_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set) { if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } static void handle_migration_actions(pe_resource_t * rsc, pe_node_t *current, pe_node_t *chosen, pe_working_set_t * data_set) { pe_action_t *migrate_to = NULL; pe_action_t *migrate_from = NULL; pe_action_t *start = NULL; pe_action_t *stop = NULL; gboolean partial = rsc->partial_migration_target ? TRUE : FALSE; pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s", rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE"); start = start_action(rsc, chosen, TRUE); stop = stop_action(rsc, current, TRUE); if (partial == FALSE) { migrate_to = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set); } migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set); if ((migrate_to && migrate_from) || (migrate_from && partial)) { pe__set_action_flags(start, pe_action_migrate_runnable); pe__set_action_flags(stop, pe_action_migrate_runnable); // This is easier than trying to delete it from the graph pe__set_action_flags(start, pe_action_pseudo); /* order probes before migrations */ if (partial) { pe__set_action_flags(migrate_from, pe_action_migrate_runnable); migrate_from->needs = start->needs; pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set); } else { pe__set_action_flags(migrate_from, pe_action_migrate_runnable); pe__set_action_flags(migrate_to, pe_action_migrate_runnable); migrate_to->needs = start->needs; pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional|pe_order_implies_first_migratable, data_set); } pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional|pe_order_implies_first_migratable, data_set); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional|pe_order_implies_first_migratable|pe_order_pseudo_left, data_set); } if (migrate_to) { add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); /* Pacemaker Remote connections don't require pending to be recorded in * the CIB. We can reduce CIB writes by not setting PENDING for them. */ if (rsc->is_remote_node == FALSE) { /* migrate_to takes place on the source node, but can * have an effect on the target node depending on how * the agent is written. Because of this, we have to maintain * a record that the migrate_to occurred, in case the source node * loses membership while the migrate_to action is still in-flight. */ add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true"); } } if (migrate_from) { add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); } } void native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *start = NULL; pe_node_t *chosen = NULL; pe_node_t *current = NULL; gboolean need_stop = FALSE; bool need_promote = FALSE; gboolean is_moving = FALSE; gboolean allow_migrate = pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)? TRUE : FALSE; GList *gIter = NULL; unsigned int num_all_active = 0; unsigned int num_clean_active = 0; bool multiply_active = FALSE; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; CRM_ASSERT(rsc); chosen = rsc->allocated_to; next_role = rsc->next_role; if (next_role == RSC_ROLE_UNKNOWN) { pe__set_next_role(rsc, (chosen == NULL)? RSC_ROLE_STOPPED : RSC_ROLE_STARTED, "allocation"); } pe_rsc_trace(rsc, "Creating all actions for %s transition from %s to %s (%s) on %s", rsc->id, role2text(rsc->role), role2text(rsc->next_role), ((next_role == RSC_ROLE_UNKNOWN)? "implicit" : "explicit"), ((chosen == NULL)? "no node" : chosen->details->uname)); current = pe__find_active_on(rsc, &num_all_active, &num_clean_active); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { pe_node_t *dangling_source = (pe_node_t *) gIter->data; pe_action_t *stop = NULL; pe_rsc_trace(rsc, "Creating stop action %sfor %s on %s due to dangling migration", pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)? "and cleanup " : "", rsc->id, dangling_source->details->uname); stop = stop_action(rsc, dangling_source, FALSE); pe__set_action_flags(stop, pe_action_dangle); if (pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, dangling_source, FALSE, data_set); } } if ((num_all_active == 2) && (num_clean_active == 2) && chosen && rsc->partial_migration_source && rsc->partial_migration_target && (current->details == rsc->partial_migration_source->details) && (chosen->details == rsc->partial_migration_target->details)) { /* The chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with partial migration " "to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else if (!pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) { /* If a resource has "requires" set to nothing or quorum, don't consider * it active on unclean nodes (similar to how all resources behave when * stonith-enabled is false). We can start such resources elsewhere * before fencing completes, and if we considered the resource active on * the failed node, we would attempt recovery for being active on * multiple nodes. */ multiply_active = (num_clean_active > 1); } else { multiply_active = (num_all_active > 1); } if (multiply_active) { if (rsc->partial_migration_target && rsc->partial_migration_source) { // Migration was in progress, but we've chosen a different target crm_notice("Resource %s can no longer migrate from %s to %s " "(will stop on both nodes)", rsc->id, rsc->partial_migration_source->details->uname, rsc->partial_migration_target->details->uname); } else { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Resource was (possibly) incorrectly multiply active pe_proc_err("%s resource %s might be active on %u nodes (%s)", crm_str(class), rsc->id, num_all_active, recovery2text(rsc->recovery_type)); crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); } if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in process, but the migration * target is not chosen still, clear all partial migration data. */ rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = FALSE; } if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { pe_rsc_trace(rsc, "Creating start action for %s to represent already pending start", rsc->id); start = start_action(rsc, chosen, TRUE); pe__set_action_flags(start, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s from %s to %s", rsc->id, crm_str(current->details->uname), crm_str(chosen->details->uname)); is_moving = TRUE; need_stop = TRUE; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (pcmk_is_set(rsc->flags, pe_rsc_stop)) { need_stop = TRUE; pe_rsc_trace(rsc, "Recovering %s", rsc->id); } else { pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); if (rsc->next_role == RSC_ROLE_PROMOTED) { need_promote = TRUE; } } } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { pe_rsc_trace(rsc, "Creating start action for promoted resource %s", rsc->id); start = start_action(rsc, chosen, TRUE); if (!pcmk_is_set(start->flags, pe_action_optional)) { // Recovery of a promoted resource pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id); need_stop = TRUE; } } /* Create any additional actions required when bringing resource down and * back up to same level. */ role = rsc->role; while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s", (need_stop? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while ((rsc->role <= rsc->next_role) && (role != rsc->role) && !pcmk_is_set(rsc->flags, pe_rsc_block)) { bool required = need_stop; next_role = rsc_state_matrix[role][rsc->role]; if ((next_role == RSC_ROLE_PROMOTED) && need_promote) { required = true; } pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s", (required? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); if (rsc_action_matrix[role][next_role](rsc, chosen, !required, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)", rsc->id, role2text(role), role2text(next_role), role2text(rsc->next_role)); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Not creating recurring monitors for blocked resource %s", rsc->id); } else if ((rsc->next_role != RSC_ROLE_STOPPED) || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Creating recurring monitors for %s resource %s", ((rsc->next_role == RSC_ROLE_STOPPED)? "unmanaged" : "active"), rsc->id); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { pe_rsc_trace(rsc, "Creating recurring monitors for inactive resource %s", rsc->id); Recurring_Stopped(rsc, NULL, NULL, data_set); } /* if we are stuck in a partial migration, where the target * of the partial migration no longer matches the chosen target. * A full stop/start is required */ if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) { pe_rsc_trace(rsc, "Not allowing partial migration of %s to continue", rsc->id); allow_migrate = FALSE; } else if (!is_moving || !pcmk_is_set(rsc->flags, pe_rsc_managed) || pcmk_any_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_start_pending) || (current && current->details->unclean) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; } if (allow_migrate) { handle_migration_actions(rsc, current, chosen, data_set); } } static void rsc_avoids_remote_nodes(pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } /*! * \internal * \brief Return allowed nodes as (possibly sorted) list * * Convert a resource's hash table of allowed nodes to a list. If printing to * stdout, sort the list, to keep action ID numbers consistent for regression * test output (while avoiding the performance hit on a live cluster). * * \param[in] rsc Resource to check for allowed nodes * \param[in] data_set Cluster working set * * \return List of resource's allowed nodes * \note Callers should take care not to rely on the list being sorted. */ static GList * allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set) { GList *allowed_nodes = NULL; if (rsc->allowed_nodes) { allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes); } if (!pcmk__is_daemon) { allowed_nodes = g_list_sort(allowed_nodes, sort_node_uname); } return allowed_nodes; } void native_internal_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ pe_resource_t *top = NULL; GList *allowed_nodes = NULL; bool check_unfencing = FALSE; bool check_utilization = FALSE; if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping native constraints for unmanaged resource: %s", rsc->id); return; } top = uber_parent(rsc); // Whether resource requires unfencing check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device) && pcmk_is_set(data_set->flags, pe_flag_enable_unfencing) && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing); // Whether a non-default placement strategy is used check_utilization = (g_hash_table_size(rsc->utilization) > 0) && !pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei); // Order stops before starts (i.e. restart) pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional|pe_order_implies_then|pe_order_restart, data_set); // Promotable ordering: demote before stop, start before promote if (pcmk_is_set(top->flags, pe_rsc_promotable) || (rsc->role > RSC_ROLE_UNPROMOTED)) { pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_promoted_implies_first, data_set); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } // Don't clear resource history if probing on same node pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, pe_order_same_node|pe_order_then_cancels_first, data_set); // Certain checks need allowed nodes if (check_unfencing || check_utilization || rsc->container) { allowed_nodes = allowed_nodes_as_list(rsc, data_set); } if (check_unfencing) { /* Check if the node needs to be unfenced first */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set); crm_debug("Ordering any stops of %s before %s, and any starts after", rsc->id, unfence->uuid); /* * It would be more efficient to order clone resources once, * rather than order each instance, but ordering the instance * allows us to avoid unnecessary dependencies that might conflict * with user constraints. * * @TODO: This constraint can still produce a transition loop if the * resource has a stop scheduled on the node being unfenced, and * there is a user ordering constraint to start some other resource * (which will be ordered after the unfence) before stopping this * resource. An example is "start some slow-starting cloned service * before stopping an associated virtual IP that may be moving to * it": * stop this -> unfencing -> start that -> stop this */ pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional|pe_order_same_node, data_set); pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence, rsc, start_key(rsc), NULL, pe_order_implies_then_on_node|pe_order_same_node, data_set); } } if (check_utilization) { GList *gIter = NULL; pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s", current->details->uname); pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = pe__copy_node(current); pe__clear_action_flags(load_stopped, pe_action_optional); } pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *next = item->data; char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s", next->details->uname); pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = pe__copy_node(next); pe__clear_action_flags(load_stopped, pe_action_optional); } pcmk__new_ordering(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); pcmk__new_ordering(NULL, strdup(load_stopped_task), load_stopped, rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_load, data_set); free(load_stopped_task); } } if (rsc->container) { pe_resource_t *remote_rsc = NULL; if (rsc->is_remote_node) { // rsc is the implicit remote connection for a guest or bundle node /* Do not allow a guest resource to live on a Pacemaker Remote node, * to avoid nesting remotes. However, allow bundles to run on remote * nodes. */ if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { rsc_avoids_remote_nodes(rsc->container); } /* If someone cleans up a guest or bundle node's container, we will * likely schedule a (re-)probe of the container and recovery of the * connection. Order the connection stop after the container probe, * so that if we detect the container running, we will trigger a new * transition and avoid the unnecessary recovery. */ pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc, RSC_STOP, pe_order_optional, data_set); /* A user can specify that a resource must start on a Pacemaker Remote * node by explicitly configuring it with the container=NODENAME * meta-attribute. This is of questionable merit, since location * constraints can accomplish the same thing. But we support it, so here * we check whether a resource (that is not itself a remote connection) * has container set to a remote node or guest node resource. */ } else if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else { remote_rsc = pe__resource_contains_guest_node(data_set, rsc->container); } if (remote_rsc) { /* Force the resource on the Pacemaker Remote node instead of * colocating the resource with the container resource. */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { /* This resource is either a filler for a container that does NOT * represent a Pacemaker Remote node, or a Pacemaker Remote * connection resource for a guest node or bundle. */ int score; crm_trace("Order and colocate %s relative to its container %s", rsc->id, rsc->container->id); pcmk__new_ordering(rsc->container, pcmk__op_key(rsc->container->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then|pe_order_runnable_left, data_set); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, pcmk__op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { score = 10000; /* Highly preferred but not essential */ } else { score = INFINITY; /* Force them to run on the same host */ } pcmk__new_colocation("resource-with-container", NULL, score, rsc, rsc->container, NULL, NULL, true, data_set); } } if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { /* don't allow remote nodes to run stonith devices * or remote connection resources.*/ rsc_avoids_remote_nodes(rsc); } g_list_free(allowed_nodes); } void native_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { if (dependent == NULL) { pe_err("dependent was NULL for %s", constraint->id); return; } else if (constraint->primary == NULL) { pe_err("primary was NULL for %s", constraint->id); return; } pe_rsc_trace(dependent, "Processing colocation constraint between %s and %s", dependent->id, primary->id); primary->cmds->rsc_colocation_rh(dependent, primary, constraint, data_set); } void native_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { enum pcmk__coloc_affects filter_results; CRM_ASSERT((dependent != NULL) && (primary != NULL)); filter_results = pcmk__colocation_affects(dependent, primary, constraint, false); pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)", ((constraint->score > 0)? "Colocating" : "Anti-colocating"), dependent->id, primary->id, constraint->id, constraint->score, filter_results); switch (filter_results) { case pcmk__coloc_affects_role: pcmk__apply_coloc_to_priority(dependent, primary, constraint); break; case pcmk__coloc_affects_location: pcmk__apply_coloc_to_weights(dependent, primary, constraint); break; case pcmk__coloc_affects_nothing: default: return; } } enum pe_action_flags native_action_flags(pe_action_t * action, pe_node_t * node) { return action->flags; } static inline bool is_primitive_action(pe_action_t *action) { return action && action->rsc && (action->rsc->variant == pe_native); } /*! * \internal * \brief Clear a single action flag and set reason text * * \param[in] action Action whose flag should be cleared * \param[in] flag Action flag that should be cleared * \param[in] reason Action that is the reason why flag is being cleared */ #define clear_action_flag_because(action, flag, reason) do { \ if (pcmk_is_set((action)->flags, (flag))) { \ pe__clear_action_flags(action, flag); \ if ((action)->rsc != (reason)->rsc) { \ char *reason_text = pe__action2reason((reason), (flag)); \ pe_action_set_reason((action), reason_text, \ ((flag) == pe_action_migrate_runnable)); \ free(reason_text); \ } \ } \ } while (0) /*! * \internal * \brief Set action bits appropriately when pe_restart_order is used * * \param[in] first 'First' action in an ordering with pe_restart_order * \param[in] then 'Then' action in an ordering with pe_restart_order * \param[in] filter What ordering flags to care about * * \note pe_restart_order is set for "stop resource before starting it" and * "stop later group member before stopping earlier group member" */ static void handle_restart_ordering(pe_action_t *first, pe_action_t *then, enum pe_action_flags filter) { const char *reason = NULL; CRM_ASSERT(is_primitive_action(first)); CRM_ASSERT(is_primitive_action(then)); // We need to update the action in two cases: // ... if 'then' is required if (pcmk_is_set(filter, pe_action_optional) && !pcmk_is_set(then->flags, pe_action_optional)) { reason = "restart"; } /* ... if 'then' is unrunnable action on same resource (if a resource * should restart but can't start, we still want to stop) */ if (pcmk_is_set(filter, pe_action_runnable) && !pcmk_is_set(then->flags, pe_action_runnable) && pcmk_is_set(then->rsc->flags, pe_rsc_managed) && (first->rsc == then->rsc)) { reason = "stop"; } if (reason == NULL) { return; } pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); // Make 'first' required if it is runnable if (pcmk_is_set(first->flags, pe_action_runnable)) { clear_action_flag_because(first, pe_action_optional, then); } // Make 'first' required if 'then' is required if (!pcmk_is_set(then->flags, pe_action_optional)) { clear_action_flag_because(first, pe_action_optional, then); } // Make 'first' unmigratable if 'then' is unmigratable if (!pcmk_is_set(then->flags, pe_action_migrate_runnable)) { clear_action_flag_because(first, pe_action_migrate_runnable, then); } // Make 'then' unrunnable if 'first' is required but unrunnable if (!pcmk_is_set(first->flags, pe_action_optional) && !pcmk_is_set(first->flags, pe_action_runnable)) { clear_action_flag_because(then, pe_action_runnable, first); } } /* \param[in] flags Flags from action_flags_for_ordering() */ enum pe_graph_flags native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; if (type & pe_order_asymmetrical) { pe_resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && pcmk__str_eq(then->task, RSC_STOP, pcmk__str_casei)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role >= RSC_ROLE_STARTED) && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei) && pcmk_is_set(then->flags, pe_action_optional) && then->node && pcmk__list_of_1(then_rsc->running_on) && then->node->details == ((pe_node_t *) then_rsc->running_on->data)->details) { /* Ignore. If 'then' is supposed to be started after 'first', but * 'then' is already started, there is nothing to be done when * asymmetrical -- unless the start is mandatory, which indicates * the resource is restarting, and the ordering is still needed. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ clear_action_flag_because(then, pe_action_optional, first); clear_action_flag_because(then, pe_action_runnable, first); } else { /* ignore... then is allowed to start/stop if it wants to. */ } } if (pcmk_is_set(type, pe_order_implies_first) && !pcmk_is_set(then_flags, pe_action_optional)) { // Then is required, and implies first should be, too if (pcmk_is_set(filter, pe_action_optional) && !pcmk_is_set(flags, pe_action_optional) && pcmk_is_set(first_flags, pe_action_optional)) { clear_action_flag_because(first, pe_action_optional, then); } if (pcmk_is_set(flags, pe_action_migrate_runnable) && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) { clear_action_flag_because(first, pe_action_migrate_runnable, then); } } if (type & pe_order_promoted_implies_first) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && (then->rsc != NULL) && (then->rsc->role == RSC_ROLE_PROMOTED)) { clear_action_flag_because(first, pe_action_optional, then); if (pcmk_is_set(first->flags, pe_action_migrate_runnable) && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) { clear_action_flag_because(first, pe_action_migrate_runnable, then); } } } if ((type & pe_order_implies_first_migratable) && pcmk_is_set(filter, pe_action_optional)) { if (((then->flags & pe_action_migrate_runnable) == FALSE) || ((then->flags & pe_action_runnable) == FALSE)) { clear_action_flag_because(first, pe_action_runnable, then); } if ((then->flags & pe_action_optional) == 0) { clear_action_flag_because(first, pe_action_optional, then); } } if ((type & pe_order_pseudo_left) && pcmk_is_set(filter, pe_action_optional)) { if ((first->flags & pe_action_runnable) == FALSE) { clear_action_flag_because(then, pe_action_migrate_runnable, first); pe__clear_action_flags(then, pe_action_pseudo); } } if (pcmk_is_set(type, pe_order_runnable_left) && pcmk_is_set(filter, pe_action_runnable) && pcmk_is_set(then->flags, pe_action_runnable) && !pcmk_is_set(flags, pe_action_runnable)) { clear_action_flag_because(then, pe_action_runnable, first); clear_action_flag_because(then, pe_action_migrate_runnable, first); } if (pcmk_is_set(type, pe_order_implies_then) && pcmk_is_set(filter, pe_action_optional) && pcmk_is_set(then->flags, pe_action_optional) && !pcmk_is_set(flags, pe_action_optional) && !pcmk_is_set(first->flags, pe_action_migrate_runnable)) { clear_action_flag_because(then, pe_action_optional, first); } if (pcmk_is_set(type, pe_order_restart)) { handle_restart_ordering(first, then, filter); } if (then_flags != then->flags) { pe__set_graph_flags(changed, first, pe_graph_updated_then); pe_rsc_trace(then->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'first' %s (%#.6x)", then->uuid, then->node? then->node->details->uname : "no node", then->flags, then_flags, first->uuid, first->flags); if(then->rsc && then->rsc->parent) { /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */ pcmk__update_action_for_orderings(then, data_set); } } if (first_flags != first->flags) { pe__set_graph_flags(changed, first, pe_graph_updated_first); pe_rsc_trace(first->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'then' %s (%#.6x)", first->uuid, first->node? first->node->details->uname : "no node", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { pcmk__apply_location(constraint, rsc); } void native_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GList *gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); - graph_element_from_action(action, data_set); + pcmk__add_action_to_graph(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GList *gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_action_t *stop; if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details) { pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if(rsc->allocated_to == NULL) { pe_action_set_reason(stop, "node availability", TRUE); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe__clear_action_flags(stop, pe_action_runnable); } if (pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, FALSE, data_set); order_actions(stop, unfence, pe_order_implies_first); if (!pcmk__node_unfenced(current)) { pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, current->details->uname); } } } return TRUE; } gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { pe_action_t *start = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s on %s %d %d", rsc->id, next ? next->details->uname : "N/A", optional, next ? next->weight : 0); start = start_action(rsc, next, TRUE); pcmk__order_vs_unfence(rsc, next, start, pe_order_implies_then, data_set); if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) { pe__clear_action_flags(start, pe_action_optional); } return TRUE; } gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GList *gIter = NULL; gboolean runnable = TRUE; GList *action_list = NULL; CRM_ASSERT(rsc); CRM_CHECK(next != NULL, return FALSE); pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname); action_list = pe__resource_actions(rsc, next, RSC_START, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *start = (pe_action_t *) gIter->data; if (!pcmk_is_set(start->flags, pe_action_runnable)) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); action_list = pe__resource_actions(rsc, next, RSC_PROMOTE, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *promote = (pe_action_t *) gIter->data; pe__clear_action_flags(promote, pe_action_runnable); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GList *gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_UNPROMOTED, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set) { if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, optional? pe_order_implies_then : pe_order_optional, data_set); pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, optional? pe_order_implies_then : pe_order_optional, data_set); return TRUE; } gboolean native_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { enum pe_ordering flags = pe_order_optional; char *key = NULL; pe_action_t *probe = NULL; pe_node_t *running = NULL; pe_node_t *allowed = NULL; pe_resource_t *top = uber_parent(rsc); static const char *rc_promoted = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = pcmk__itoa(PCMK_OCF_NOT_RUNNING); rc_promoted = pcmk__itoa(PCMK_OCF_RUNNING_PROMOTED); } CRM_CHECK(node != NULL, return FALSE); if (!force && !pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; } if (pe__is_guest_or_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot run stonith agents", rsc->id, node->details->id); return FALSE; } else if (pe__is_guest_node(node) && pe__resource_contains_guest_node(data_set, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because guest nodes cannot run resources containing guest nodes", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot host remote connections", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GList *gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } else if ((rsc->container) && (!rsc->is_remote_node)) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); return FALSE; } if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } // Check whether resource is already known on node if (!force && g_hash_table_lookup(rsc->known_on, node->details->id)) { pe_rsc_trace(rsc, "Skipping known: %s on %s", rsc->id, node->details->uname); return FALSE; } allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (rsc->exclusive_discover || top->exclusive_discover) { if (allowed == NULL) { /* exclusive discover is enabled and this node is not in the allowed list. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, A", rsc->id, node->details->id); return FALSE; } else if (allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, B", rsc->id, node->details->id); return FALSE; } } if(allowed == NULL && node->rsc_discover_mode == pe_discover_never) { /* If this node was allowed to host this resource it would * have been explicitly added to the 'allowed_nodes' list. * However it wasn't and the node has discovery disabled, so * no need to probe for this resource. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, C", rsc->id, node->details->id); return FALSE; } if (allowed && allowed->rsc_discover_mode == pe_discover_never) { /* this resource is marked as not needing to be discovered on this node */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, discovery mode", rsc->id, node->details->id); return FALSE; } if (pe__is_guest_node(node)) { pe_resource_t *remote = node->details->remote_rsc->container; if(remote->role == RSC_ROLE_STOPPED) { /* If the container is stopped, then we know anything that * might have been inside it is also stopped and there is * no need to probe. * * If we don't know the container's state on the target * either: * * - the container is running, the transition will abort * and we'll end up in a different case next time, or * * - the container is stopped * * Either way there is no need to probe. * */ if(remote->allocated_to && g_hash_table_lookup(remote->known_on, remote->allocated_to->details->id) == NULL) { /* For safety, we order the 'rsc' start after 'remote' * has been probed. * * Using 'top' helps for groups, but we may need to * follow the start's ordering chain backwards. */ pcmk__new_ordering(remote, pcmk__op_key(remote->id, RSC_STATUS, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, data_set); } pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopped", rsc->id, node->details->id, remote->id); return FALSE; /* Here we really we want to check if remote->stop is required, * but that information doesn't exist yet */ } else if(node->details->remote_requires_reset || node->details->unclean || pcmk_is_set(remote->flags, pe_rsc_failed) || remote->next_role == RSC_ROLE_STOPPED || (remote->allocated_to && pe_find_node(remote->running_on, remote->allocated_to->details->uname) == NULL) ) { /* The container is stopping or restarting, don't start * 'rsc' until 'remote' stops as this also implies that * 'rsc' is stopped - avoiding the need to probe */ pcmk__new_ordering(remote, pcmk__op_key(remote->id, RSC_STOP, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, data_set); pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopping, restarting or moving", rsc->id, node->details->id, remote->id); return FALSE; /* } else { * The container is running so there is no problem probing it */ } } key = pcmk__op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); pe__clear_action_flags(probe, pe_action_optional); pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional, data_set); /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_PROMOTED) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_promoted); } crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role), pcmk_is_set(probe->flags, pe_action_runnable), rsc->running_on); if (pcmk__is_unfence_device(rsc, data_set) || !pe_rsc_is_clone(top)) { top = rsc; } else { crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id); } if (!pcmk_is_set(probe->flags, pe_action_runnable) && (rsc->running_on == NULL)) { /* Prevent the start from occurring if rsc isn't active, but * don't cause it to stop if it was active already */ pe__set_order_flags(flags, pe_order_runnable_left); } pcmk__new_ordering(rsc, NULL, probe, top, pcmk__op_key(top->id, RSC_START, 0), NULL, flags, data_set); // Order the probe before any agent reload pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL, pe_order_optional, data_set); -#if 0 - // complete is always null currently - if (!pcmk__is_unfence_device(rsc, data_set)) { - /* Normally rsc.start depends on probe complete which depends - * on rsc.probe. But this can't be the case for fence devices - * with unfencing, as it would create graph loops. - * - * So instead we explicitly order 'rsc.probe then rsc.start' - */ - order_actions(probe, complete, pe_order_implies_then); - } -#endif return TRUE; } void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set) { GList *gIter = NULL; pe_action_t *reload = NULL; if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; ReloadRsc(child_rsc, node, data_set); } return; } else if (rsc->variant > pe_native) { /* Complex resource with no children */ return; } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "%s: unmanaged", rsc->id); return; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { /* We don't need to specify any particular actions here, normal failure * recovery will apply. */ pe_rsc_trace(rsc, "%s: preventing agent reload because failed", rsc->id); return; } else if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { /* If a resource's configuration changed while a start was pending, * force a full restart. */ pe_rsc_trace(rsc, "%s: preventing agent reload because start pending", rsc->id); stop_action(rsc, node, FALSE); return; } else if (node == NULL) { pe_rsc_trace(rsc, "%s: not active", rsc->id); return; } pe_rsc_trace(rsc, "Processing %s", rsc->id); pe__set_resource_flags(rsc, pe_rsc_reload); reload = custom_action(rsc, reload_key(rsc), CRMD_ACTION_RELOAD_AGENT, node, FALSE, TRUE, data_set); pe_action_set_reason(reload, "resource definition change", FALSE); pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL, pe_order_optional|pe_order_then_cancels_first, data_set); pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL, pe_order_optional|pe_order_then_cancels_first, data_set); } void native_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); pe_resource_t *parent; if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container) { crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id); } } } diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c index fe7d54ca51..3d0372d62c 100644 --- a/lib/pacemaker/pcmk_sched_remote.c +++ b/lib/pacemaker/pcmk_sched_remote.c @@ -1,737 +1,737 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } /* We always use pe_order_preserve with these convenience functions to exempt * internally generated constraints from the prohibition of user constraints * involving remote connection resources. * * The start ordering additionally uses pe_order_runnable_left so that the * specified action is not runnable if the start is not runnable. */ static inline void order_start_then_action(pe_resource_t *lh_rsc, pe_action_t *rh_action, enum pe_ordering extra, pe_working_set_t *data_set) { if ((lh_rsc != NULL) && (rh_action != NULL) && (data_set != NULL)) { pcmk__new_ordering(lh_rsc, start_key(lh_rsc), NULL, rh_action->rsc, NULL, rh_action, pe_order_preserve|pe_order_runnable_left|extra, data_set); } } static inline void order_action_then_stop(pe_action_t *lh_action, pe_resource_t *rh_rsc, enum pe_ordering extra, pe_working_set_t *data_set) { if ((lh_action != NULL) && (rh_rsc != NULL) && (data_set != NULL)) { pcmk__new_ordering(lh_action->rsc, NULL, lh_action, rh_rsc, stop_key(rh_rsc), NULL, pe_order_preserve|extra, data_set); } } static enum remote_connection_state get_remote_node_state(pe_node_t *node) { pe_resource_t *remote_rsc = NULL; pe_node_t *cluster_node = NULL; CRM_ASSERT(node != NULL); remote_rsc = node->details->remote_rsc; CRM_ASSERT(remote_rsc != NULL); cluster_node = pe__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) || (remote_rsc->allocated_to == NULL)) { // The connection resource is not going to run anywhere if ((cluster_node != NULL) && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) && remote_rsc->remote_reconnect_ms && node->details->remote_was_fenced && !pe__shutdown_requested(node)) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. */ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, so see * if we can recover it first */ return remote_state_unknown; } else if (cluster_node->details->unclean || !(cluster_node->details->online)) { // Connection is running on a dead node, see if we can recover it first return remote_state_resting; } else if (pcmk__list_of_multiple(remote_rsc->running_on) && (remote_rsc->partial_migration_source != NULL) && (remote_rsc->partial_migration_target != NULL)) { /* We're in the middle of migrating a connection resource, so wait until * after the migration completes before performing any actions. */ return remote_state_resting; } return remote_state_alive; } static int is_recurring_action(pe_action_t *action) { guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { return 0; } return (interval_ms > 0); } /*! * \internal * \brief Order actions on remote node relative to actions for the connection */ static void apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set) { pe_resource_t *remote_rsc = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = get_remote_node_state(action->node); enum pe_ordering order_opts = pe_order_none; if (action->rsc == NULL) { return; } CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc != NULL); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: order_opts = pe_order_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ pe__set_order_flags(order_opts, pe_order_implies_then); } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts, data_set); break; case stop_rsc: if (state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else if (state == remote_state_failed) { /* The resource is active on the node, but since we don't have a * valid connection, the only way to stop the resource is by * fencing the node. There is no need to order the stop relative * to the remote connection, since the stop will become implied * by the fencing. */ pe_fence_node(data_set, action->node, "resources are active but connection is unrecoverable", FALSE); } else if (remote_rsc->next_role == RSC_ROLE_STOPPED) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; case action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. */ if ((state == remote_state_resting) || (state == remote_state_unknown)) { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } else { pe_node_t *cluster_node = pe__current_node(remote_rsc); if ((task == monitor_rsc) && (state == remote_state_failed)) { /* We would only be here if we do not know the state of the * resource on the remote node. Since we have no way to find * out, it is necessary to fence the node. */ pe_fence_node(data_set, action->node, "resources are in unknown state " "and connection is unrecoverable", FALSE); } if ((cluster_node != NULL) && (state == remote_state_stopped)) { /* The connection is currently up, but is going down * permanently. Make sure we check services are actually * stopped _before_ we let the connection get closed. */ order_action_then_stop(action, remote_rsc, pe_order_runnable_left, data_set); } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } } break; } } static void apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set) { /* VMs are also classified as containers for these purposes... in * that they both involve a 'thing' running on a real or remote * cluster node. * * This allows us to be smarter about the type and extent of * recovery actions required in various scenarios */ pe_resource_t *remote_rsc = NULL; pe_resource_t *container = NULL; enum action_tasks task = text2task(action->task); CRM_ASSERT(action->rsc != NULL); CRM_ASSERT(action->node != NULL); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc != NULL); container = remote_rsc->container; CRM_ASSERT(container != NULL); if (pcmk_is_set(container->flags, pe_rsc_failed)) { pe_fence_node(data_set, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "", container->id); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: // Force resource recovery if the container is recovered order_start_then_action(container, action, pe_order_implies_then, data_set); // Wait for the connection resource to be up, too order_start_then_action(remote_rsc, action, pe_order_none, data_set); break; case stop_rsc: case action_demote: if (pcmk_is_set(container->flags, pe_rsc_failed)) { /* When the container representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the container stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the container's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pe_order_none, data_set); } break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if(task != no_action) { order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; } } /*! * \internal * \brief Order all relevant actions relative to remote connection actions * * \param[in] data_set Cluster working set */ void pcmk__order_remote_connection_actions(pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { return; } crm_trace("Creating remote connection orderings"); for (GList *gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; pe_resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (action->rsc->is_remote_node && pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) { pcmk__new_ordering(action->rsc, NULL, action, action->rsc, pcmk__op_key(action->rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); continue; } // We are only interested in actions allocated to a node if (action->node == NULL) { continue; } if (!pe__is_guest_or_remote_node(action->node)) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * pcmk__apply_orderings(). */ if (pcmk_is_set(action->flags, pe_action_pseudo)) { continue; } remote = action->node->details->remote_rsc; if (remote == NULL) { // Orphaned continue; } /* Another special case: if a resource is moving to a Pacemaker Remote * node, order the stop on the original node after any start of the * remote connection. This ensures that if the connection fails to * start, we leave the resource running on the original node. */ if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { for (GList *item = action->rsc->actions; item != NULL; item = item->next) { pe_action_t *rsc_action = item->data; if ((rsc_action->node->details != action->node->details) && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) { pcmk__new_ordering(remote, start_key(remote), NULL, action->rsc, NULL, rsc_action, pe_order_optional, data_set); } } } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). * * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part * of this logic rather than action2xml(). */ if (remote->container) { crm_trace("Container ordering for %s", action->uuid); apply_container_ordering(action, data_set); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action, data_set); } } } /*! * \internal * \brief Check whether a node is a failed remote node * * \param[in] node Node to check * * \return true if \p node is a failed remote node, false otherwise */ bool pcmk__is_failed_remote_node(pe_node_t *node) { return pe__is_remote_node(node) && (node->details->remote_rsc != NULL) && (get_remote_node_state(node) == remote_state_failed); } /*! * \internal * \brief Check whether a given resource corresponds to a given node as guest * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p node is a guest node and \p rsc is its containing * resource, otherwise false */ bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node) { return (rsc != NULL) && (rsc->fillers != NULL) && (node != NULL) && (node->details->remote_rsc != NULL) && (node->details->remote_rsc->container == rsc); } /*! * \internal * \brief Get proper connection host that a remote action must be routed through * * A remote connection resource might be starting, stopping, or migrating in the * same transition that an action needs to be executed on its Pacemaker Remote * node. Determine the proper node that the remote action should be routed * through. * * \param[in] action (Potentially remote) action to route * * \return Connection host that action should be routed through if remote, * otherwise NULL */ pe_node_t * pcmk__connection_host_for_action(pe_action_t *action) { pe_node_t *began_on = NULL; pe_node_t *ended_on = NULL; bool partial_migration = false; const char *task = action->task; if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei) || !pe__is_guest_or_remote_node(action->node)) { return NULL; } CRM_ASSERT(action->node->details->remote_rsc != NULL); began_on = pe__current_node(action->node->details->remote_rsc); ended_on = action->node->details->remote_rsc->allocated_to; if (action->node->details->remote_rsc && (action->node->details->remote_rsc->container == NULL) && action->node->details->remote_rsc->partial_migration_target) { partial_migration = true; } if (began_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "next node %s (starting)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } if (ended_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "current node %s (stopping)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return began_on; } if (began_on->details == ended_on->details) { crm_trace("Routing %s for %s through remote connection's " "current node %s (not moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* If we get here, the remote connection is moving during this transition. * This means some actions for resources behind the connection will get * routed through the cluster node the connection resource is currently on, * and others are routed through the cluster node the connection will end up * on. */ if (pcmk__str_eq(task, "notify", pcmk__str_casei)) { task = g_hash_table_lookup(action->meta, "notify_operation"); } /* * Stop, demote, and migration actions must occur before the connection can * move (these actions are required before the remote resource can stop). In * this case, we know these actions have to be routed through the initial * cluster node the connection resource lived on before the move takes * place. * * The exception is a partial migration of a (non-guest) remote connection * resource; in that case, all actions (even these) will be ordered after * the connection's pseudo-start on the migration target, so the target is * the router node. */ if (pcmk__strcase_any_of(task, "cancel", "stop", "demote", "migrate_from", "migrate_to", NULL) && !partial_migration) { crm_trace("Routing %s for %s through remote connection's " "current node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* Everything else (start, promote, monitor, probe, refresh, * clear failcount, delete, ...) must occur after the connection starts on * the node it is moving to. */ crm_trace("Routing %s for %s through remote connection's " "next node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } /*! * \internal * \brief Replace remote connection's addr="#uname" with actual address * * REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource * with its "addr" parameter set to "#uname", pull the actual value from the - * parameters evaluated without a node (which was put there earlier in stage8() - * when the bundle's expand() method was called). + * parameters evaluated without a node (which was put there earlier in + * pcmk__create_graph() when the bundle's expand() method was called). * * \param[in] rsc Resource to check * \param[in] params Resource parameters evaluated per node * \param[in] data_set Cluster working set */ void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params, pe_working_set_t *data_set) { const char *remote_addr = g_hash_table_lookup(params, XML_RSC_ATTR_REMOTE_RA_ADDR); if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) { GHashTable *base = pe_rsc_params(rsc, NULL, data_set); remote_addr = g_hash_table_lookup(base, XML_RSC_ATTR_REMOTE_RA_ADDR); if (remote_addr != NULL) { g_hash_table_insert(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(remote_addr)); } } } /*! * \brief Add special bundle meta-attributes to XML * * If a given action will be executed on a guest node (including a bundle), * add the special bundle meta-attribute "container-attribute-target" and * environment variable "physical_host" as XML attributes (using meta-attribute * naming). * * \param[in] args_xml XML to add attributes to * \param[in] action Action to check */ void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action) { pe_node_t *host = NULL; enum action_tasks task; if (!pe__is_guest_node(action->node)) { return; } task = text2task(action->task); if ((task == action_notify) || (task == action_notified)) { task = text2task(g_hash_table_lookup(action->meta, "notify_operation")); } switch (task) { case stop_rsc: case stopped_rsc: case action_demote: case action_demoted: // "Down" actions take place on guest's current host host = pe__current_node(action->node->details->remote_rsc->container); break; case start_rsc: case started_rsc: case monitor_rsc: case action_promote: case action_promoted: // "Up" actions take place on guest's next host host = action->node->details->remote_rsc->container->allocated_to; break; default: break; } if (host != NULL) { hash2metafield((gpointer) XML_RSC_ATTR_TARGET, (gpointer) g_hash_table_lookup(action->rsc->meta, XML_RSC_ATTR_TARGET), (gpointer) args_xml); hash2metafield((gpointer) PCMK__ENV_PHYSICAL_HOST, (gpointer) host->details->uname, (gpointer) args_xml); } } diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c index ebf5fc68a2..84c9a2783d 100644 --- a/lib/pacemaker/pcmk_sched_transition.c +++ b/lib/pacemaker/pcmk_sched_transition.c @@ -1,850 +1,850 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_free_event() #include #include #include #include #include #include #include static pcmk__output_t *out = NULL; static cib_t *fake_cib = NULL; static GList *fake_resource_list = NULL; static GList *fake_op_fail_list = NULL; gboolean bringing_nodes_online = FALSE; #define STATUS_PATH_MAX 512 #define NEW_NODE_TEMPLATE "//"XML_CIB_TAG_NODE"[@uname='%s']" #define NODE_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']" #define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static void inject_transient_attr(xmlNode * cib_node, const char *name, const char *value) { xmlNode *attrs = NULL; xmlNode *instance_attrs = NULL; const char *node_uuid = ID(cib_node); out->message(out, "inject-attr", name, value, cib_node); attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); if (attrs == NULL) { attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(attrs, XML_ATTR_ID, node_uuid); } instance_attrs = first_named_child(attrs, XML_TAG_ATTR_SETS); if (instance_attrs == NULL) { instance_attrs = create_xml_node(attrs, XML_TAG_ATTR_SETS); crm_xml_add(instance_attrs, XML_ATTR_ID, node_uuid); } crm_create_nvpair_xml(instance_attrs, NULL, name, value); } static void update_failcounts(xmlNode * cib_node, const char *resource, const char *task, guint interval_ms, int rc) { if (rc == 0) { return; } else if ((rc == 7) && (interval_ms == 0)) { return; } else { char *name = NULL; char *now = pcmk__ttoa(time(NULL)); name = pcmk__failcount_name(resource, task, interval_ms); inject_transient_attr(cib_node, name, "value++"); free(name); name = pcmk__lastfailure_name(resource, task, interval_ms); inject_transient_attr(cib_node, name, now); free(name); free(now); } } static void create_node_entry(cib_t * cib_conn, const char *node) { int rc = pcmk_ok; char *xpath = crm_strdup_printf(NEW_NODE_TEMPLATE, node); rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); if (rc == -ENXIO) { xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE); crm_xml_add(cib_object, XML_ATTR_ID, node); // Use node name as ID crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object, cib_sync_call | cib_scope_local); /* Not bothering with subsequent query to see if it exists, we'll bomb out later in the call to query_node_uuid()... */ free_xml(cib_object); } free(xpath); } static lrmd_event_data_t * create_op(xmlNode *cib_resource, const char *task, guint interval_ms, int outcome) { lrmd_event_data_t *op = NULL; xmlNode *xop = NULL; op = lrmd_new_event(ID(cib_resource), task, interval_ms); lrmd__set_result(op, outcome, PCMK_EXEC_DONE, "Simulated action result"); op->params = NULL; /* TODO: Fill me in */ op->t_run = (unsigned int) time(NULL); op->t_rcchange = op->t_run; op->call_id = 0; for (xop = pcmk__xe_first_child(cib_resource); xop != NULL; xop = pcmk__xe_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; } static xmlNode * inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc) { return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET, target_rc, NULL, crm_system_name, LOG_TRACE); } static xmlNode * inject_node_state(cib_t * cib_conn, const char *node, const char *uuid) { int rc = pcmk_ok; xmlNode *cib_object = NULL; char *xpath = crm_strdup_printf(NODE_TEMPLATE, node); if (bringing_nodes_online) { create_node_entry(cib_conn, node); } rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); if (cib_object && ID(cib_object) == NULL) { crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath); crm_log_xml_warn(cib_object, "Duplicates"); free(xpath); crm_exit(CRM_EX_SOFTWARE); return NULL; // not reached, but makes static analysis happy } if (rc == -ENXIO) { char *found_uuid = NULL; if (uuid == NULL) { query_node_uuid(cib_conn, node, &found_uuid, NULL); } else { found_uuid = strdup(uuid); } cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(cib_object, XML_ATTR_UUID, found_uuid); crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object, cib_sync_call | cib_scope_local); free_xml(cib_object); free(found_uuid); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); crm_trace("injecting node state for %s. rc is %d", node, rc); } free(xpath); CRM_ASSERT(rc == pcmk_ok); return cib_object; } static xmlNode * modify_node(cib_t * cib_conn, char *node, gboolean up) { xmlNode *cib_node = inject_node_state(cib_conn, node, NULL); if (up) { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES); crm_xml_add(cib_node, XML_NODE_IS_PEER, ONLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); } else { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); crm_xml_add(cib_node, XML_NODE_IS_PEER, OFFLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN); } crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name); return cib_node; } static xmlNode * find_resource_xml(xmlNode * cib_node, const char *resource) { xmlNode *match = NULL; const char *node = crm_element_value(cib_node, XML_ATTR_UNAME); char *xpath = crm_strdup_printf(RSC_TEMPLATE, node, resource); match = get_xpath_object(xpath, cib_node, LOG_TRACE); free(xpath); return match; } static xmlNode * inject_resource(xmlNode * cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider) { xmlNode *lrm = NULL; xmlNode *container = NULL; xmlNode *cib_resource = NULL; char *xpath = NULL; cib_resource = find_resource_xml(cib_node, resource); if (cib_resource != NULL) { /* If an existing LRM history entry uses the resource name, * continue using it, even if lrm_name is different. */ return cib_resource; } // Check for history entry under preferred name if (strcmp(resource, lrm_name)) { cib_resource = find_resource_xml(cib_node, lrm_name); if (cib_resource != NULL) { return cib_resource; } } /* One day, add query for class, provider, type */ if (rclass == NULL || rtype == NULL) { out->err(out, "Resource %s not found in the status section of %s." " Please supply the class and type to continue", resource, ID(cib_node)); return NULL; } else if (!pcmk__strcase_any_of(rclass, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_STONITH, PCMK_RESOURCE_CLASS_SERVICE, PCMK_RESOURCE_CLASS_UPSTART, PCMK_RESOURCE_CLASS_SYSTEMD, PCMK_RESOURCE_CLASS_LSB, NULL)) { out->err(out, "Invalid class for %s: %s", resource, rclass); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider) && (rprovider == NULL)) { out->err(out, "Please specify the provider for resource %s", resource); return NULL; } xpath = (char *)xmlGetNodePath(cib_node); crm_info("Injecting new resource %s into %s '%s'", lrm_name, xpath, ID(cib_node)); free(xpath); lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); if (lrm == NULL) { const char *node_uuid = ID(cib_node); lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); crm_xml_add(lrm, XML_ATTR_ID, node_uuid); } container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); if (container == NULL) { container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); } cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); // If we're creating a new entry, use the preferred name crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name); crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype); return cib_resource; } #define XPATH_MAX 1024 static int find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml) { int offset = 0; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(ticket_state_xml != NULL); *ticket_state_xml = NULL; xpath_string = calloc(1, XPATH_MAX); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", "/cib/status/tickets"); if (ticket_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s[@id=\"%s\"]", XML_CIB_TAG_TICKET_STATE, ticket_id); } CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { if (ticket_id) { out->err(out, "Multiple ticket_states match ticket_id=%s", ticket_id); } *ticket_state_xml = xml_search; } else { *ticket_state_xml = xml_search; } bail: free(xpath_string); return rc; } static int set_ticket_state_attr(const char *ticket_id, const char *attr_name, const char *attr_value, cib_t * cib, int cib_options) { int rc = pcmk_ok; xmlNode *xml_top = NULL; xmlNode *ticket_state_xml = NULL; rc = find_ticket_state(cib, ticket_id, &ticket_state_xml); if (rc == pcmk_ok) { crm_debug("Found a match state for ticket: id=%s", ticket_id); xml_top = ticket_state_xml; } else if (rc != -ENXIO) { return rc; } else { xmlNode *xml_obj = NULL; xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id); } crm_xml_add(ticket_state_xml, attr_name, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options); free_xml(xml_top); return rc; } void modify_configuration(pe_working_set_t * data_set, cib_t *cib, pcmk_injections_t *injections) { int rc = pcmk_ok; GList *gIter = NULL; xmlNode *cib_op = NULL; xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; lrmd_event_data_t *op = NULL; out = data_set->priv; out->message(out, "inject-modify-config", injections->quorum, injections->watchdog); if (injections->quorum) { xmlNode *top = create_xml_node(NULL, XML_TAG_CIB); /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */ crm_xml_add(top, XML_ATTR_HAVE_QUORUM, injections->quorum); rc = cib->cmds->modify(cib, NULL, top, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } if (injections->watchdog) { rc = update_attr_delegate(cib, cib_sync_call | cib_scope_local, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_HAVE_WATCHDOG, injections->watchdog, FALSE, NULL, NULL); CRM_ASSERT(rc == pcmk_ok); } for (gIter = injections->node_up; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; out->message(out, "inject-modify-node", "Online", node); cib_node = modify_node(cib, node, TRUE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = injections->node_down; gIter != NULL; gIter = gIter->next) { char xpath[STATUS_PATH_MAX]; char *node = (char *)gIter->data; out->message(out, "inject-modify-node", "Offline", node); cib_node = modify_node(cib, node, FALSE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_CIB_TAG_LRM); cib->cmds->remove(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_TAG_TRANSIENT_NODEATTRS); cib->cmds->remove(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); } for (gIter = injections->node_fail; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; out->message(out, "inject-modify-node", "Failing", node); cib_node = modify_node(cib, node, TRUE); crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = injections->ticket_grant; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Granting", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = injections->ticket_revoke; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Revoking", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = injections->ticket_standby; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Standby", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = injections->ticket_activate; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Activating", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = injections->op_inject; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; int rc = 0; int outcome = PCMK_OCF_OK; guint interval_ms = 0; char *key = NULL; char *node = NULL; char *task = NULL; char *resource = NULL; const char *rtype = NULL; const char *rclass = NULL; const char *rprovider = NULL; pe_resource_t *rsc = NULL; out->message(out, "inject-spec", spec); key = calloc(1, strlen(spec) + 1); node = calloc(1, strlen(spec) + 1); rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); if (rc != 3) { out->err(out, "Invalid operation spec: %s. Only found %d fields", spec, rc); free(key); free(node); continue; } parse_op_key(key, &resource, &task, &interval_ms); rsc = pe_find_resource(data_set->resources, resource); if (rsc == NULL) { out->err(out, "Invalid resource name: %s", resource); } else { rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); cib_node = inject_node_state(cib, node, NULL); CRM_ASSERT(cib_node != NULL); update_failcounts(cib_node, resource, task, interval_ms, outcome); cib_resource = inject_resource(cib_node, resource, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = create_op(cib_resource, task, interval_ms, outcome); CRM_ASSERT(op != NULL); cib_op = inject_op(cib_resource, op, 0); CRM_ASSERT(cib_op != NULL); lrmd_free_event(op); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } free(task); free(node); free(key); } if (!out->is_quiet(out)) { out->end_list(out); } } static gboolean exec_pseudo_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-pseudo-action", node, task); pcmk__update_graph(graph, action); return TRUE; } static gboolean exec_rsc_action(crm_graph_t * graph, crm_action_t * action) { int rc = 0; GList *gIter = NULL; lrmd_event_data_t *op = NULL; int target_outcome = PCMK_OCF_OK; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *lrm_name = NULL; const char *operation = crm_element_value(action->xml, "operation"); const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); - if (pcmk__strcase_any_of(operation, CRM_OP_PROBED, CRM_OP_REPROBE, NULL)) { + if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { crm_info("Skipping %s op for %s", operation, node); goto done; } if (action_rsc == NULL) { crm_log_xml_err(action->xml, "Bad"); free(node); free(uuid); return FALSE; } /* Look for the preferred name * If not found, try the expected 'local' name * If not found use the preferred name anyway */ resource = crm_element_value(action_rsc, XML_ATTR_ID); CRM_ASSERT(resource != NULL); // makes static analysis happy lrm_name = resource; // Preferred name when writing history if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG); if (longname && pe_find_resource(fake_resource_list, longname)) { resource = longname; } } if (pcmk__strcase_any_of(operation, "delete", RSC_METADATA, NULL)) { out->message(out, "inject-rsc-action", resource, operation, node, (guint) 0); goto done; } rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(action_rsc, XML_ATTR_TYPE); rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER); pcmk__scan_min_int(target_rc_s, &target_outcome, 0); CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call | cib_scope_local) == pcmk_ok); cib_node = inject_node_state(fake_cib, node, (router_node? node : uuid)); CRM_ASSERT(cib_node != NULL); cib_resource = inject_resource(cib_node, resource, lrm_name, rclass, rtype, rprovider); if (cib_resource == NULL) { crm_err("invalid resource in transition"); free(node); free(uuid); free_xml(cib_node); return FALSE; } op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE, target_outcome, "User-injected result"); out->message(out, "inject-rsc-action", resource, op->op_type, node, op->interval_ms); for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; char *key = NULL; const char *match_name = NULL; // Allow user to specify anonymous clone with or without instance number key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource; } free(key); if ((match_name == NULL) && strcmp(resource, lrm_name)) { key = crm_strdup_printf(PCMK__OP_FMT "@%s=", lrm_name, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = lrm_name; } free(key); } if (match_name != NULL) { rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); // ${match_name}_${task}_${interval_in_ms}@${node}=${rc} if (rc != 1) { out->err(out, "Invalid failed operation spec: %s. Result code must be integer", spec); continue; } crm__set_graph_action_flags(action, pcmk__graph_action_failed); graph->abort_priority = INFINITY; out->info(out, "Pretending action %d failed with rc=%d", action->id, op->rc); update_failcounts(cib_node, match_name, op->op_type, op->interval_ms, op->rc); break; } } inject_op(cib_resource, op, target_outcome); lrmd_free_event(op); rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); done: free(node); free(uuid); free_xml(cib_node); crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); return TRUE; } static gboolean exec_crmd_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-cluster-action", node, task, rsc); pcmk__update_graph(graph, action); return TRUE; } static gboolean exec_stonith_action(crm_graph_t * graph, crm_action_t * action) { const char *op = crm_meta_value(action->params, "stonith_action"); char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); out->message(out, "inject-fencing-action", target, op); if(!pcmk__str_eq(op, "on", pcmk__str_casei)) { int rc = 0; char xpath[STATUS_PATH_MAX]; xmlNode *cib_node = modify_node(fake_cib, target, FALSE); crm_xml_add(cib_node, XML_ATTR_ORIGIN, __func__); CRM_ASSERT(cib_node != NULL); rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM); fake_cib->cmds->remove(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_TAG_TRANSIENT_NODEATTRS); fake_cib->cmds->remove(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); free_xml(cib_node); } crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); free(target); return TRUE; } enum transition_status run_simulation(pe_working_set_t * data_set, cib_t *cib, GList *op_fail_list) { crm_graph_t *transition = NULL; enum transition_status graph_rc; crm_graph_functions_t exec_fns = { exec_pseudo_action, exec_rsc_action, exec_crmd_action, exec_stonith_action, }; out = data_set->priv; fake_cib = cib; fake_op_fail_list = op_fail_list; if (!out->is_quiet(out)) { out->begin_list(out, NULL, NULL, "Executing Cluster Transition"); } pcmk__set_graph_functions(&exec_fns); transition = pcmk__unpack_graph(data_set->graph, crm_system_name); pcmk__log_graph(LOG_DEBUG, transition); fake_resource_list = data_set->resources; do { graph_rc = pcmk__execute_graph(transition); } while (graph_rc == transition_active); fake_resource_list = NULL; if (graph_rc != transition_complete) { out->err(out, "Transition failed: %s", pcmk__graph_status2text(graph_rc)); pcmk__log_graph(LOG_ERR, transition); } pcmk__free_graph(transition); if (graph_rc != transition_complete) { out->err(out, "An invalid transition was produced"); } if (!out->is_quiet(out)) { xmlNode *cib_object = NULL; int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); pe_reset_working_set(data_set); data_set->input = cib_object; out->end_list(out); } return graph_rc; } diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c index c26655007f..ceab6afe0e 100644 --- a/lib/pacemaker/pcmk_simulate.c +++ b/lib/pacemaker/pcmk_simulate.c @@ -1,563 +1,560 @@ /* * Copyright 2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" static char * create_action_name(pe_action_t *action, bool verbose) { char *action_name = NULL; const char *prefix = ""; const char *action_host = NULL; const char *clone_name = NULL; const char *task = action->task; if (action->node) { action_host = action->node->details->uname; } else if (!pcmk_is_set(action->flags, pe_action_pseudo)) { action_host = ""; } if (pcmk__str_eq(action->task, RSC_CANCEL, pcmk__str_casei)) { prefix = "Cancel "; task = action->cancel_task; } if (action->rsc && action->rsc->clone_name) { clone_name = action->rsc->clone_name; } if (clone_name) { char *key = NULL; guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } if (pcmk__strcase_any_of(action->task, RSC_NOTIFY, RSC_NOTIFIED, NULL)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation"); CRM_ASSERT(n_type != NULL); CRM_ASSERT(n_task != NULL); key = pcmk__notify_key(clone_name, n_type, n_task); } else { key = pcmk__op_key(clone_name, task, interval_ms); } if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, key, action_host); } else { action_name = crm_strdup_printf("%s%s", prefix, key); } free(key); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); action_name = crm_strdup_printf("%s%s '%s' %s", prefix, action->task, op, action_host); } else if (action->rsc && action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->uuid, action_host); } else if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->task, action_host); } else { action_name = crm_strdup_printf("%s", action->uuid); } if (verbose) { char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); free(action_name); action_name = with_id; } return action_name; } static void print_cluster_status(pe_working_set_t * data_set, unsigned int show_opts, unsigned int section_opts, const char *title, bool print_spacer) { pcmk__output_t *out = data_set->priv; GList *all = NULL; section_opts |= pcmk_section_nodes | pcmk_section_resources; all = g_list_prepend(all, (gpointer) "*"); PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "%s", title); out->message(out, "cluster-status", data_set, 0, NULL, FALSE, section_opts, show_opts | pcmk_show_inactive_rscs | pcmk_show_failed_detail, NULL, all, all); out->end_list(out); g_list_free(all); } static void print_transition_summary(pe_working_set_t *data_set, bool print_spacer) { pcmk__output_t *out = data_set->priv; PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "Transition Summary"); pcmk__output_actions(data_set); out->end_list(out); } static void reset(pe_working_set_t *data_set, xmlNodePtr input, pcmk__output_t *out, char *use_date, unsigned int flags) { data_set->input = input; data_set->priv = out; pcmk__set_effective_date(data_set, true, use_date); if (pcmk_is_set(flags, pcmk_sim_sanitized)) { pe__set_working_set_flags(data_set, pe_flag_sanitized); } if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pe__set_working_set_flags(data_set, pe_flag_show_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pe__set_working_set_flags(data_set, pe_flag_show_utilization); } } int pcmk__write_sim_dotfile(pe_working_set_t *data_set, const char *dot_file, bool all_actions, bool verbose) { GList *gIter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { return errno; } fprintf(dot_strm, " digraph \"g\" {\n"); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; const char *style = "dashed"; const char *font = "black"; const char *color = "black"; char *action_name = create_action_name(action, verbose); if (pcmk_is_set(action->flags, pe_action_pseudo)) { font = "orange"; } if (pcmk_is_set(action->flags, pe_action_dumped)) { style = "bold"; color = "green"; } else if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) { color = "red"; font = "purple"; if (!all_actions) { goto do_not_write; } } else if (pcmk_is_set(action->flags, pe_action_optional)) { color = "blue"; if (!all_actions) { goto do_not_write; } } else { color = "red"; CRM_CHECK(!pcmk_is_set(action->flags, pe_action_runnable), ;); } pe__set_action_flags(action, pe_action_dumped); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); do_not_write: free(action_name); } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; GList *gIter2 = NULL; for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) { pe_action_wrapper_t *before = (pe_action_wrapper_t *) gIter2->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; bool optional = true; if (before->state == pe_link_dumped) { optional = false; style = "bold"; - } else if (pcmk_is_set(action->flags, pe_action_pseudo) - && (before->type & pe_order_stonith_stop)) { - continue; } else if (before->type == pe_order_none) { continue; } else if (pcmk_is_set(before->action->flags, pe_action_dumped) && pcmk_is_set(action->flags, pe_action_dumped) && before->type != pe_order_load) { optional = false; } if (all_actions || !optional) { before_name = create_action_name(before->action, verbose); after_name = create_action_name(action, verbose); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); fflush(dot_strm); fclose(dot_strm); return pcmk_rc_ok; } void pcmk__profile_file(const char *xml_file, long long repeat, pe_working_set_t *data_set, char *use_date) { pcmk__output_t *out = data_set->priv; xmlNode *cib_object = NULL; clock_t start = 0; clock_t end; CRM_ASSERT(out != NULL); cib_object = filename2xml(xml_file); start = clock(); if (pcmk_find_cib_element(cib_object, XML_CIB_TAG_STATUS) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return; } for (int i = 0; i < repeat; ++i) { xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object); data_set->input = input; pcmk__set_effective_date(data_set, false, use_date); pcmk__schedule_actions(data_set, input, NULL); pe_reset_working_set(data_set); } end = clock(); out->message(out, "profile", xml_file, start, end); } void pcmk__profile_dir(const char *dir, long long repeat, pe_working_set_t *data_set, char *use_date) { pcmk__output_t *out = data_set->priv; struct dirent **namelist; int file_num = scandir(dir, &namelist, 0, alphasort); CRM_ASSERT(out != NULL); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX]; out->begin_list(out, NULL, NULL, "Timings"); while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (!pcmk__ends_with_ext(namelist[file_num]->d_name, ".xml")) { free(namelist[file_num]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { pcmk__profile_file(buffer, repeat, data_set, use_date); } free(namelist[file_num]); } free(namelist); out->end_list(out); } } void pcmk__set_effective_date(pe_working_set_t *data_set, bool print_original, char *use_date) { pcmk__output_t *out = data_set->priv; time_t original_date = 0; CRM_ASSERT(out != NULL); crm_element_value_epoch(data_set->input, "execution-date", &original_date); if (use_date) { data_set->now = crm_time_new(use_date); out->info(out, "Setting effective cluster time: %s", use_date); crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now, crm_time_log_date | crm_time_log_timeofday); } else if (original_date) { data_set->now = crm_time_new(NULL); crm_time_set_timet(data_set->now, &original_date); if (print_original) { char *when = crm_time_as_string(data_set->now, crm_time_log_date|crm_time_log_timeofday); out->info(out, "Using the original execution date of: %s", when); free(when); } } } int pcmk__simulate(pe_working_set_t *data_set, pcmk__output_t *out, pcmk_injections_t *injections, unsigned int flags, unsigned int section_opts, char *use_date, char *input_file, char *graph_file, char *dot_file) { int printed = pcmk_rc_no_output; int rc = pcmk_rc_ok; xmlNodePtr input = NULL; cib_t *cib = NULL; bool modified = false; rc = cib__signon_query(&cib, &input); if (rc != pcmk_rc_ok) { goto simulate_done; } reset(data_set, input, out, use_date, flags); cluster_status(data_set); if (!out->is_quiet(out)) { if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { printed = out->message(out, "maint-mode", data_set->flags); } if (data_set->disabled_resources || data_set->blocked_resources) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); printed = out->info(out, "%d of %d resource instances DISABLED and %d BLOCKED " "from further action due to failure", data_set->disabled_resources, data_set->ninstances, data_set->blocked_resources); } /* Most formatted output headers use caps for each word, but this one * only has the first word capitalized for compatibility with pcs. */ print_cluster_status(data_set, pcmk_is_set(flags, pcmk_sim_show_pending) ? pcmk_show_pending : 0, section_opts, "Current cluster status", printed == pcmk_rc_ok); printed = pcmk_rc_ok; } modified = injections->node_down != NULL || injections->node_fail != NULL || injections->node_up != NULL || injections->op_inject != NULL || injections->ticket_activate != NULL || injections->ticket_grant != NULL || injections->ticket_revoke != NULL || injections->ticket_standby != NULL || injections->watchdog != NULL || injections->watchdog != NULL; if (modified) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); modify_configuration(data_set, cib, injections); printed = pcmk_rc_ok; rc = cib->cmds->query(cib, NULL, &input, cib_sync_call); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); goto simulate_done; } cleanup_calculations(data_set); reset(data_set, input, out, use_date, flags); cluster_status(data_set); } if (input_file != NULL) { rc = write_xml_file(input, input_file, FALSE); if (rc < 0) { rc = pcmk_legacy2rc(rc); goto simulate_done; } } if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) { crm_time_t *local_date = NULL; pcmk__output_t *logger_out = NULL; if (pcmk_all_flags_set(data_set->flags, pe_flag_show_scores|pe_flag_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Allocation Scores and Utilization Information"); printed = pcmk_rc_ok; } else if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Allocation Scores"); printed = pcmk_rc_ok; } else if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Utilization Information"); printed = pcmk_rc_ok; } else { logger_out = pcmk__new_logger(); if (logger_out == NULL) { rc = pcmk_rc_error; goto simulate_done; } data_set->priv = logger_out; } pcmk__schedule_actions(data_set, input, local_date); if (logger_out == NULL) { out->end_list(out); } else { logger_out->finish(logger_out, CRM_EX_OK, true, NULL); pcmk__output_free(logger_out); data_set->priv = out; } input = NULL; /* Don't try and free it twice */ if (graph_file != NULL) { rc = write_xml_file(data_set->graph, graph_file, FALSE); if (rc < 0) { rc = pcmk_rc_graph_error; goto simulate_done; } } if (dot_file != NULL) { rc = pcmk__write_sim_dotfile(data_set, dot_file, pcmk_is_set(flags, pcmk_sim_all_actions), pcmk_is_set(flags, pcmk_sim_verbose)); if (rc != pcmk_rc_ok) { rc = pcmk_rc_dot_error; goto simulate_done; } } if (!out->is_quiet(out)) { print_transition_summary(data_set, printed == pcmk_rc_ok); } } rc = pcmk_rc_ok; if (pcmk_is_set(flags, pcmk_sim_simulate)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); if (run_simulation(data_set, cib, injections->op_fail) != transition_complete) { rc = pcmk_rc_invalid_transition; } if (!out->is_quiet(out)) { pcmk__set_effective_date(data_set, true, use_date); if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pe__set_working_set_flags(data_set, pe_flag_show_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pe__set_working_set_flags(data_set, pe_flag_show_utilization); } cluster_status(data_set); print_cluster_status(data_set, 0, section_opts, "Revised Cluster Status", true); } } simulate_done: if (cib) { cib->cmds->signoff(cib); cib_delete(cib); } return rc; } int pcmk_simulate(xmlNodePtr *xml, pe_working_set_t *data_set, pcmk_injections_t *injections, unsigned int flags, unsigned int section_opts, char *use_date, char *input_file, char *graph_file, char *dot_file) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__out_prologue(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pe__register_messages(out); pcmk__register_lib_messages(out); rc = pcmk__simulate(data_set, out, injections, flags, section_opts, use_date, input_file, graph_file, dot_file); pcmk__out_epilogue(out, xml, rc); return rc; } void pcmk_free_injections(pcmk_injections_t *injections) { if (injections == NULL) { return; } g_list_free_full(injections->node_up, g_free); g_list_free_full(injections->node_down, g_free); g_list_free_full(injections->node_fail, g_free); g_list_free_full(injections->op_fail, g_free); g_list_free_full(injections->op_inject, g_free); g_list_free_full(injections->ticket_grant, g_free); g_list_free_full(injections->ticket_revoke, g_free); g_list_free_full(injections->ticket_standby, g_free); g_list_free_full(injections->ticket_activate, g_free); free(injections->quorum); free(injections->watchdog); free(injections); } diff --git a/lib/pengine/common.c b/lib/pengine/common.c index fe4223816f..54d69f2594 100644 --- a/lib/pengine/common.c +++ b/lib/pengine/common.c @@ -1,635 +1,633 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include gboolean was_processing_error = FALSE; gboolean was_processing_warning = FALSE; static bool check_health(const char *value) { return pcmk__strcase_any_of(value, "none", "custom", "only-green", "progressive", "migrate-on-red", NULL); } static bool check_placement_strategy(const char *value) { return pcmk__strcase_any_of(value, "default", "utilization", "minimal", "balanced", NULL); } static pcmk__cluster_option_t pe_opts[] = { /* name, old name, type, allowed values, * default value, validator, * short description, * long description */ { "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum, "What to do when the cluster does not have quorum", NULL }, { "symmetric-cluster", NULL, "boolean", NULL, "true", pcmk__valid_boolean, "Whether resources can run on any node by default", NULL }, { "maintenance-mode", NULL, "boolean", NULL, "false", pcmk__valid_boolean, "Whether the cluster should refrain from monitoring, starting, " "and stopping resources", NULL }, { "start-failure-is-fatal", NULL, "boolean", NULL, "true", pcmk__valid_boolean, "Whether a start failure should prevent a resource from being " "recovered on the same node", "When true, the cluster will immediately ban a resource from a node " "if it fails to start there. When false, the cluster will instead " "check the resource's fail count against its migration-threshold." }, { "enable-startup-probes", NULL, "boolean", NULL, "true", pcmk__valid_boolean, "Whether the cluster should check for active resources during start-up", NULL }, { XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", pcmk__valid_boolean, "Whether to lock resources to a cleanly shut down node", "When true, resources active on a node when it is cleanly shut down " "are kept \"locked\" to that node (not allowed to run elsewhere) " "until they start again on that node after it rejoins (or for at " "most shutdown-lock-limit, if set). Stonith resources and " "Pacemaker Remote connections are never locked. Clone and bundle " "instances and the promoted role of promotable clones are currently" " never locked, though support could be added in a future release." }, { XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, "Do not lock resources to a cleanly shut down node longer than this", "If shutdown-lock is true and this is set to a nonzero time duration, " "shutdown locks will expire after this much time has passed since " "the shutdown was initiated, even if the node has not rejoined." }, // Fencing-related options { "stonith-enabled", NULL, "boolean", NULL, "true", pcmk__valid_boolean, "*** Advanced Use Only *** " "Whether nodes may be fenced as part of recovery", "If false, unresponsive nodes are immediately assumed to be harmless, " "and resources that were active on them may be recovered " "elsewhere. This can result in a \"split-brain\" situation, " "potentially leading to data loss and/or service unavailability." }, { "stonith-action", NULL, "select", "reboot, off, poweroff", "reboot", pcmk__is_fencing_action, "Action to send to fence device when a node needs to be fenced " "(\"poweroff\" is a deprecated alias for \"off\")", NULL }, { "stonith-timeout", NULL, "time", NULL, "60s", pcmk__valid_interval_spec, "*** Advanced Use Only *** Unused by Pacemaker", "This value is not used by Pacemaker, but is kept for backward " "compatibility, and certain legacy fence agents might use it." }, { XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, "false", pcmk__valid_boolean, "Whether watchdog integration is enabled", "This is set automatically by the cluster according to whether SBD " "is detected to be in use. User-configured values are ignored. " "The value `true` is meaningful if diskless SBD is used and " "`stonith-watchdog-timeout` is nonzero. In that case, if fencing " "is required, watchdog-based self-fencing will be performed via " "SBD without requiring a fencing resource explicitly configured." }, { "concurrent-fencing", NULL, "boolean", NULL, PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, "Allow performing fencing operations in parallel", NULL }, { "startup-fencing", NULL, "boolean", NULL, "true", pcmk__valid_boolean, "*** Advanced Use Only *** Whether to fence unseen nodes at start-up", "Setting this to false may lead to a \"split-brain\" situation," "potentially leading to data loss and/or service unavailability." }, { XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL, "0", pcmk__valid_interval_spec, "Apply fencing delay targeting the lost nodes with the highest total resource priority", "Apply specified delay for the fencings that are targeting the lost " "nodes with the highest total resource priority in case we don't " "have the majority of the nodes in our cluster partition, so that " "the more significant nodes potentially win any fencing match, " "which is especially meaningful under split-brain of 2-node " "cluster. A promoted resource instance takes the base priority + 1 " "on calculation if the base priority is not 0. Any static/random " "delays that are introduced by `pcmk_delay_base/max` configured " "for the corresponding fencing resources will be added to this " "delay. This delay should be significantly greater than, safely " "twice, the maximum `pcmk_delay_base/max`. By default, priority " "fencing delay is disabled." }, { "cluster-delay", NULL, "time", NULL, "60s", pcmk__valid_interval_spec, "Maximum time for node-to-node communication", "The node elected Designated Controller (DC) will consider an action " "failed if it does not get a response from the node executing the " "action within this time (after considering the action's own " "timeout). The \"correct\" value will depend on the speed and " "load of your network and cluster nodes." }, { "batch-limit", NULL, "integer", NULL, "0", pcmk__valid_number, "Maximum number of jobs that the cluster may execute in parallel " "across all nodes", "The \"correct\" value will depend on the speed and load of your " "network and cluster nodes. If set to 0, the cluster will " "impose a dynamically calculated limit when any node has a " "high load." }, { "migration-limit", NULL, "integer", NULL, "-1", pcmk__valid_number, "The number of live migration actions that the cluster is allowed " "to execute in parallel on a node (-1 means no limit)" }, /* Orphans and stopping */ { "stop-all-resources", NULL, "boolean", NULL, "false", pcmk__valid_boolean, "Whether the cluster should stop all active resources", NULL }, { "stop-orphan-resources", NULL, "boolean", NULL, "true", pcmk__valid_boolean, "Whether to stop resources that were removed from the configuration", NULL }, { "stop-orphan-actions", NULL, "boolean", NULL, "true", pcmk__valid_boolean, "Whether to cancel recurring actions removed from the configuration", NULL }, { "remove-after-stop", NULL, "boolean", NULL, "false", pcmk__valid_boolean, "*** Deprecated *** Whether to remove stopped resources from " "the executor", "Values other than default are poorly tested and potentially dangerous." " This option will be removed in a future release." }, /* Storing inputs */ { "pe-error-series-max", NULL, "integer", NULL, "-1", pcmk__valid_number, "The number of scheduler inputs resulting in errors to save", "Zero to disable, -1 to store unlimited." }, { "pe-warn-series-max", NULL, "integer", NULL, "5000", pcmk__valid_number, "The number of scheduler inputs resulting in warnings to save", "Zero to disable, -1 to store unlimited." }, { "pe-input-series-max", NULL, "integer", NULL, "4000", pcmk__valid_number, "The number of scheduler inputs without errors or warnings to save", "Zero to disable, -1 to store unlimited." }, /* Node health */ { "node-health-strategy", NULL, "select", "none, migrate-on-red, only-green, progressive, custom", "none", check_health, "How cluster should react to node health attributes", "Requires external entities to create node attributes (named with " "the prefix \"#health\") with values \"red\", \"yellow\" or " "\"green\"." }, { "node-health-base", NULL, "integer", NULL, "0", pcmk__valid_number, "Base health score assigned to a node", "Only used when node-health-strategy is set to progressive." }, { "node-health-green", NULL, "integer", NULL, "0", pcmk__valid_number, "The score to use for a node health attribute whose value is \"green\"", "Only used when node-health-strategy is set to custom or progressive." }, { "node-health-yellow", NULL, "integer", NULL, "0", pcmk__valid_number, "The score to use for a node health attribute whose value is \"yellow\"", "Only used when node-health-strategy is set to custom or progressive." }, { "node-health-red", NULL, "integer", NULL, "-INFINITY", pcmk__valid_number, "The score to use for a node health attribute whose value is \"red\"", "Only used when node-health-strategy is set to custom or progressive." }, /*Placement Strategy*/ { "placement-strategy", NULL, "select", "default, utilization, minimal, balanced", "default", check_placement_strategy, "How the cluster should allocate resources to nodes", NULL }, }; void pe_metadata(void) { pcmk__print_option_metadata("pacemaker-schedulerd", "Pacemaker scheduler options", "Cluster options used by Pacemaker's scheduler" " (formerly called pengine)", pe_opts, PCMK__NELEM(pe_opts)); } void verify_pe_options(GHashTable * options) { pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts)); } const char * pe_pref(GHashTable * options, const char *name) { return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name); } const char * fail2text(enum action_fail_response fail) { const char *result = ""; switch (fail) { case action_fail_ignore: result = "ignore"; break; case action_fail_demote: result = "demote"; break; case action_fail_block: result = "block"; break; case action_fail_recover: result = "recover"; break; case action_fail_migrate: result = "migrate"; break; case action_fail_stop: result = "stop"; break; case action_fail_fence: result = "fence"; break; case action_fail_standby: result = "standby"; break; case action_fail_restart_container: result = "restart-container"; break; case action_fail_reset_remote: result = "reset-remote"; break; } return result; } enum action_tasks text2task(const char *task) { if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) { return stop_rsc; } else if (pcmk__str_eq(task, CRMD_ACTION_STOPPED, pcmk__str_casei)) { return stopped_rsc; } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) { return start_rsc; } else if (pcmk__str_eq(task, CRMD_ACTION_STARTED, pcmk__str_casei)) { return started_rsc; } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { return shutdown_crm; } else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { return stonith_node; } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { return monitor_rsc; } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei)) { return action_notify; } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) { return action_notified; } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) { return action_promote; } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) { return action_demote; } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTED, pcmk__str_casei)) { return action_promoted; } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTED, pcmk__str_casei)) { return action_demoted; } #if SUPPORT_TRACING if (pcmk__str_eq(task, CRMD_ACTION_CANCEL, pcmk__str_casei)) { return no_action; } else if (pcmk__str_eq(task, CRMD_ACTION_DELETE, pcmk__str_casei)) { return no_action; } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { return no_action; - } else if (pcmk__str_eq(task, CRM_OP_PROBED, pcmk__str_casei)) { - return no_action; } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { return no_action; } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { return no_action; } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { return no_action; } crm_trace("Unsupported action: %s", task); #endif return no_action; } const char * task2text(enum action_tasks task) { const char *result = ""; switch (task) { case no_action: result = "no_action"; break; case stop_rsc: result = CRMD_ACTION_STOP; break; case stopped_rsc: result = CRMD_ACTION_STOPPED; break; case start_rsc: result = CRMD_ACTION_START; break; case started_rsc: result = CRMD_ACTION_STARTED; break; case shutdown_crm: result = CRM_OP_SHUTDOWN; break; case stonith_node: result = CRM_OP_FENCE; break; case monitor_rsc: result = CRMD_ACTION_STATUS; break; case action_notify: result = CRMD_ACTION_NOTIFY; break; case action_notified: result = CRMD_ACTION_NOTIFIED; break; case action_promote: result = CRMD_ACTION_PROMOTE; break; case action_promoted: result = CRMD_ACTION_PROMOTED; break; case action_demote: result = CRMD_ACTION_DEMOTE; break; case action_demoted: result = CRMD_ACTION_DEMOTED; break; } return result; } const char * role2text(enum rsc_role_e role) { switch (role) { case RSC_ROLE_UNKNOWN: return RSC_ROLE_UNKNOWN_S; case RSC_ROLE_STOPPED: return RSC_ROLE_STOPPED_S; case RSC_ROLE_STARTED: return RSC_ROLE_STARTED_S; case RSC_ROLE_UNPROMOTED: #ifdef PCMK__COMPAT_2_0 return RSC_ROLE_UNPROMOTED_LEGACY_S; #else return RSC_ROLE_UNPROMOTED_S; #endif case RSC_ROLE_PROMOTED: #ifdef PCMK__COMPAT_2_0 return RSC_ROLE_PROMOTED_LEGACY_S; #else return RSC_ROLE_PROMOTED_S; #endif } CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S); CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S); // coverity[dead_error_line] return RSC_ROLE_UNKNOWN_S; } enum rsc_role_e text2role(const char *role) { CRM_ASSERT(role != NULL); if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) { return RSC_ROLE_STOPPED; } else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) { return RSC_ROLE_STARTED; } else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S, RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) { return RSC_ROLE_UNPROMOTED; } else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S, RSC_ROLE_PROMOTED_LEGACY_S, NULL)) { return RSC_ROLE_PROMOTED; } else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) { return RSC_ROLE_UNKNOWN; } crm_err("Unknown role: %s", role); return RSC_ROLE_UNKNOWN; } /*! * \internal * \brief Add two scores (bounding to +/- INFINITY) * * \param[in] score1 First score to add * \param[in] score2 Second score to add */ int pe__add_scores(int score1, int score2) { int result = score1 + score2; // First handle the cases where one or both is infinite if (score1 <= -CRM_SCORE_INFINITY) { if (score2 <= -CRM_SCORE_INFINITY) { crm_trace("-INFINITY + -INFINITY = -INFINITY"); } else if (score2 >= CRM_SCORE_INFINITY) { crm_trace("-INFINITY + +INFINITY = -INFINITY"); } else { crm_trace("-INFINITY + %d = -INFINITY", score2); } return -CRM_SCORE_INFINITY; } else if (score2 <= -CRM_SCORE_INFINITY) { if (score1 >= CRM_SCORE_INFINITY) { crm_trace("+INFINITY + -INFINITY = -INFINITY"); } else { crm_trace("%d + -INFINITY = -INFINITY", score1); } return -CRM_SCORE_INFINITY; } else if (score1 >= CRM_SCORE_INFINITY) { if (score2 >= CRM_SCORE_INFINITY) { crm_trace("+INFINITY + +INFINITY = +INFINITY"); } else { crm_trace("+INFINITY + %d = +INFINITY", score2); } return CRM_SCORE_INFINITY; } else if (score2 >= CRM_SCORE_INFINITY) { crm_trace("%d + +INFINITY = +INFINITY", score1); return CRM_SCORE_INFINITY; } /* As long as CRM_SCORE_INFINITY is less than half of the maximum integer, * we can ignore the possibility of integer overflow */ // Bound result to infinity if (result >= CRM_SCORE_INFINITY) { crm_trace("%d + %d = +INFINITY", score1, score2); return CRM_SCORE_INFINITY; } else if (result <= -CRM_SCORE_INFINITY) { crm_trace("%d + %d = -INFINITY", score1, score2); return -CRM_SCORE_INFINITY; } crm_trace("%d + %d = %d", score1, score2, result); return result; } void add_hash_param(GHashTable * hash, const char *name, const char *value) { CRM_CHECK(hash != NULL, return); crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value)); if (name == NULL || value == NULL) { return; } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) { return; } else if (g_hash_table_lookup(hash, name) == NULL) { g_hash_table_insert(hash, strdup(name), strdup(value)); } } const char * pe_node_attribute_calculated(const pe_node_t *node, const char *name, const pe_resource_t *rsc) { const char *source; if(node == NULL) { return NULL; } else if(rsc == NULL) { return g_hash_table_lookup(node->details->attrs, name); } source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET); if(source == NULL || !pcmk__str_eq("host", source, pcmk__str_casei)) { return g_hash_table_lookup(node->details->attrs, name); } /* Use attributes set for the containers location * instead of for the container itself * * Useful when the container is using the host's local * storage */ CRM_ASSERT(node->details->remote_rsc); CRM_ASSERT(node->details->remote_rsc->container); if(node->details->remote_rsc->container->running_on) { pe_node_t *host = node->details->remote_rsc->container->running_on->data; pe_rsc_trace(rsc, "%s: Looking for %s on the container host %s", rsc->id, name, host->details->uname); return g_hash_table_lookup(host->details->attrs, name); } pe_rsc_trace(rsc, "%s: Not looking for %s on the container host: %s is inactive", rsc->id, name, node->details->remote_rsc->container->id); return NULL; } const char * pe_node_attribute_raw(pe_node_t *node, const char *name) { if(node == NULL) { return NULL; } return g_hash_table_lookup(node->details->attrs, name); } diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c index 715e001d51..5985642137 100644 --- a/lib/pengine/pe_output.c +++ b/lib/pengine/pe_output.c @@ -1,2891 +1,2890 @@ /* * Copyright 2019-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include /* Never display node attributes whose name starts with one of these prefixes */ #define FILTER_STR { PCMK__FAIL_COUNT_PREFIX, PCMK__LAST_FAILURE_PREFIX, \ - "shutdown", "terminate", "standby", "probe_complete", \ - "#", NULL } + "shutdown", "terminate", "standby", "#", NULL } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } /*! * \internal * \brief Determine whether extended information about an attribute should be added. * * \param[in] node Node that ran this resource. * \param[in] rsc_list The list of resources for this node. * \param[in] attrname The attribute to find. * \param[out] expected_score The expected value for this attribute. * * \return TRUE if extended information should be printed, FALSE otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. */ static gboolean add_extra_info(pe_node_t *node, GList *rsc_list, pe_working_set_t *data_set, const char *attrname, int *expected_score) { GList *gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); const char *name = NULL; GHashTable *params = NULL; if (rsc->children != NULL) { if (add_extra_info(node, rsc->children, data_set, attrname, expected_score)) { return TRUE; } } if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) { continue; } params = pe_rsc_params(rsc, node, data_set); name = g_hash_table_lookup(params, "name"); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (pcmk__str_eq(name, attrname, pcmk__str_casei)) { int host_list_num = 0; const char *hosts = g_hash_table_lookup(params, "host_list"); const char *multiplier = g_hash_table_lookup(params, "multiplier"); int multiplier_i; if (hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } if ((multiplier == NULL) || (pcmk__scan_min_int(multiplier, &multiplier_i, INT_MIN) != pcmk_rc_ok)) { /* The ocf:pacemaker:ping resource agent defaults multiplier to * 1. The agent currently does not handle invalid text, but it * should, and this would be a reasonable choice ... */ multiplier_i = 1; } *expected_score = host_list_num * multiplier_i; return TRUE; } } return FALSE; } static GList * filter_attr_list(GList *attr_list, char *name) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return attr_list); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return attr_list; } } return g_list_insert_sorted(attr_list, name, compare_attribute); } static GList * get_operation_list(xmlNode *rsc_entry) { GList *op_list = NULL; xmlNode *rsc_op = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { const char *task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* Ignore notifies and some probes */ if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei) || (pcmk__str_eq(task, "probe", pcmk__str_casei) && (op_rc_i == 7))) { continue; } if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); return op_list; } static void add_dump_node(gpointer key, gpointer value, gpointer user_data) { xmlNodePtr node = user_data; pcmk_create_xml_text_node(node, (const char *) key, (const char *) value); } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } static const char * get_cluster_stack(pe_working_set_t *data_set) { xmlNode *stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); return stack? crm_element_value(stack, XML_NVPAIR_ATTR_VALUE) : "unknown"; } static char * last_changed_string(const char *last_written, const char *user, const char *client, const char *origin) { if (last_written != NULL || user != NULL || client != NULL || origin != NULL) { return crm_strdup_printf("%s%s%s%s%s%s%s", last_written ? last_written : "", user ? " by " : "", user ? user : "", client ? " via " : "", client ? client : "", origin ? " on " : "", origin ? origin : ""); } else { return strdup(""); } } static char * op_history_string(xmlNode *xml_op, const char *task, const char *interval_ms_s, int rc, gboolean print_timing) { const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); char *interval_str = NULL; char *buf = NULL; if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *pair = pcmk__format_nvpair("interval", interval_ms_s, "ms"); interval_str = crm_strdup_printf(" %s", pair); free(pair); } if (print_timing) { char *last_change_str = NULL; char *exec_str = NULL; char *queue_str = NULL; const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *time = pcmk__format_named_time(XML_RSC_OP_LAST_CHANGE, epoch); last_change_str = crm_strdup_printf(" %s", time); free(time); } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *pair = pcmk__format_nvpair(XML_RSC_OP_T_EXEC, value, "ms"); exec_str = crm_strdup_printf(" %s", pair); free(pair); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *pair = pcmk__format_nvpair(XML_RSC_OP_T_QUEUE, value, "ms"); queue_str = crm_strdup_printf(" %s", pair); free(pair); } buf = crm_strdup_printf("(%s) %s:%s%s%s%s rc=%d (%s)", call, task, interval_str ? interval_str : "", last_change_str ? last_change_str : "", exec_str ? exec_str : "", queue_str ? queue_str : "", rc, services_ocf_exitcode_str(rc)); if (last_change_str) { free(last_change_str); } if (exec_str) { free(exec_str); } if (queue_str) { free(queue_str); } } else { buf = crm_strdup_printf("(%s) %s%s%s", call, task, interval_str ? ":" : "", interval_str ? interval_str : ""); } if (interval_str) { free(interval_str); } return buf; } static char * resource_history_string(pe_resource_t *rsc, const char *rsc_id, gboolean all, int failcount, time_t last_failure) { char *buf = NULL; if (rsc == NULL) { buf = crm_strdup_printf("%s: orphan", rsc_id); } else if (all || failcount || last_failure > 0) { char *failcount_s = NULL; char *lastfail_s = NULL; if (failcount > 0) { failcount_s = crm_strdup_printf(" %s=%d", PCMK__FAIL_COUNT_PREFIX, failcount); } else { failcount_s = strdup(""); } if (last_failure > 0) { lastfail_s = crm_strdup_printf(" %s='%s'", PCMK__LAST_FAILURE_PREFIX, pcmk__epoch2str(&last_failure)); } buf = crm_strdup_printf("%s: migration-threshold=%d%s%s", rsc_id, rsc->migration_threshold, failcount_s, lastfail_s? lastfail_s : ""); free(failcount_s); free(lastfail_s); } else { buf = crm_strdup_printf("%s:", rsc_id); } return buf; } PCMK__OUTPUT_ARGS("cluster-summary", "pe_working_set_t *", "unsigned int", "unsigned int") static int cluster_summary(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(data_set); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s); } if (pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : NULL; const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); char *dc_name = data_set->dc_node ? pe__node_display_name(data_set->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-dc", data_set->dc_node, quorum, dc_version_s, dc_name); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-times", last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(data_set->nodes), data_set->ninstances, data_set->disabled_resources, data_set->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-options", data_set); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", data_set->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } PCMK__OUTPUT_ARGS("cluster-summary", "pe_working_set_t *", "unsigned int", "unsigned int") static int cluster_summary_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(data_set); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s); } /* Always print DC if none, even if not requested */ if (data_set->dc_node == NULL || pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : NULL; const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); char *dc_name = data_set->dc_node ? pe__node_display_name(data_set->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-dc", data_set->dc_node, quorum, dc_version_s, dc_name); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-times", last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(data_set->nodes), data_set->ninstances, data_set->disabled_resources, data_set->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { /* Kind of a hack - close the list we may have opened earlier in this * function so we can put all the options into their own list. We * only want to do this on HTML output, though. */ PCMK__OUTPUT_LIST_FOOTER(out, rc); out->begin_list(out, NULL, NULL, "Config Options"); out->message(out, "cluster-options", data_set); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", data_set->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } char * pe__node_display_name(pe_node_t *node, bool print_detail) { char *node_name; const char *node_host = NULL; const char *node_id = NULL; int name_len; CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL)); /* Host is displayed only if this is a guest node */ if (pe__is_guest_node(node)) { pe_node_t *host_node = pe__current_node(node->details->remote_rsc); if (host_node && host_node->details) { node_host = host_node->details->uname; } if (node_host == NULL) { node_host = ""; /* so we at least get "uname@" to indicate guest */ } } /* Node ID is displayed if different from uname and detail is requested */ if (print_detail && !pcmk__str_eq(node->details->uname, node->details->id, pcmk__str_casei)) { node_id = node->details->id; } /* Determine name length */ name_len = strlen(node->details->uname) + 1; if (node_host) { name_len += strlen(node_host) + 1; /* "@node_host" */ } if (node_id) { name_len += strlen(node_id) + 3; /* + " (node_id)" */ } /* Allocate and populate display name */ node_name = malloc(name_len); CRM_ASSERT(node_name != NULL); strcpy(node_name, node->details->uname); if (node_host) { strcat(node_name, "@"); strcat(node_name, node_host); } if (node_id) { strcat(node_name, " ("); strcat(node_name, node_id); strcat(node_name, ")"); } return node_name; } int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...) { xmlNodePtr xml_node = NULL; va_list args; CRM_ASSERT(tag_name != NULL); xml_node = pcmk__output_xml_peek_parent(out); CRM_ASSERT(xml_node != NULL); xml_node = is_list ? create_xml_node(xml_node, tag_name) : xmlNewChild(xml_node, NULL, (pcmkXmlStr) tag_name, NULL); va_start(args, pairs_count); while(pairs_count--) { const char *param_name = va_arg(args, const char *); const char *param_value = va_arg(args, const char *); if (param_name && param_value) { crm_xml_add(xml_node, param_name, param_value); } }; va_end(args); if (is_list) { pcmk__output_xml_push_parent(out, xml_node); } return pcmk_rc_ok; } static const char * role_desc(enum rsc_role_e role) { if (role == RSC_ROLE_PROMOTED) { #ifdef PCMK__COMPAT_2_0 return "as " RSC_ROLE_PROMOTED_LEGACY_S " "; #else return "in " RSC_ROLE_PROMOTED_S " role "; #endif } return ""; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "unsigned int") static int ban_html(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int show_opts = va_arg(args, unsigned int); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); char *buf = crm_strdup_printf("%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, role_desc(location->role_filter), node_name); pcmk__output_create_html_node(out, "li", NULL, NULL, buf); free(node_name); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "unsigned int") static int ban_text(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int show_opts = va_arg(args, unsigned int); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->list_item(out, NULL, "%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, role_desc(location->role_filter), node_name); free(node_name); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "unsigned int") static int ban_xml(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int show_opts G_GNUC_UNUSED = va_arg(args, unsigned int); const char *promoted_only = pcmk__btoa(location->role_filter == RSC_ROLE_PROMOTED); char *weight_s = pcmk__itoa(pe_node->weight); pcmk__output_create_xml_node(out, "ban", "id", location->id, "resource", location->rsc_lh->id, "node", pe_node->details->uname, "weight", weight_s, "promoted-only", promoted_only, /* This is a deprecated alias for * promoted_only. Removing it will break * backward compatibility of the API schema, * which will require an API schema major * version bump. */ "master_only", promoted_only, NULL); free(weight_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban-list", "pe_working_set_t *", "const char *", "GList *", "unsigned int", "gboolean") static int ban_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); const char *prefix = va_arg(args, const char *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); GList *gIter, *gIter2; int rc = pcmk_rc_no_output; /* Print each ban */ for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *location = gIter->data; if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) { continue; } if (!pcmk__str_in_list(rsc_printable_id(location->rsc_lh), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(uber_parent(location->rsc_lh)), only_rsc, pcmk__str_star_matches)) { continue; } for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { pe_node_t *node = (pe_node_t *) gIter2->data; if (node->weight < 0) { PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Negative Location Constraints"); out->message(out, "ban", node, location, show_opts); } } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_html(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "li", NULL); char *nnodes_str = crm_strdup_printf("%d node%s configured", nnodes, pcmk__plural_s(nnodes)); pcmk_create_html_node(nodes_node, "span", NULL, NULL, nnodes_str); free(nnodes_str); if (ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); s = crm_strdup_printf(", %d ", nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else if (ndisabled && !nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, ")"); } else if (!ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else { char *s = crm_strdup_printf("%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_text(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); out->list_item(out, NULL, "%d node%s configured", nnodes, pcmk__plural_s(nnodes)); if (ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED, %d BLOCKED from " "further action due to failure)", nresources, pcmk__plural_s(nresources), ndisabled, nblocked); } else if (ndisabled && !nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED)", nresources, pcmk__plural_s(nresources), ndisabled); } else if (!ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d BLOCKED from further action " "due to failure)", nresources, pcmk__plural_s(nresources), nblocked); } else { out->list_item(out, NULL, "%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_xml(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "nodes_configured", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "resources_configured", NULL); char *s = pcmk__itoa(nnodes); crm_xml_add(nodes_node, "number", s); free(s); s = pcmk__itoa(nresources); crm_xml_add(resources_node, "number", s); free(s); s = pcmk__itoa(ndisabled); crm_xml_add(resources_node, "disabled", s); free(s); s = pcmk__itoa(nblocked); crm_xml_add(resources_node, "blocked", s); free(s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *") static int cluster_dc_html(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Current DC: "); if (dc) { if (crm_is_true(quorum)) { char *buf = crm_strdup_printf("%s (version %s) - partition with quorum", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); } else { char *buf = crm_strdup_printf("%s (version %s) - partition", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); pcmk_create_html_node(node, "span", NULL, "warning", "WITHOUT"); pcmk_create_html_node(node, "span", NULL, NULL, "quorum"); } } else { pcmk_create_html_node(node ,"span", NULL, "warning", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *") static int cluster_dc_text(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); if (dc) { out->list_item(out, "Current DC", "%s (version %s) - partition %s quorum", dc_name, dc_version_s ? dc_version_s : "unknown", crm_is_true(quorum) ? "with" : "WITHOUT"); } else { out->list_item(out, "Current DC", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *") static int cluster_dc_xml(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name G_GNUC_UNUSED = va_arg(args, char *); if (dc) { pcmk__output_create_xml_node(out, "current_dc", "present", "true", "version", dc_version_s ? dc_version_s : "", "name", dc->details->uname, "id", dc->details->id, "with_quorum", pcmk__btoa(crm_is_true(quorum)), NULL); } else { pcmk__output_create_xml_node(out, "current_dc", "present", "false", NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("maint-mode", "unsigned long long int") static int cluster_maint_mode_text(pcmk__output_t *out, va_list args) { unsigned long long flags = va_arg(args, unsigned long long); if (pcmk_is_set(flags, pe_flag_maintenance_mode)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will not attempt to start, stop or recover services\n"); return pcmk_rc_ok; } else if (pcmk_is_set(flags, pe_flag_stop_everything)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will keep all resources stopped\n"); return pcmk_rc_ok; } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); out->list_item(out, NULL, "STONITH of failed nodes %s", pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); out->list_item(out, NULL, "Cluster is %s", pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster) ? "symmetric" : "asymmetric"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case no_quorum_suicide: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will not attempt to start, stop, or recover services)"); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "STOPPED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will keep all resources stopped)"); } else { out->list_item(out, NULL, "Resource management: enabled"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_log(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { return out->info(out, "Resource management is DISABLED. The cluster will not attempt to start, stop or recover services."); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { return out->info(out, "Resource management is DISABLED. The cluster has stopped all resources."); } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_text(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); out->list_item(out, NULL, "STONITH of failed nodes %s", pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); out->list_item(out, NULL, "Cluster is %s", pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster) ? "symmetric" : "asymmetric"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case no_quorum_suicide: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_xml(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); const char *no_quorum_policy = NULL; char *stonith_timeout_str = pcmk__itoa(data_set->stonith_timeout); char *priority_fencing_delay_str = pcmk__itoa(data_set->priority_fencing_delay * 1000); switch (data_set->no_quorum_policy) { case no_quorum_freeze: no_quorum_policy = "freeze"; break; case no_quorum_stop: no_quorum_policy = "stop"; break; case no_quorum_demote: no_quorum_policy = "demote"; break; case no_quorum_ignore: no_quorum_policy = "ignore"; break; case no_quorum_suicide: no_quorum_policy = "suicide"; break; } pcmk__output_create_xml_node(out, "cluster_options", "stonith-enabled", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)), "symmetric-cluster", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)), "no-quorum-policy", no_quorum_policy, "maintenance-mode", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)), "stop-all-resources", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stop_everything)), "stonith-timeout-ms", stonith_timeout_str, "priority-fencing-delay-ms", priority_fencing_delay_str, NULL); free(stonith_timeout_str); free(priority_fencing_delay_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *") static int cluster_stack_html(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Stack: "); pcmk_create_html_node(node, "span", NULL, NULL, stack_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *") static int cluster_stack_text(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); out->list_item(out, "Stack", "%s", stack_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *") static int cluster_stack_xml(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); pcmk__output_create_xml_node(out, "stack", "type", stack_s, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *") static int cluster_times_html(pcmk__output_t *out, va_list args) { const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "li", NULL); char *buf = last_changed_string(last_written, user, client, origin); pcmk_create_html_node(updated_node, "span", NULL, "bold", "Last updated: "); pcmk_create_html_node(updated_node, "span", NULL, NULL, pcmk__epoch2str(NULL)); pcmk_create_html_node(changed_node, "span", NULL, "bold", "Last change: "); pcmk_create_html_node(changed_node, "span", NULL, NULL, buf); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *") static int cluster_times_xml(pcmk__output_t *out, va_list args) { const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); pcmk__output_create_xml_node(out, "last_update", "time", pcmk__epoch2str(NULL), NULL); pcmk__output_create_xml_node(out, "last_change", "time", last_written ? last_written : "", "user", user ? user : "", "client", client ? client : "", "origin", origin ? origin : "", NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *") static int cluster_times_text(pcmk__output_t *out, va_list args) { const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *buf = last_changed_string(last_written, user, client, origin); out->list_item(out, "Last updated", "%s", pcmk__epoch2str(NULL)); out->list_item(out, "Last change", " %s", buf); free(buf); return pcmk_rc_ok; } /*! * \internal * \brief Display a failed action in less-technical natural language */ static void failed_action_friendly(pcmk__output_t *out, xmlNodePtr xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { char *rsc_id = NULL; char *task = NULL; guint interval_ms = 0; const char *last_change_str = NULL; time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key) || !parse_op_key(op_key, &rsc_id, &task, &interval_ms)) { rsc_id = strdup("unknown resource"); task = strdup("unknown action"); interval_ms = 0; } CRM_ASSERT((rsc_id != NULL) && (task != NULL)); str = g_string_sized_new(strlen(rsc_id) + strlen(task) + strlen(node_name) + 100); // reasonable starting size g_string_printf(str, "%s ", rsc_id); if (interval_ms != 0) { g_string_append_printf(str, "%s-interval ", pcmk__readable_interval(interval_ms)); } g_string_append_printf(str, "%s on %s", crm_action_str(task, interval_ms), node_name); if (status == PCMK_EXEC_DONE) { g_string_append_printf(str, " returned '%s'", services_ocf_exitcode_str(rc)); } else { g_string_append_printf(str, " could not be executed (%s)", pcmk_exec_status_str(status)); } if (!pcmk__str_empty(exit_reason)) { g_string_append_printf(str, " because '%s'", exit_reason); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change_epoch) == pcmk_ok) { last_change_str = pcmk__epoch2str(&last_change_epoch); if (last_change_str != NULL) { g_string_append_printf(str, " at %s", last_change_str); } } if (!pcmk__str_empty(exec_time)) { int exec_time_ms = 0; if ((pcmk__scan_min_int(exec_time, &exec_time_ms, 0) == pcmk_rc_ok) && (exec_time_ms > 0)) { g_string_append_printf(str, " after %s", pcmk__readable_interval(exec_time_ms)); } } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); free(rsc_id); free(task); } /*! * \internal * \brief Display a failed action with technical details */ static void failed_action_technical(pcmk__output_t *out, xmlNodePtr xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { const char *call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *queue_time = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); const char *exit_status = services_ocf_exitcode_str(rc); const char *lrm_status = pcmk_exec_status_str(status); const char *last_change_str = NULL; time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key)) { op_key = "unknown operation"; } if (pcmk__str_empty(exit_status)) { exit_status = "unknown exit status"; } if (pcmk__str_empty(call_id)) { call_id = "unknown"; } str = g_string_sized_new(strlen(op_key) + strlen(node_name) + strlen(exit_status) + strlen(call_id) + strlen(lrm_status) + 50); // rough estimate g_string_printf(str, "%s on %s '%s' (%d): call=%s, status='%s'", op_key, node_name, exit_status, rc, call_id, lrm_status); if (!pcmk__str_empty(exit_reason)) { g_string_append_printf(str, ", exitreason='%s'", exit_reason); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change_epoch) == pcmk_ok) { last_change_str = pcmk__epoch2str(&last_change_epoch); if (last_change_str != NULL) { g_string_append_printf(str, ", " XML_RSC_OP_LAST_CHANGE "='%s'", last_change_str); } } if (!pcmk__str_empty(queue_time)) { g_string_append_printf(str, ", queued=%sms", queue_time); } if (!pcmk__str_empty(exec_time)) { g_string_append_printf(str, ", exec=%sms", exec_time); } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "unsigned int") static int failed_action_default(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); unsigned int show_opts = va_arg(args, unsigned int); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); const char *node_name = crm_element_value(xml_op, XML_ATTR_UNAME); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); const char *exec_time = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); int rc; int status; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); if (pcmk__str_empty(op_key)) { op_key = ID(xml_op); } if (pcmk__str_empty(node_name)) { node_name = "unknown node"; } if (pcmk_is_set(show_opts, pcmk_show_failed_detail)) { failed_action_technical(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } else { failed_action_friendly(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "unsigned int") static int failed_action_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); unsigned int show_opts G_GNUC_UNUSED = va_arg(args, unsigned int); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int rc; int status; const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); time_t epoch = 0; char *rc_s = NULL; char *reason_s = crm_xml_escape(exit_reason ? exit_reason : "none"); xmlNodePtr node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); rc_s = pcmk__itoa(rc); node = pcmk__output_create_xml_node(out, "failure", (op_key == NULL)? "id" : "op_key", (op_key == NULL)? ID(xml_op) : op_key, "node", crm_element_value(xml_op, XML_ATTR_UNAME), "exitstatus", services_ocf_exitcode_str(rc), "exitreason", crm_str(reason_s), "exitcode", rc_s, "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "status", pcmk_exec_status_str(status), NULL); free(rc_s); if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { guint interval_ms = 0; char *s = NULL; crm_time_t *crm_when = crm_time_new_undefined(); char *rc_change = NULL; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); s = pcmk__itoa(interval_ms); crm_time_set_timet(crm_when, &epoch); rc_change = crm_time_as_string(crm_when, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); pcmk__xe_set_props(node, XML_RSC_OP_LAST_CHANGE, rc_change, "queued", crm_element_value(xml_op, XML_RSC_OP_T_QUEUE), "exec", crm_element_value(xml_op, XML_RSC_OP_T_EXEC), "interval", s, "task", crm_element_value(xml_op, XML_LRM_ATTR_TASK), NULL); free(s); free(rc_change); crm_time_free(crm_when); } free(reason_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action-list", "pe_working_set_t *", "GList *", "GList *", "unsigned int", "gboolean") static int failed_action_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, gboolean); gboolean print_spacer = va_arg(args, gboolean); xmlNode *xml_op = NULL; int rc = pcmk_rc_no_output; const char *id = NULL; if (xmlChildElementCount(data_set->failed) == 0) { return rc; } for (xml_op = pcmk__xml_first_child(data_set->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { char *rsc = NULL; if (!pcmk__str_in_list(crm_element_value(xml_op, XML_ATTR_UNAME), only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } id = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if (parse_op_key(id ? id : ID(xml_op), &rsc, NULL, NULL) == FALSE) { continue; } if (!pcmk__str_in_list(rsc, only_rsc, pcmk__str_star_matches)) { free(rsc); continue; } free(rsc); PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions"); out->message(out, "failed-action", xml_op, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void status_node(pe_node_t *node, xmlNodePtr parent) { if (node->details->standby_onfail && node->details->online) { pcmk_create_html_node(parent, "span", NULL, "standby", " standby (on-fail)"); } else if (node->details->standby && node->details->online) { char *s = crm_strdup_printf(" standby%s", node->details->running_rsc ? " (with active resources)" : ""); pcmk_create_html_node(parent, "span", NULL, " standby", s); free(s); } else if (node->details->standby) { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE (standby)"); } else if (node->details->maintenance && node->details->online) { pcmk_create_html_node(parent, "span", NULL, "maint", " maintenance"); } else if (node->details->maintenance) { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE (maintenance)"); } else if (node->details->online) { pcmk_create_html_node(parent, "span", NULL, "online", " online"); } else { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE"); } } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "unsigned int", "gboolean", "const char *", "GList *", "GList *") static int node_html(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); const char *node_mode G_GNUC_UNUSED = va_arg(args, const char *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (full) { xmlNodePtr item_node; if (pcmk_all_flags_set(show_opts, pcmk_show_brief | pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); out->begin_list(out, NULL, NULL, "Node: %s", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node); if (rscs != NULL) { unsigned int new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); } pcmk__output_xml_pop_parent(out); out->end_list(out); } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc2 = NULL; int rc = pcmk_rc_no_output; out->begin_list(out, NULL, NULL, "Node: %s", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { pe_resource_t *rsc = (pe_resource_t *) lpc2->data; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Resources"); out->message(out, crm_map_element_name(rsc->xml), show_opts | pcmk_show_rsc_only, rsc, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); pcmk__output_xml_pop_parent(out); out->end_list(out); } else { char *buf = crm_strdup_printf("Node: %s", node_name); item_node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, buf); status_node(node, item_node); free(buf); } } else { out->begin_list(out, NULL, NULL, "Node: %s", node_name); } free(node_name); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "unsigned int", "gboolean", "const char *", "GList *", "GList *") static int node_text(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); const char *node_mode = va_arg(args, const char *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); char *buf = NULL; /* Print the node name and status */ if (pe__is_guest_node(node)) { buf = crm_strdup_printf("GuestNode %s: %s", node_name, node_mode); } else if (pe__is_remote_node(node)) { buf = crm_strdup_printf("RemoteNode %s: %s", node_name, node_mode); } else { buf = crm_strdup_printf("Node %s: %s", node_name, node_mode); } /* If we're grouping by node, print its resources */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pcmk_is_set(show_opts, pcmk_show_brief)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); if (rscs != NULL) { unsigned int new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "%s", buf); out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); out->end_list(out); g_list_free(rscs); } } else { GList *gIter2 = NULL; out->begin_list(out, NULL, NULL, "%s", buf); out->begin_list(out, NULL, NULL, "Resources"); for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; out->message(out, crm_map_element_name(rsc->xml), show_opts | pcmk_show_rsc_only, rsc, only_node, only_rsc); } out->end_list(out); out->end_list(out); } } else { out->list_item(out, NULL, "%s", buf); } free(buf); free(node_name); } else { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->begin_list(out, NULL, NULL, "Node: %s", node_name); free(node_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "unsigned int", "gboolean", "const char *", "GList *", "GList *") static int node_xml(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); unsigned int show_opts G_GNUC_UNUSED = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); const char *node_mode G_GNUC_UNUSED = va_arg(args, const char *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { const char *node_type = "unknown"; char *length_s = pcmk__itoa(g_list_length(node->details->running_rsc)); switch (node->details->type) { case node_member: node_type = "member"; break; case node_remote: node_type = "remote"; break; case node_ping: node_type = "ping"; break; } pe__name_and_nvpairs_xml(out, true, "node", 13, "name", node->details->uname, "id", node->details->id, "online", pcmk__btoa(node->details->online), "standby", pcmk__btoa(node->details->standby), "standby_onfail", pcmk__btoa(node->details->standby_onfail), "maintenance", pcmk__btoa(node->details->maintenance), "pending", pcmk__btoa(node->details->pending), "unclean", pcmk__btoa(node->details->unclean), "shutdown", pcmk__btoa(node->details->shutdown), "expected_up", pcmk__btoa(node->details->expected_up), "is_dc", pcmk__btoa(node->details->is_dc), "resources_running", length_s, "type", node_type); if (pe__is_guest_node(node)) { xmlNodePtr xml_node = pcmk__output_xml_peek_parent(out); crm_xml_add(xml_node, "id_as_resource", node->details->remote_rsc->container->id); } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc = NULL; for (lpc = node->details->running_rsc; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; out->message(out, crm_map_element_name(rsc->xml), show_opts | pcmk_show_rsc_only, rsc, only_node, only_rsc); } } free(length_s); out->end_list(out); } else { pcmk__output_xml_create_parent(out, "node", "name", node->details->uname, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "gboolean", "int") static int node_attribute_text(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); if (add_extra) { int v; if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } if (v <= 0) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is lost", name, value); } else if (v < expected_score) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is degraded (Expected=%d)", name, value, expected_score); } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "gboolean", "int") static int node_attribute_html(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); if (add_extra) { int v; char *s = crm_strdup_printf("%s: %s", name, value); xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li", NULL); if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } pcmk_create_html_node(item_node, "span", NULL, NULL, s); free(s); if (v <= 0) { pcmk_create_html_node(item_node, "span", NULL, "bold", "(connectivity is lost)"); } else if (v < expected_score) { char *buf = crm_strdup_printf("(connectivity is degraded -- expected %d", expected_score); pcmk_create_html_node(item_node, "span", NULL, "bold", buf); free(buf); } } else { out->list_item(out, NULL, "%s: %s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pe_working_set_t *", "xmlNodePtr") static int node_and_op(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pe_resource_t *rsc = NULL; gchar *node_str = NULL; char *last_change_str = NULL; const char *op_rsc = crm_element_value(xml_op, "resource"); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status; time_t last_change = 0; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); rsc = pe_find_resource(data_set->resources, op_rsc); if (rsc) { pe_node_t *node = pe__current_node(rsc); const char *target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); unsigned int show_opts = pcmk_show_rsc_only | pcmk_show_pending; if (node == NULL) { node = rsc->pending_node; } node_str = pcmk__native_output_string(rsc, rsc_printable_id(rsc), node, show_opts, target_role, false); } else { node_str = crm_strdup_printf("Unknown resource %s", op_rsc); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change) == pcmk_ok) { last_change_str = crm_strdup_printf(", %s=%s, exec=%sms", XML_RSC_OP_LAST_CHANGE, pcmk__trim(ctime(&last_change)), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); } out->list_item(out, NULL, "%s: %s (node=%s, call=%s, rc=%s%s): %s", node_str, op_key ? op_key : ID(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), crm_element_value(xml_op, XML_LRM_ATTR_CALLID), crm_element_value(xml_op, XML_LRM_ATTR_RC), last_change_str ? last_change_str : "", pcmk_exec_status_str(status)); g_free(node_str); free(last_change_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pe_working_set_t *", "xmlNodePtr") static int node_and_op_xml(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pe_resource_t *rsc = NULL; const char *op_rsc = crm_element_value(xml_op, "resource"); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status; time_t last_change = 0; xmlNode *node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); node = pcmk__output_create_xml_node(out, "operation", "op", op_key ? op_key : ID(xml_op), "node", crm_element_value(xml_op, XML_ATTR_UNAME), "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "rc", crm_element_value(xml_op, XML_LRM_ATTR_RC), "status", pcmk_exec_status_str(status), NULL); rsc = pe_find_resource(data_set->resources, op_rsc); if (rsc) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); char *agent_tuple = NULL; agent_tuple = crm_strdup_printf("%s:%s:%s", class, pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) ? crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER) : "", kind); pcmk__xe_set_props(node, "rsc", rsc_printable_id(rsc), "agent", agent_tuple, NULL); free(agent_tuple); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change) == pcmk_ok) { pcmk__xe_set_props(node, XML_RSC_OP_LAST_CHANGE, pcmk__trim(ctime(&last_change)), XML_RSC_OP_T_EXEC, crm_element_value(xml_op, XML_RSC_OP_T_EXEC), NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "gboolean", "int") static int node_attribute_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "attribute", "name", name, "value", value, NULL); if (add_extra) { char *buf = pcmk__itoa(expected_score); crm_xml_add(node, "expected", buf); free(buf); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute-list", "pe_working_set_t *", "unsigned int", "gboolean", "GList *", "GList *") static int node_attribute_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); int rc = pcmk_rc_no_output; /* Display each node's attributes */ for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = gIter->data; GList *attr_list = NULL; GHashTableIter iter; gpointer key; if (!node || !node->details || !node->details->online) { continue; } g_hash_table_iter_init(&iter, node->details->attrs); while (g_hash_table_iter_next (&iter, &key, NULL)) { attr_list = filter_attr_list(attr_list, key); } if (attr_list == NULL) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { g_list_free(attr_list); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes"); out->message(out, "node", node, show_opts, FALSE, NULL, only_node, only_rsc); for (GList *aIter = attr_list; aIter != NULL; aIter = aIter->next) { const char *name = aIter->data; const char *value = NULL; int expected_score = 0; gboolean add_extra = FALSE; value = pe_node_attribute_raw(node, name); add_extra = add_extra_info(node, node->details->running_rsc, data_set, name, &expected_score); /* Print attribute name and value */ out->message(out, "node-attribute", name, value, add_extra, expected_score); } g_list_free(attr_list); out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-capacity", "pe_node_t *", "const char *") static int node_capacity(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); const char *comment = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-capacity", "pe_node_t *", "const char *") static int node_capacity_xml(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); const char *comment = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, "capacity", "node", node->details->uname, "comment", comment, NULL); g_hash_table_foreach(node->details->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-history-list", "pe_working_set_t *", "pe_node_t *", "xmlNodePtr", "GList *", "GList *", "unsigned int", "unsigned int") static int node_history_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); pe_node_t *node = va_arg(args, pe_node_t *); xmlNode *node_state = va_arg(args, xmlNode *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; int rc = pcmk_rc_no_output; lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); /* Print history of each of the node's resources */ for (rsc_entry = first_named_child(lrm_rsc, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); /* We can't use is_filtered here to filter group resources. For is_filtered, * we have to decide whether to check the parent or not. If we check the * parent, all elements of a group will always be printed because that's how * is_filtered works for groups. If we do not check the parent, sometimes * this will filter everything out. * * For other resource types, is_filtered is okay. */ if (uber_parent(rsc)->variant == pe_group) { if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(uber_parent(rsc)), only_rsc, pcmk__str_star_matches)) { continue; } } else { if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { continue; } } if (!pcmk_is_set(section_opts, pcmk_section_operations)) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pe_fc_default, NULL, data_set); if (failcount <= 0) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, FALSE, NULL, only_node, only_rsc); } out->message(out, "resource-history", rsc, rsc_id, FALSE, failcount, last_failure, FALSE); } else { GList *op_list = get_operation_list(rsc_entry); pe_resource_t *rsc = pe_find_resource(data_set->resources, crm_element_value(rsc_entry, XML_ATTR_ID)); if (op_list == NULL) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, FALSE, NULL, only_node, only_rsc); } out->message(out, "resource-operation-list", data_set, rsc, node, op_list, show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "unsigned int", "gboolean") static int node_list_html(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer G_GNUC_UNUSED = va_arg(args, gboolean); int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Node List"); out->message(out, "node", node, show_opts, TRUE, NULL, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "unsigned int", "gboolean") static int node_list_text(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); /* space-separated lists of node names */ char *online_nodes = NULL; char *online_remote_nodes = NULL; char *online_guest_nodes = NULL; char *offline_nodes = NULL; char *offline_remote_nodes = NULL; size_t online_nodes_len = 0; size_t online_remote_nodes_len = 0; size_t online_guest_nodes_len = 0; size_t offline_nodes_len = 0; size_t offline_remote_nodes_len = 0; int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; const char *node_mode = NULL; char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { free(node_name); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer == TRUE, rc, "Node List"); /* Get node mode */ if (node->details->unclean) { if (node->details->online) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { if (node->details->running_rsc) { node_mode = "standby (with active resources)"; } else { node_mode = "standby"; } } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { node_mode = "maintenance"; } else { node_mode = "OFFLINE (maintenance)"; } } else if (node->details->online) { node_mode = "online"; if (!pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pe__is_guest_node(node)) { pcmk__add_word(&online_guest_nodes, &online_guest_nodes_len, node_name); } else if (pe__is_remote_node(node)) { pcmk__add_word(&online_remote_nodes, &online_remote_nodes_len, node_name); } else { pcmk__add_word(&online_nodes, &online_nodes_len, node_name); } free(node_name); continue; } } else { node_mode = "OFFLINE"; if (!pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pe__is_remote_node(node)) { pcmk__add_word(&offline_remote_nodes, &offline_remote_nodes_len, node_name); } else if (pe__is_guest_node(node)) { /* ignore offline guest nodes */ } else { pcmk__add_word(&offline_nodes, &offline_nodes_len, node_name); } free(node_name); continue; } } /* If we get here, node is in bad state, or we're grouping by node */ out->message(out, "node", node, show_opts, TRUE, node_mode, only_node, only_rsc); free(node_name); } /* If we're not grouping by node, summarize nodes by status */ if (online_nodes) { out->list_item(out, "Online", "[ %s ]", online_nodes); free(online_nodes); } if (offline_nodes) { out->list_item(out, "OFFLINE", "[ %s ]", offline_nodes); free(offline_nodes); } if (online_remote_nodes) { out->list_item(out, "RemoteOnline", "[ %s ]", online_remote_nodes); free(online_remote_nodes); } if (offline_remote_nodes) { out->list_item(out, "RemoteOFFLINE", "[ %s ]", offline_remote_nodes); free(offline_remote_nodes); } if (online_guest_nodes) { out->list_item(out, "GuestOnline", "[ %s ]", online_guest_nodes); free(online_guest_nodes); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "unsigned int", "gboolean") static int node_list_xml(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer G_GNUC_UNUSED = va_arg(args, gboolean); out->begin_list(out, NULL, NULL, "nodes"); for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } out->message(out, "node", node, show_opts, TRUE, NULL, only_node, only_rsc); } out->end_list(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-summary", "pe_working_set_t *", "GList *", "GList *", "unsigned int", "unsigned int", "gboolean") static int node_summary(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); xmlNode *node_state = NULL; xmlNode *cib_status = pcmk_find_cib_element(data_set->input, XML_CIB_TAG_STATUS); int rc = pcmk_rc_no_output; if (xmlChildElementCount(cib_status) == 0) { return rc; } for (node_state = first_named_child(cib_status, XML_CIB_TAG_STATE); node_state != NULL; node_state = crm_next_same_xml(node_state)) { pe_node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if (!node || !node->details || !node->details->online) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, pcmk_is_set(section_opts, pcmk_section_operations) ? "Operations" : "Migration Summary"); out->message(out, "node-history-list", data_set, node, node_state, only_node, only_rsc, section_opts, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-weight", "pe_resource_t *", "const char *", "const char *", "char *") static int node_weight(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); char *score = va_arg(args, char *); if (rsc) { out->list_item(out, NULL, "%s: %s allocation score on %s: %s", prefix, rsc->id, uname, score); } else { out->list_item(out, NULL, "%s: %s = %s", prefix, uname, score); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-weight", "pe_resource_t *", "const char *", "const char *", "char *") static int node_weight_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); char *score = va_arg(args, char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "node_weight", "function", prefix, "node", uname, "score", score, NULL); if (rsc) { crm_xml_add(node, "id", rsc->id); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "unsigned int") static int op_history_text(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); unsigned int show_opts = va_arg(args, unsigned int); char *buf = op_history_string(xml_op, task, interval_ms_s, rc, pcmk_is_set(show_opts, pcmk_show_timing)); out->list_item(out, NULL, "%s", buf); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "unsigned int") static int op_history_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); unsigned int show_opts = va_arg(args, unsigned int); char *rc_s = pcmk__itoa(rc); xmlNodePtr node = pcmk__output_create_xml_node(out, "operation_history", "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "task", task, "rc", rc_s, "rc_text", services_ocf_exitcode_str(rc), NULL); free(rc_s); if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *s = crm_strdup_printf("%sms", interval_ms_s); crm_xml_add(node, "interval", s); free(s); } if (pcmk_is_set(show_opts, pcmk_show_timing)) { const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { crm_xml_add(node, XML_RSC_OP_LAST_CHANGE, pcmk__epoch2str(&epoch)); } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, XML_RSC_OP_T_EXEC, s); free(s); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, XML_RSC_OP_T_QUEUE, s); free(s); } } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pe_resource_t *", "pe_node_t *", "char *") static int promotion_score(pcmk__output_t *out, va_list args) { pe_resource_t *child_rsc = va_arg(args, pe_resource_t *); pe_node_t *chosen = va_arg(args, pe_node_t *); char *score = va_arg(args, char *); out->list_item(out, NULL, "%s promotion score on %s: %s", child_rsc->id, chosen? chosen->details->uname : "none", score); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pe_resource_t *", "pe_node_t *", "char *") static int promotion_score_xml(pcmk__output_t *out, va_list args) { pe_resource_t *child_rsc = va_arg(args, pe_resource_t *); pe_node_t *chosen = va_arg(args, pe_node_t *); char *score = va_arg(args, char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "promotion_score", "id", child_rsc->id, "score", score, NULL); if (chosen) { crm_xml_add(node, "node", chosen->details->uname); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "pe_resource_t *", "gboolean") static int resource_config(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); gboolean raw = va_arg(args, gboolean); char *rsc_xml = NULL; if (raw) { rsc_xml = dump_xml_formatted(rsc->orig_xml ? rsc->orig_xml : rsc->xml); } else { rsc_xml = dump_xml_formatted(rsc->xml); } pcmk__formatted_printf(out, "Resource XML:\n"); out->output_xml(out, "xml", rsc_xml); free(rsc_xml); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pe_resource_t *", "const char *", "gboolean", "int", "time_t", "gboolean") static int resource_history_text(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *rsc_id = va_arg(args, const char *); gboolean all = va_arg(args, gboolean); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, int); gboolean as_header = va_arg(args, gboolean); char *buf = resource_history_string(rsc, rsc_id, all, failcount, last_failure); if (as_header) { out->begin_list(out, NULL, NULL, "%s", buf); } else { out->list_item(out, NULL, "%s", buf); } free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pe_resource_t *", "const char *", "gboolean", "int", "time_t", "gboolean") static int resource_history_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *rsc_id = va_arg(args, const char *); gboolean all = va_arg(args, gboolean); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, int); gboolean as_header = va_arg(args, gboolean); xmlNodePtr node = pcmk__output_xml_create_parent(out, "resource_history", "id", rsc_id, NULL); if (rsc == NULL) { pcmk__xe_set_bool_attr(node, "orphan", true); } else if (all || failcount || last_failure > 0) { char *migration_s = pcmk__itoa(rsc->migration_threshold); pcmk__xe_set_props(node, "orphan", "false", "migration-threshold", migration_s, NULL); free(migration_s); if (failcount > 0) { char *s = pcmk__itoa(failcount); crm_xml_add(node, PCMK__FAIL_COUNT_PREFIX, s); free(s); } if (last_failure > 0) { crm_xml_add(node, PCMK__LAST_FAILURE_PREFIX, pcmk__epoch2str(&last_failure)); } } if (as_header == FALSE) { pcmk__output_xml_pop_parent(out); } return pcmk_rc_ok; } static void print_resource_header(pcmk__output_t *out, unsigned int show_opts) { if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { /* Active resources have already been printed by node */ out->begin_list(out, NULL, NULL, "Inactive Resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->begin_list(out, NULL, NULL, "Full List of Resources"); } else { out->begin_list(out, NULL, NULL, "Active Resources"); } } PCMK__OUTPUT_ARGS("resource-list", "pe_working_set_t *", "unsigned int", "gboolean", "GList *", "GList *", "gboolean") static int resource_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_summary = va_arg(args, gboolean); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); gboolean print_spacer = va_arg(args, gboolean); GList *rsc_iter; int rc = pcmk_rc_no_output; bool printed_header = false; /* If we already showed active resources by node, and * we're not showing inactive resources, we have nothing to do */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node) && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { return rc; } /* If we haven't already printed resources grouped by node, * and brief output was requested, print resource summary */ if (pcmk_is_set(show_opts, pcmk_show_brief) && !pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(data_set->resources, only_rsc); PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; rc = pe__rscs_brief_output(out, rscs, show_opts); g_list_free(rscs); } /* For each resource, display it if appropriate */ for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { pe_resource_t *rsc = (pe_resource_t *) rsc_iter->data; int x; /* Complex resources may have some sub-resources active and some inactive */ gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); /* Skip inactive orphans (deleted but still in CIB) */ if (pcmk_is_set(rsc->flags, pe_rsc_orphan) && !is_active) { continue; /* Skip active resources if we already displayed them by node */ } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (is_active) { continue; } /* Skip primitives already counted in a brief summary */ } else if (pcmk_is_set(show_opts, pcmk_show_brief) && (rsc->variant == pe_native)) { continue; /* Skip resources that aren't at least partially active, * unless we're displaying inactive resources */ } else if (!partially_active && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { continue; } else if (partially_active && !pe__rsc_running_on_any(rsc, only_node)) { continue; } if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } /* Print this resource */ x = out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); if (x == pcmk_rc_ok) { rc = pcmk_rc_ok; } } if (print_summary && rc != pcmk_rc_ok) { if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { out->list_item(out, NULL, "No inactive resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->list_item(out, NULL, "No resources"); } else { out->list_item(out, NULL, "No active resources"); } } if (printed_header) { out->end_list(out); } return rc; } PCMK__OUTPUT_ARGS("resource-operation-list", "pe_working_set_t *", "pe_resource_t *", "pe_node_t *", "GList *", "unsigned int") static int resource_operation_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); GList *op_list = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); GList *gIter = NULL; int rc = pcmk_rc_no_output; /* Print each operation */ for (gIter = op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* If this is the first printed operation, print heading for resource */ if (rc == pcmk_rc_no_output) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pe_fc_default, NULL, data_set); out->message(out, "resource-history", rsc, rsc_printable_id(rsc), TRUE, failcount, last_failure, TRUE); rc = pcmk_rc_ok; } /* Print the operation */ out->message(out, "op-history", xml_op, task, interval_ms_s, op_rc_i, show_opts); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("resource-util", "pe_resource_t *", "pe_node_t *", "const char *") static int resource_util(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *fn = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", fn, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-util", "pe_resource_t *", "pe_node_t *", "const char *") static int resource_util_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *fn = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, "utilization", "resource", rsc->id, "node", node->details->uname, "function", fn, NULL); g_hash_table_foreach(rsc->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_html(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); if (ticket->last_granted > -1) { char *time = pcmk__format_named_time("last-granted", ticket->last_granted); out->list_item(out, NULL, "%s:\t%s%s %s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", time); free(time); } else { out->list_item(out, NULL, "%s:\t%s%s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_text(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); if (ticket->last_granted > -1) { char *time = pcmk__format_named_time("last-granted", ticket->last_granted); out->list_item(out, ticket->id, "%s%s %s", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", time); free(time); } else { out->list_item(out, ticket->id, "%s%s", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_xml(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); xmlNodePtr node = NULL; node = pcmk__output_create_xml_node(out, "ticket", "id", ticket->id, "status", ticket->granted ? "granted" : "revoked", "standby", pcmk__btoa(ticket->standby), NULL); if (ticket->last_granted > -1) { crm_xml_add(node, "last-granted", pcmk__epoch2str(&ticket->last_granted)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-list", "pe_working_set_t *", "gboolean") static int ticket_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); gboolean print_spacer = va_arg(args, gboolean); GHashTableIter iter; gpointer key, value; if (g_hash_table_size(data_set->tickets) == 0) { return pcmk_rc_no_output; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); /* Print section heading */ out->begin_list(out, NULL, NULL, "Tickets"); /* Print each ticket */ g_hash_table_iter_init(&iter, data_set->tickets); while (g_hash_table_iter_next(&iter, &key, &value)) { pe_ticket_t *ticket = (pe_ticket_t *) value; out->message(out, "ticket", ticket); } /* Close section */ out->end_list(out); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "ban", "default", ban_text }, { "ban", "html", ban_html }, { "ban", "xml", ban_xml }, { "ban-list", "default", ban_list }, { "bundle", "default", pe__bundle_text }, { "bundle", "xml", pe__bundle_xml }, { "bundle", "html", pe__bundle_html }, { "clone", "default", pe__clone_default }, { "clone", "xml", pe__clone_xml }, { "cluster-counts", "default", cluster_counts_text }, { "cluster-counts", "html", cluster_counts_html }, { "cluster-counts", "xml", cluster_counts_xml }, { "cluster-dc", "default", cluster_dc_text }, { "cluster-dc", "html", cluster_dc_html }, { "cluster-dc", "xml", cluster_dc_xml }, { "cluster-options", "default", cluster_options_text }, { "cluster-options", "html", cluster_options_html }, { "cluster-options", "log", cluster_options_log }, { "cluster-options", "xml", cluster_options_xml }, { "cluster-summary", "default", cluster_summary }, { "cluster-summary", "html", cluster_summary_html }, { "cluster-stack", "default", cluster_stack_text }, { "cluster-stack", "html", cluster_stack_html }, { "cluster-stack", "xml", cluster_stack_xml }, { "cluster-times", "default", cluster_times_text }, { "cluster-times", "html", cluster_times_html }, { "cluster-times", "xml", cluster_times_xml }, { "failed-action", "default", failed_action_default }, { "failed-action", "xml", failed_action_xml }, { "failed-action-list", "default", failed_action_list }, { "group", "default", pe__group_default}, { "group", "xml", pe__group_xml }, { "maint-mode", "text", cluster_maint_mode_text }, { "node", "default", node_text }, { "node", "html", node_html }, { "node", "xml", node_xml }, { "node-and-op", "default", node_and_op }, { "node-and-op", "xml", node_and_op_xml }, { "node-capacity", "default", node_capacity }, { "node-capacity", "xml", node_capacity_xml }, { "node-history-list", "default", node_history_list }, { "node-list", "default", node_list_text }, { "node-list", "html", node_list_html }, { "node-list", "xml", node_list_xml }, { "node-weight", "default", node_weight }, { "node-weight", "xml", node_weight_xml }, { "node-attribute", "default", node_attribute_text }, { "node-attribute", "html", node_attribute_html }, { "node-attribute", "xml", node_attribute_xml }, { "node-attribute-list", "default", node_attribute_list }, { "node-summary", "default", node_summary }, { "op-history", "default", op_history_text }, { "op-history", "xml", op_history_xml }, { "primitive", "default", pe__resource_text }, { "primitive", "xml", pe__resource_xml }, { "primitive", "html", pe__resource_html }, { "promotion-score", "default", promotion_score }, { "promotion-score", "xml", promotion_score_xml }, { "resource-config", "default", resource_config }, { "resource-history", "default", resource_history_text }, { "resource-history", "xml", resource_history_xml }, { "resource-list", "default", resource_list }, { "resource-operation-list", "default", resource_operation_list }, { "resource-util", "default", resource_util }, { "resource-util", "xml", resource_util_xml }, { "ticket", "default", ticket_text }, { "ticket", "html", ticket_html }, { "ticket", "xml", ticket_xml }, { "ticket-list", "default", ticket_list }, { NULL, NULL, NULL } }; void pe__register_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } void pe__output_node(pe_node_t *node, gboolean details, pcmk__output_t *out) { if (node == NULL) { crm_trace(""); return; } CRM_ASSERT(node->details); crm_trace("%sNode %s: (weight=%d, fixed=%s)", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { char *pe_mutable = strdup("\t\t"); GList *gIter = node->details->running_rsc; GList *all = NULL; all = g_list_prepend(all, (gpointer) "*"); crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; out->message(out, crm_map_element_name(rsc->xml), pe_print_pending, rsc, all, all); } g_list_free(all); } }