diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 42a9a9057f..a9a495cff0 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2443 +1,2443 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static void lrm_connection_destroy(void) { if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Connection to executor failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); } else { crm_info("Disconnected from executor"); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); controld_delete_resource_history(op->rsc_id, lrm_state->node_name, NULL, crmd_cib_smart_opt()); return; } if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); pcmk__str_update(&entry->rsc.provider, rsc->provider); } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_EXEC_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_MONITOR, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = pcmk__strkey_table(free, free); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if ((entry->recurring_op_list != NULL) && !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR, pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input, const char *rsc_id, const lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return pcmk__s(op->remote_nodename, controld_globals.our_nodename); } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } static void try_local_executor_connect(long long action, fsa_data_t *msg_data, lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; crm_debug("Connecting to the local executor"); // If we can connect, great rc = controld_connect_local_executor(lrm_state); if (rc == pcmk_rc_ok) { controld_set_fsa_input_flags(R_LRM_CONNECTED); crm_info("Connection to the local executor established"); return; } // Otherwise, if we can try again, set a timer to do so if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the local executor %d time%s " "(%d max): %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); controld_start_wait_timer(); crmd_fsa_stall(FALSE); return; } // Otherwise give up crm_err("Failed to connect to the executor the max allowed " "%d time%s: %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), pcmk_rc_str(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if (controld_globals.our_nodename == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(controld_globals.our_nodename); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } controld_clear_fsa_input_flags(R_LRM_CONNECTED); crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { try_local_executor_connect(action, msg_data, lrm_state); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; active_op_t *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) { guint removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->active_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, pcmk__plural_s(removed), when, nremaining); } } if (lrm_state->active_ops != NULL) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, pcmk__plural_s(counter), when); if ((cur_state == S_TERMINATE) || !pcmk_is_set(controld_globals.fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->active_ops != NULL) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->active_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if ((entry->last->rc == PCMK_OCF_OK) && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP, pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name); controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name); } } return FALSE; } xmlNode * controld_query_executor_state(void) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename); if (!lrm_state) { crm_err("Could not find executor state for node %s", controld_globals.our_nodename); return NULL; } peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, node_update_cluster|node_update_peer, NULL, __func__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } /*! * \internal * \brief Map standard Pacemaker return code to operation status and OCF code * * \param[out] event Executor event whose status and return code should be set * \param[in] rc Standard Pacemaker return code */ void controld_rc2event(lrmd_event_data_t *event, int rc) { /* This is called for cleanup requests from controller peers/clients, not * for resource actions, so no exit reason is needed. */ switch (rc) { case pcmk_rc_ok: lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); break; case EACCES: lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, PCMK_EXEC_ERROR, NULL); break; default: lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, NULL); break; } } /*! * \internal * \brief Trigger a new transition after CIB status was deleted * * If a CIB status delete was not expected (as part of the transition graph), * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" * cluster property. * * \param[in] from_sys IPC name that requested the delete * \param[in] rsc_id Resource whose status was deleted (for logging only) */ void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, NULL, NULL); free(now_s); } } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; active_op_t *pending = value; if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } static void delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, GHashTableIter *rsc_iter, int rc, const char *user_name, bool from_cib) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_iter) { g_hash_table_iter_remove(rsc_iter); } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); } if (from_cib) { controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); } g_hash_table_foreach_remove(lrm_state->active_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } if (lrm_state->resource_history != NULL) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; active_op_t *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->active_ops, key); if (pending) { if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { controld_set_active_op_flags(pending, active_op_remove); crm_debug("Scheduling %s for removal", key); } if (pcmk_is_set(pending->flags, active_op_cancelled)) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } controld_set_active_op_flags(pending, active_op_cancelled); } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the active operations list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from active operations will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; active_op_t *op = value; if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->active_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in,out] lrm_state Executor connection state to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource if not already * \param[out] rsc_info Where to store information obtained from executor * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc, GHashTableIter *iter, const char *sys, const char *user, ha_msg_input_t *request, bool unregister, bool from_cib) { int rc = pcmk_ok; crm_info("Removing resource %s from executor for %s%s%s", id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Could not delete '%s' from executor for %s%s%s: %s " CRM_XS " rc=%d", id, sys, (user? " as " : ""), (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode, const char *exit_reason) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, op_exitcode, op_status, exit_reason); } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ bool unregister = true; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { unregister = false; if (reprobe_all_nodes) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state != NULL) { /* If reprobing all nodes, be sure to reprobe the remote * node before clearing its connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE, reprobe_all_nodes); } } } /* Don't delete from the CIB, since we'll delete the whole node's LRM * state from the CIB soon */ delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister, false); } /* Now delete the copy in the CIB */ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, cib_scope_local); // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false) update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in,out] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use * \param[in] exit_reason Human-friendly detail, if error */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action, int op_status, enum ocf_exitcode rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); } crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation (replacing \p NULL with local node * name) * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(const xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = controld_globals.our_nodename; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; if (user_name && !pcmk__is_privileged(user_name)) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_INSUFFICIENT_PRIV, "Unprivileged user cannot fail resources"); controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, "Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, "Cannot fail unknown resource"); } controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node, reprobe_all_nodes); if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, controld_globals.our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; guint interval_ms = 0; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { free(meta_key); return FALSE; } free(meta_key); op_key = pcmk__op_key(rsc->id, op_task, interval_ms); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); pcmk__scan_min_int(call_id, &call, 0); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } controld_delete_action_history_by_key(rsc->id, lrm_state->node_name, op_key, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ if (lrm_state->active_ops != NULL) { g_hash_table_remove(lrm_state->active_ops, op_id); } free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { bool unregister = true; int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, user_name, cib_dryrun|cib_sync_call); if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. */ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = false; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister, true); } // User data for asynchronous metadata execution struct metadata_cb_data { lrmd_rsc_info_t *rsc; // Copy of resource information xmlNode *input_xml; // Copy of FSA input XML }; static struct metadata_cb_data * new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) { struct metadata_cb_data *data = NULL; data = calloc(1, sizeof(struct metadata_cb_data)); CRM_ASSERT(data != NULL); data->input_xml = copy_xml(input_xml); data->rsc = lrmd_copy_rsc_info(rsc); return data; } static void free_metadata_cb_data(struct metadata_cb_data *data) { lrmd_free_rsc_info(data->rsc); free_xml(data->input_xml); free(data); } /*! * \internal * \brief Execute an action after metadata has been retrieved * * \param[in] pid Ignored * \param[in] result Result of metadata action * \param[in] user_data Metadata callback data */ static void metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) { struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; struct ra_metadata_s *md = NULL; lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml)); if ((lrm_state != NULL) && pcmk__result_ok(result)) { md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, result->action_stdout); } do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); free_metadata_cb_data(data); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = lrm_op_target(input->xml); gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; // Message routed to the local node is targeting a specific, non-local node is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename, pcmk__str_casei); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Local node has no connection to remote"); return; } CRM_ASSERT(lrm_state != NULL); user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL); crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } - if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_none)) { + if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { crm_rsc_delete = TRUE; // from crm_resource } operation = PCMK_ACTION_DELETE; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return); crm_trace("'%s' execution request from %s as %s user", pcmk__s(crm_op, operation), pcmk__s(from_sys, "unknown subsystem"), pcmk__s(user_name, "current")); if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) { /* @COMPAT This can only be sent by crm_resource --refresh on a * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely * unlikely. It previously would cause the controller to re-write its * resource history to the CIB. Just ignore it. */ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none) || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { const char *raw_target = NULL; if (input->xml != NULL) { // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); } handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node, (raw_target == NULL)); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Not connected to remote executor"); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name, true); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_NOT_CONFIGURED, // fatal error "Invalid resource definition"); return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_INVALID_PARAM, // hard error "Could not register resource with executor"); return; } if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { struct ra_metadata_s *md = NULL; /* Getting metadata from cache is OK except for start actions -- * always refresh from the agent for those, in case the resource * agent was updated. * * @TODO Only refresh metadata for starts if the agent actually * changed (using something like inotify, or a hash or modification * time of the agent executable). */ if (strcmp(operation, PCMK_ACTION_START) != 0) { md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); } if ((md == NULL) && crm_op_needs_metadata(rsc->standard, operation)) { /* Most likely, we'll need the agent metadata to record the * pending operation and the operation result. Get it now rather * than wait until then, so the metadata action doesn't eat into * the real action's timeout. * * @TODO Metadata is retrieved via direct execution of the * agent, which has a couple of related issues: the executor * should execute agents, not the controller; and metadata for * Pacemaker Remote nodes should be collected on those nodes, * not locally. */ struct metadata_cb_data *data = NULL; data = new_metadata_cb_data(rsc, input->xml); crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), ((rsc->provider == NULL)? "" : rsc->provider), rsc->type); (void) lrmd__metadata_async(rsc, metadata_complete, (void *) data); } else { do_lrm_rsc_op(lrm_state, rsc, input->xml, md); } } lrmd_free_rsc_info(rsc); } else { crm_err("Invalid execution request: unknown command '%s' (bug?)", crm_op); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; GHashTable *params = NULL; xmlNode *primitive = NULL; const char *class = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = lrmd_new_event(rsc_id, operation, 0); op->type = lrmd_event_exec_complete; op->timeout = 0; op->start_delay = 0; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = pcmk__strkey_table(free, free); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); pcmk__scan_min_int(op_delay, &op->start_delay, 0); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); pcmk__scan_min_int(op_timeout, &op->timeout, 0); if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0, &(op->interval_ms)) != pcmk_rc_ok) { op->interval_ms = 0; } /* Use pcmk_monitor_timeout instead of meta timeout for stonith recurring monitor, if set */ primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE); class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS); if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (op_timeout != NULL) { op->timeout = crm_get_msec(op_timeout); } } if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = pcmk__strkey_table(free, free); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } /*! * \internal * \brief Send a (synthesized) event result * * Reply with a synthesized event result directly, as opposed to going through * the executor. * * \param[in] to_host Host to send result to * \param[in] to_sys IPC name to send result (NULL for transition engine) * \param[in] rsc Type information about resource the result is for * \param[in,out] op Event with result to send * \param[in] rsc_id ID of resource the result is for */ void controld_ack_event_directly(const char *to_host, const char *to_sys, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, controld_globals.our_nodename); update = create_node_state_update(peer, node_update_none, NULL, __func__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); controld_add_resource_history_xml(iter, rsc, op, controld_globals.our_nodename); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } controld_set_fsa_input_flags(R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; active_op_t *op = value; if ((op->interval_ms != 0) && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; active_op_t *op = value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } /*! * \internal * \brief Check whether recurring actions should be cancelled before an action * * \param[in] rsc_id Resource that action is for * \param[in] action Action being performed * \param[in] interval_ms Operation interval of \p action (in milliseconds) * * \return true if recurring actions should be cancelled, otherwise false */ static bool should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms) { if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0) && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) { /* Don't stop monitoring a migrating Pacemaker Remote connection * resource until the entire migration has completed. We must detect if * the connection is unexpectedly severed, even during a migration. */ return false; } // Cancel recurring actions before changing resource state return (interval_ms == 0) && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR, PCMK_ACTION_NOTIFY, NULL); } /*! * \internal * \brief Check whether an action should not be performed at this time * * \param[in] operation Action to be performed * * \return Readable description of why action should not be performed, * or NULL if it should be performed */ static const char * should_nack_action(const char *action) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN) && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); return "Not attempting start due to shutdown in progress"; } switch (controld_globals.fsa_state) { case S_NOT_DC: case S_POLICY_ENGINE: // Recalculating case S_TRANSITION_ENGINE: break; default: if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return "Controller cannot attempt actions at this time"; } break; } return NULL; } static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md) { int rc; int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; const char *operation = NULL; const char *nack_reason = NULL; CRM_CHECK((rsc != NULL) && (msg != NULL), return); operation = crm_element_value(msg, XML_LRM_ATTR_TASK); CRM_CHECK(!pcmk__str_empty(operation), return); transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (pcmk__str_empty(transition)) { crm_log_xml_err(msg, "Missing transition number"); } if (lrm_state == NULL) { // This shouldn't be possible, but provide a failsafe just in case crm_err("Cannot execute %s of %s: No executor connection " CRM_XS " transition_key=%s", operation, rsc->id, pcmk__s(transition, "")); synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, PCMK_OCF_UNKNOWN_ERROR, "No executor connection"); return; } if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. * Default to the OCF 1.1 name. */ if ((md != NULL) && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { operation = PCMK_ACTION_RELOAD; } else { operation = PCMK_ACTION_RELOAD_AGENT; } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " PCMK__OP_FMT, removed, pcmk__plural_s(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_notice("Requesting local execution of %s operation for %s on %s " CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, pcmk__s(transition, ""), rsc->id, operation, op->interval_ms); nack_reason = should_nack_action(operation); if (nack_reason != NULL) { crm_notice("Discarding attempt to perform action %s on %s in state %s " "(shutdown=%s)", operation, rsc->id, fsa_state2string(controld_globals.fsa_state), pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN))); lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, nack_reason); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } controld_record_pending_op(lrm_state->node_name, rsc, op); op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, op->params, &call_id); if (rc == pcmk_rc_ok) { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); active_op_t *pending = NULL; pending = calloc(1, sizeof(active_op_t)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pcmk__str_update(&pending->user_data, op->user_data); if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &(pending->lock_time)) != pcmk_ok) { pending->lock_time = 0; } g_hash_table_replace(lrm_state->active_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = PCMK_OCF_OK; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } else if (lrm_state_is_local(lrm_state)) { crm_err("Could not initiate %s action for resource %s locally: %s " CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { crm_err("Could not initiate %s action for resource %s remotely on %s: " "%s " CRM_XS " rc=%d", operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); } free(op_id); lrmd_free_event(op); } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } /*! * \internal * \brief Log the result of an executor action (actual or synthesized) * * \param[in] op Executor action to log result for * \param[in] op_key Operation key for action * \param[in] node_name Name of node action was performed on, if known * \param[in] confirmed Whether to log that graph action was confirmed */ static void log_executor_event(const lrmd_event_data_t *op, const char *op_key, const char *node_name, gboolean confirmed) { int log_level = LOG_ERR; GString *str = g_string_sized_new(100); // reasonable starting size pcmk__g_strcat(str, "Result of ", crm_action_str(op->op_type, op->interval_ms), " operation for ", op->rsc_id, NULL); if (node_name != NULL) { pcmk__g_strcat(str, " on ", node_name, NULL); } switch (op->op_status) { case PCMK_EXEC_DONE: log_level = LOG_NOTICE; pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL); break; case PCMK_EXEC_TIMEOUT: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), " after ", pcmk__readable_interval(op->timeout), NULL); break; case PCMK_EXEC_CANCELLED: log_level = LOG_INFO; /* order of __attribute__ and Fall through comment is IMPORTANT! * do not change it without proper testing with both clang and gcc * in multiple versions. * the clang check allows to build with all versions of clang. * the has_c_attribute check is to workaround a bug in clang version * in rhel7. has_attribute would happily return "YES SIR WE GOT IT" * and fail the build the next line. */ #ifdef __clang__ #ifdef __has_c_attribute #if __has_attribute(fallthrough) __attribute__((fallthrough)); #endif #endif #endif // Fall through default: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), NULL); } if ((op->exit_reason != NULL) && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL); } g_string_append(str, " " CRM_XS); g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", (confirmed? "" : "un"), op->call_id, op_key); if (op->op_status == PCMK_EXEC_DONE) { g_string_append_printf(str, " rc=%d", op->rc); } do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); /* The services library has already logged the output at info or debug * level, so just raise to notice if it looks like a failure. */ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", op->rsc_id, op->op_type, op->interval_ms, node_name); crm_log_output(LOG_NOTICE, prefix, op->output); free(prefix); } } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, active_op_t *pending, const xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (compare_version(controld_globals.dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_EXEC_NOT_CONNECTED: lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, PCMK_EXEC_ERROR, op->exit_reason); break; case PCMK_EXEC_INVALID: lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, op->exit_reason); break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(xml, XML_ATTR_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->active_ops, op_id); } } if (op->op_status == PCMK_EXEC_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_EXEC_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_EXEC_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can time_t lock_time = (pending == NULL)? 0 : pending->lock_time; controld_update_resource_history(node_name, rsc, op, lock_time); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pcmk_is_set(pending->flags, active_op_remove)) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { controld_delete_action_history(op); } /* Directly acknowledge failed recurring actions here. The above call to * controld_delete_action_history() will not erase any corresponding * last_failure entry, which means that the DC won't confirm the * cancellation via process_op_deletion(), and the transition would * otherwise wait for the action timer to pop. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_EXEC_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->active_ops)); } } log_executor_event(op, op_key, node_name, removed); if (lrm_state) { if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA, pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL, true); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ controld_trigger_fsa(); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c index 480b97be99..435204ea07 100644 --- a/daemons/controld/controld_te_actions.c +++ b/daemons/controld/controld_te_actions.c @@ -1,749 +1,749 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // lrmd_event_data_t, lrmd_free_event() #include #include #include #include #include static GHashTable *te_targets = NULL; void send_rsc_command(pcmk__graph_action_t *action); static void te_update_job_count(pcmk__graph_action_t *action, int offset); static void te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action) { action->timer = g_timeout_add(action->timeout + graph->network_delay, action_timer_callback, (void *) action); CRM_ASSERT(action->timer != 0); } /*! * \internal * \brief Execute a graph pseudo-action * * \param[in,out] graph Transition graph being executed * \param[in,out] pseudo Pseudo-action to execute * * \return Standard Pacemaker return code */ static int execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo) { const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK); /* send to peers as well? */ if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *cmd = NULL; if (pcmk__str_eq(controld_globals.our_nodename, node->uname, pcmk__str_casei)) { continue; } cmd = create_request(task, pseudo->xml, node->uname, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); send_cluster_message(node, crm_msg_crmd, cmd, FALSE); free_xml(cmd); } remote_ra_process_maintenance_nodes(pseudo->xml); } else { /* Check action for Pacemaker Remote node side effects */ remote_ra_process_pseudo(pseudo->xml); } crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id, crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY)); te_action_confirmed(pseudo, graph); return pcmk_rc_ok; } static int get_target_rc(pcmk__graph_action_t *action) { int exit_status; pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC), &exit_status, 0); return exit_status; } /*! * \internal * \brief Execute a cluster action from a transition graph * * \param[in,out] graph Transition graph being executed * \param[in,out] action Cluster action to execute * * \return Standard Pacemaker return code */ static int execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); CRM_CHECK(!pcmk__str_empty(id), return EPROTO); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); CRM_CHECK(!pcmk__str_empty(task), return EPROTO); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown); router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (router_node == NULL) { router_node = on_node; - if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) { + if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) { router_node = controld_globals.our_nodename; } } } if (pcmk__str_eq(router_node, controld_globals.our_nodename, pcmk__str_casei)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_info("Handling controller request '%s' (%s on %s)%s%s", id, task, on_node, (is_local? " locally" : ""), (no_wait? " without waiting" : "")); if (is_local && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* defer until everything else completes */ crm_info("Controller request '%s' is a local shutdown", id); graph->completion_action = pcmk__graph_shutdown; graph->abort_reason = "local shutdown"; te_action_confirmed(action, graph); return pcmk_rc_ok; } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { crm_node_t *peer = crm_get_peer(0, router_node); pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); } cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = pcmk__transition_key(controld_globals.transition_graph->id, action->id, get_target_rc(action), controld_globals.te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return ECOMM; } else if (no_wait) { te_action_confirmed(action, graph); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = (int) graph->network_delay; } te_start_action_timer(graph, action); } return pcmk_rc_ok; } /*! * \internal * \brief Synthesize an executor event for a resource action timeout * * \param[in] action Resource action that timed out * \param[in] target_rc Expected result of action that timed out * * Synthesize an executor event for a resource action timeout. (If the executor * gets a timeout while waiting for a resource action to complete, that will be * reported via the usual callback. This timeout means we didn't hear from the * executor itself or the controller that relayed the action to the executor.) * * \return Newly created executor event for result of \p action * \note The caller is responsible for freeing the return value using * lrmd_free_event(). */ static lrmd_event_data_t * synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc) { lrmd_event_data_t *op = NULL; const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *reason = NULL; char *dynamic_reason = NULL; if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) { reason = "Local executor did not return result in time"; } else { const char *router_node = NULL; router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (router_node == NULL) { router_node = target; } dynamic_reason = crm_strdup_printf("Controller on %s did not return " "result in time", router_node); reason = dynamic_reason; } op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT, PCMK_OCF_UNKNOWN_ERROR, reason); op->call_id = -1; op->user_data = pcmk__transition_key(controld_globals.transition_graph->id, action->id, target_rc, controld_globals.te_uuid); free(dynamic_reason); return op; } static void controld_record_action_event(pcmk__graph_action_t *action, lrmd_event_data_t *op) { cib_t *cib_conn = controld_globals.cib_conn; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *rsc_id = NULL; const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int target_rc = get_target_rc(action); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { return; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_ID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE); crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS); crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER); pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target, __func__); rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state, cib_scope_local); fsa_register_cib_callback(rc, NULL, cib_action_updated); free_xml(state); crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)", rc, action->id, task_uuid, target); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } void controld_record_action_timeout(pcmk__graph_action_t *action) { lrmd_event_data_t *op = NULL; const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); int target_rc = get_target_rc(action); crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); op = synthesize_timeout_event(action, target_rc); controld_record_action_event(action, op); lrmd_free_event(op); } /*! * \internal * \brief Execute a resource action from a transition graph * * \param[in,out] graph Transition graph being executed * \param[in,out] action Resource action to execute * * \return Standard Pacemaker return code */ static int execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(!pcmk__str_empty(on_node), crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), pcmk__s(task, "without task")); return pcmk_rc_node_unknown); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } counter = pcmk__transition_key(controld_globals.transition_graph->id, action->id, get_target_rc(action), controld_globals.te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if (pcmk__str_eq(router_node, controld_globals.our_nodename, pcmk__str_casei)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d", task, task_uuid, (is_local? " locally" : ""), on_node, (no_wait? " without waiting" : ""), action->id); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __func__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); pcmk__set_graph_action_flags(action, pcmk__graph_action_executed); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return ECOMM; } else if (no_wait) { /* Just mark confirmed. Don't bump the job count only to immediately * decrement it. */ crm_info("Action %d confirmed - no wait", action->id); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(controld_globals.transition_graph, action); trigger_graph(); } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.", action->id, task, task_uuid, on_node, action->timeout); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = (int) graph->network_delay; } te_update_job_count(action, 1); te_start_action_timer(graph, action); } return pcmk_rc_ok; } struct te_peer_s { char *name; int jobs; int migrate_jobs; }; static void te_peer_free(gpointer p) { struct te_peer_s *peer = p; free(peer->name); free(peer); } void te_reset_job_counts(void) { GHashTableIter iter; struct te_peer_s *peer = NULL; if(te_targets == NULL) { te_targets = pcmk__strkey_table(NULL, te_peer_free); } g_hash_table_iter_init(&iter, te_targets); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { peer->jobs = 0; peer->migrate_jobs = 0; } } static void te_update_job_count_on(const char *target, int offset, bool migrate) { struct te_peer_s *r = NULL; if(target == NULL || te_targets == NULL) { return; } r = g_hash_table_lookup(te_targets, target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } r->jobs += offset; if(migrate) { r->migrate_jobs += offset; } crm_trace("jobs[%s] = %d", target, r->jobs); } static void te_update_job_count(pcmk__graph_action_t *action, int offset) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) { /* No limit on these */ return; } /* if we have a router node, this means the action is performing * on a remote node. For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); te_update_job_count_on(t1, offset, TRUE); te_update_job_count_on(t2, offset, TRUE); return; } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } te_update_job_count_on(target, offset, FALSE); } /*! * \internal * \brief Check whether a graph action is allowed to be executed on a node * * \param[in] graph Transition graph being executed * \param[in] action Graph action being executed * \param[in] target Name of node where action should be executed * * \return true if action is allowed, otherwise false */ static bool allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action, const char *target) { int limit = 0; struct te_peer_s *r = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if(target == NULL) { /* No limit on these */ return true; } else if(te_targets == NULL) { return false; } r = g_hash_table_lookup(te_targets, target); limit = throttle_get_job_limit(target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } if(limit <= r->jobs) { crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", target, limit, r->jobs, id); return false; } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", target, graph->migration_limit, r->migrate_jobs, id); return false; } } crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit); return true; } /*! * \internal * \brief Check whether a graph action is allowed to be executed * * \param[in] graph Transition graph being executed * \param[in] action Graph action being executed * * \return true if action is allowed, otherwise false */ static bool graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *target = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (action->type != pcmk__rsc_graph_action) { /* No limit on these */ return true; } /* if we have a router node, this means the action is performing * on a remote node. For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); if (!allowed_on_node(graph, action, target)) { return false; } target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } return allowed_on_node(graph, action, target); } /*! * \brief Confirm a graph action (and optionally update graph) * * \param[in,out] action Action to confirm * \param[in,out] graph Update and trigger this graph (if non-NULL) */ void te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph) { if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { if ((action->type == pcmk__rsc_graph_action) && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) { te_update_job_count(action, -1); } pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); } if (graph) { pcmk__update_graph(graph, action); trigger_graph(); } } static pcmk__graph_functions_t te_graph_fns = { execute_pseudo_action, execute_rsc_action, execute_cluster_action, controld_execute_fence_action, graph_action_allowed, }; /* * \internal * \brief Register the transitioner's graph functions with \p libpacemaker */ void controld_register_graph_functions(void) { pcmk__set_graph_functions(&te_graph_fns); } void notify_crmd(pcmk__graph_t *graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(controld_globals.fsa_state)); CRM_CHECK(graph->complete, graph->complete = true); switch (graph->completion_action) { case pcmk__graph_wait: type = "stop"; if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case pcmk__graph_done: type = "done"; if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case pcmk__graph_restart: type = "restart"; if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { if (controld_get_period_transition_timer() > 0) { controld_stop_transition_timer(); controld_start_transition_timer(); } else { event = I_PE_CALC; } } else if (controld_globals.fsa_state == S_POLICY_ENGINE) { controld_set_fsa_action_flags(A_PE_INVOKE); controld_trigger_fsa(); } break; case pcmk__graph_shutdown: type = "shutdown"; if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet the scheduler is telling us to"); event = I_TERMINATE; } } crm_debug("Transition %d status: %s - %s", graph->id, type, pcmk__s(graph->abort_reason, "unspecified reason")); graph->abort_reason = NULL; graph->completion_action = pcmk__graph_done; if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else { controld_trigger_fsa(); } } diff --git a/include/crm/common/actions.h b/include/crm/common/actions.h index 94f1e1f251..6ccd70c76a 100644 --- a/include/crm/common/actions.h +++ b/include/crm/common/actions.h @@ -1,59 +1,60 @@ /* * Copyright 2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_ACTIONS__H #define PCMK__CRM_COMMON_ACTIONS__H #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief APIs related to actions * \ingroup core */ // Action names as strings #define PCMK_ACTION_CANCEL "cancel" #define PCMK_ACTION_CLONE_ONE_OR_MORE "clone-one-or-more" #define PCMK_ACTION_DELETE "delete" #define PCMK_ACTION_DEMOTE "demote" #define PCMK_ACTION_DEMOTED "demoted" #define PCMK_ACTION_DO_SHUTDOWN "do_shutdown" #define PCMK_ACTION_LIST "list" +#define PCMK_ACTION_LRM_DELETE "lrm_delete" #define PCMK_ACTION_LOAD_STOPPED "load_stopped" #define PCMK_ACTION_MAINTENANCE_NODES "maintenance_nodes" #define PCMK_ACTION_META_DATA "meta-data" #define PCMK_ACTION_MIGRATE_FROM "migrate_from" #define PCMK_ACTION_MIGRATE_TO "migrate_to" #define PCMK_ACTION_MONITOR "monitor" #define PCMK_ACTION_NOTIFIED "notified" #define PCMK_ACTION_NOTIFY "notify" #define PCMK_ACTION_OFF "off" #define PCMK_ACTION_ON "on" #define PCMK_ACTION_ONE_OR_MORE "one-or-more" #define PCMK_ACTION_PROMOTE "promote" #define PCMK_ACTION_PROMOTED "promoted" #define PCMK_ACTION_REBOOT "reboot" #define PCMK_ACTION_RELOAD "reload" #define PCMK_ACTION_RELOAD_AGENT "reload-agent" #define PCMK_ACTION_RUNNING "running" #define PCMK_ACTION_START "start" #define PCMK_ACTION_STATUS "status" #define PCMK_ACTION_STONITH "stonith" #define PCMK_ACTION_STOP "stop" #define PCMK_ACTION_STOPPED "stopped" #define PCMK_ACTION_VALIDATE_ALL "validate-all" #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_ACTIONS__H diff --git a/include/crm/crm.h b/include/crm/crm.h index beb97eb40e..c3a69e1bb0 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,185 +1,185 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CRM__H # define PCMK__CRM_CRM__H # include # include # include # include # include # include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief A dumping ground * \ingroup core */ #ifndef PCMK_ALLOW_DEPRECATED /*! * \brief Allow use of deprecated Pacemaker APIs * * By default, external code using Pacemaker headers is allowed to use * deprecated Pacemaker APIs. If PCMK_ALLOW_DEPRECATED is defined to 0 before * including any Pacemaker headers, deprecated APIs will be unusable. It is * strongly recommended to leave this unchanged for production and release * builds, to avoid breakage when users upgrade to new Pacemaker releases that * deprecate more APIs. This should be defined to 0 only for development and * testing builds when desiring to check for usage of currently deprecated APIs. */ #define PCMK_ALLOW_DEPRECATED 1 #endif /*! * The CRM feature set assists with compatibility in mixed-version clusters. * The major version number increases when nodes with different versions * would not work (rolling upgrades are not allowed). The minor version * number increases when mixed-version clusters are allowed only during * rolling upgrades (a node with the oldest feature set will be elected DC). The * minor-minor version number is ignored, but allows resource agents to detect * cluster support for various features. * * The feature set also affects the processing of old saved CIBs (such as for * many scheduler regression tests). * * Particular feature points currently tested by Pacemaker code: * * >2.1: Operation updates include timing data * >=3.0.5: XML v2 digests are created * >=3.0.8: Peers do not need acks for cancellations * >=3.0.9: DC will send its own shutdown request to all peers * XML v2 patchsets are created by default * >=3.0.13: Fail counts include operation name and interval * >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED */ # define CRM_FEATURE_SET "3.18.0" /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and * recipient of a CPG message. This imposes an arbitrary limit on cluster node * names. */ //! \brief Maximum length of a Corosync cluster node name (in bytes) #define MAX_NAME 256 # define CRM_META "CRM_meta" extern char *crm_system_name; // How we represent "infinite" scores # define CRM_SCORE_INFINITY 1000000 # define CRM_INFINITY_S "INFINITY" # define CRM_PLUS_INFINITY_S "+" CRM_INFINITY_S # define CRM_MINUS_INFINITY_S "-" CRM_INFINITY_S /* @COMPAT API < 2.0.0 Deprecated "infinity" aliases * * INFINITY might be defined elsewhere (e.g. math.h), so undefine it first. * This, of course, complicates any attempt to use the other definition in any * code that includes this header. */ # undef INFINITY # define INFINITY_S "INFINITY" # define MINUS_INFINITY_S "-INFINITY" # define INFINITY 1000000 /* Sub-systems */ # define CRM_SYSTEM_DC "dc" #define CRM_SYSTEM_DCIB "dcib" // Primary instance of CIB manager # define CRM_SYSTEM_CIB "cib" # define CRM_SYSTEM_CRMD "crmd" # define CRM_SYSTEM_LRMD "lrmd" # define CRM_SYSTEM_PENGINE "pengine" # define CRM_SYSTEM_TENGINE "tengine" # define CRM_SYSTEM_STONITHD "stonithd" # define CRM_SYSTEM_MCP "pacemakerd" // Names of internally generated node attributes # define CRM_ATTR_UNAME "#uname" # define CRM_ATTR_ID "#id" # define CRM_ATTR_KIND "#kind" # define CRM_ATTR_ROLE "#role" # define CRM_ATTR_IS_DC "#is_dc" # define CRM_ATTR_CLUSTER_NAME "#cluster-name" # define CRM_ATTR_SITE_NAME "#site-name" # define CRM_ATTR_UNFENCED "#node-unfenced" # define CRM_ATTR_DIGESTS_ALL "#digests-all" # define CRM_ATTR_DIGESTS_SECURE "#digests-secure" # define CRM_ATTR_PROTOCOL "#attrd-protocol" # define CRM_ATTR_FEATURE_SET "#feature-set" /* Valid operations */ # define CRM_OP_NOOP "noop" # define CRM_OP_JOIN_ANNOUNCE "join_announce" # define CRM_OP_JOIN_OFFER "join_offer" # define CRM_OP_JOIN_REQUEST "join_request" # define CRM_OP_JOIN_ACKNAK "join_ack_nack" # define CRM_OP_JOIN_CONFIRM "join_confirm" # define CRM_OP_PING "ping" # define CRM_OP_NODE_INFO "node-info" # define CRM_OP_THROTTLE "throttle" # define CRM_OP_VOTE "vote" # define CRM_OP_NOVOTE "no-vote" # define CRM_OP_HELLO "hello" # define CRM_OP_PECALC "pe_calc" # define CRM_OP_QUIT "quit" # define CRM_OP_LOCAL_SHUTDOWN "start_shutdown" # define CRM_OP_SHUTDOWN_REQ "req_shutdown" # define CRM_OP_SHUTDOWN PCMK_ACTION_DO_SHUTDOWN # define CRM_OP_REGISTER "register" # define CRM_OP_IPC_FWD "ipc_fwd" # define CRM_OP_INVOKE_LRM "lrm_invoke" # define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10 -# define CRM_OP_LRM_DELETE "lrm_delete" +# define CRM_OP_LRM_DELETE PCMK_ACTION_LRM_DELETE # define CRM_OP_LRM_FAIL "lrm_fail" # define CRM_OP_PROBED "probe_complete" # define CRM_OP_REPROBE "probe_again" # define CRM_OP_CLEAR_FAILCOUNT "clear_failcount" # define CRM_OP_REMOTE_STATE "remote_state" # define CRM_OP_RM_NODE_CACHE "rm_node_cache" # define CRM_OP_MAINTENANCE_NODES PCMK_ACTION_MAINTENANCE_NODES /* Possible cluster membership states */ # define CRMD_JOINSTATE_DOWN "down" # define CRMD_JOINSTATE_PENDING "pending" # define CRMD_JOINSTATE_MEMBER "member" # define CRMD_JOINSTATE_NACK "banned" # define CRMD_METADATA_CALL_TIMEOUT 30000 # include # include # include # include static inline const char * crm_action_str(const char *task, guint interval_ms) { if ((task != NULL) && (interval_ms == 0) && (strcasecmp(task, PCMK_ACTION_MONITOR) == 0)) { return "probe"; } return task; } #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #ifdef __cplusplus } #endif #endif diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index bd5a41de36..03025748f8 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1091 +1,1092 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pe_action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->details->uname) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = create_xml_node(xml, XML_CIB_TAG_NODE); crm_xml_add(node_xml, XML_ATTR_ID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pe_node_t *node, void *xml) { add_node_to_xml_by_id(node->details->id, (xmlNode *) xml); } /*! * \internal * \brief Count (optionally add to XML) nodes needing maintenance state update * * \param[in,out] xml Parent XML tag to add to, if any * \param[in] data_set Working set for cluster * * \return Count of nodes added * \note Only Pacemaker Remote nodes are considered currently */ static int add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set) { xmlNode *maintenance = NULL; int count = 0; if (xml != NULL) { maintenance = create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE); } for (const GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { const pe_node_t *node = iter->data; if (pe__is_guest_or_remote_node(node) && (node->details->maintenance != node->details->remote_maintenance)) { if (maintenance != NULL) { crm_xml_add(add_node_to_xml_by_id(node->details->id, maintenance), XML_NODE_IS_MAINTENANCE, (node->details->maintenance? "1" : "0")); } count++; } } crm_trace("%s %d nodes in need of maintenance mode update in state", ((maintenance == NULL)? "Counted" : "Added"), count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] data_set Working set for cluster */ static void add_maintenance_update(pe_working_set_t *data_set) { pe_action_t *action = NULL; if (add_maintenance_nodes(NULL, data_set) != 0) { action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, data_set); pe__set_action_flags(action, pe_action_print_always); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes */ static void add_downed_nodes(xmlNode *xml, const pe_action_t *action) { CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL), return); if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* Shutdown makes the action's node down */ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); pe_foreach_guest_node(action->node->details->data_set, action->node, add_node_to_xml, downed); } } else if (action->rsc && action->rsc->is_remote_node && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pe_action_t *input; bool migrating = false; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pe_action_wrapper_t *) iter->data)->action; if ((input->rsc != NULL) && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none) && pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { migrating = true; break; } } if (!migrating) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! * \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(const pe_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL)); return pcmk__notify_key(action->rsc->clone_name, n_type, n_task); } else if (action->cancel_task != NULL) { return pcmk__op_key(action->rsc->clone_name, action->cancel_task, interval_ms); } else { return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms); } } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] xml Transition graph action XML for \p action */ static void add_node_details(const pe_action_t *action, xmlNode *xml) { pe_node_t *router_node = pcmk__connection_host_for_action(action); crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); if (router_node != NULL) { crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_resource_details(const pe_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; /* If a resource is locked to a node via shutdown-lock, mark its actions * so the controller can preserve the lock when the action completes. */ if (pcmk__action_locks_rsc_to_node(action)) { crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) action->rsc->lock_time); } // List affected resource rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan) && (action->rsc->clone_name != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) { const char *xml_id = ID(action->rsc->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->clone_name); /* ID is what we'd like client to use * ID_LONG is what they might know it as instead * * ID_LONG is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); if ((action->rsc->clone_name != NULL) && !pcmk__str_eq(xml_id, action->rsc->clone_name, pcmk__str_none)) { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); } else { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } } else { CRM_ASSERT(action->rsc->clone_name == NULL); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in,out] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pe_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. */ args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((action->rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(action->rsc, action->node, action->rsc->cluster); pcmk__substitute_remote_addr(action->rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); } else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) { GHashTable *params = pe_rsc_params(action->rsc, NULL, action->rsc->cluster); g_hash_table_foreach(params, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { pe_resource_t *parent = action->rsc; while (parent != NULL) { parent->cmds->add_graph_meta(parent, args_xml); parent = parent->parent; } pcmk__add_bundle_meta_to_xml(args_xml, action); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none) && (action->node != NULL)) { /* Pass the node's attributes as meta-attributes. * * @TODO: Determine whether it is still necessary to do this. It was * added in 33d99707, probably for the libfence-based implementation in * c9a90bd, which is no longer used. */ g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); free_xml(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in,out] parent Parent XML element to add action to * \param[in,out] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] data_set Cluster working set */ static void create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, const pe_working_set_t *data_set) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; if ((action == NULL) || (data_set == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* All fences need node info; guest node fences are pseudo-events */ if (pcmk_is_set(action->flags, pe_action_pseudo)) { action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); } else { action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); } } else if (pcmk__str_any_of(action->task, PCMK_ACTION_DO_SHUTDOWN, CRM_OP_CLEAR_FAILCOUNT, NULL)) { action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); - } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) { + } else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE, + pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP); } crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), strdup(action->node->details->uname)); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), strdup(action->node->details->id)); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk_is_set(action->flags, pe_action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, data_set); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(const pe_action_t *action) { if (!pcmk_is_set(action->flags, pe_action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk_is_set(action->flags, pe_action_optional) && !pcmk_is_set(action->flags, pe_action_print_always)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) && !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. */ interval_ms_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk_is_set(action->flags, pe_action_pseudo) || pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH, PCMK_ACTION_DO_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pe_err("Skipping action %s (%d) " "because it was not assigned to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unassigned", action, false); return false; } if (pcmk_is_set(action->flags, pe_action_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, pe__node_name(action->node)); } else if (pe__is_guest_node(action->node) && !action->node->details->remote_requires_reset) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest %s", action->uuid, action->id, pe__node_name(action->node)); } else if (!action->node->details->online) { pe_err("Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pe_err("Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(const pe_action_wrapper_t *ordering) { return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed |pe_order_implies_then_printed |pe_order_optional)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(const pe_action_t *action, pe_action_wrapper_t *input) { if (input->state == pe_link_dumped) { return true; } if (input->type == pe_order_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && pcmk_is_set(input->type, pe_order_one_or_more)) { crm_trace("Ignoring %s (%d) input %s (%d): " "one-or-more and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_implies_first_migratable) && !pcmk_is_set(input->action->flags, pe_action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "implies input migratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "only if input unmigratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->type == pe_order_optional) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->type == pe_order_load) { pe_node_t *input_node = input->action->node; // load orderings are relevant only if actions are for same node if ((action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { pe_node_t *assigned = action->rsc->allocated_to; /* For load_stopped -> migrate_to orderings, we care about where it * has been assigned to, not where it will be executed. */ if (!pe__same_node(input_node, assigned)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (assigned? assigned->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } } else if (!pe__same_node(input_node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->type == pe_order_anti_colocation) { if (input->action->node && action->node && !pe__same_node(input->action->node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, pe__node_name(action->node), pe__node_name(input->action->node)); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pe_rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && action->rsc && pe_rsc_is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional) && !pcmk_any_flags_set(input->action->flags, pe_action_print_always|pe_action_dumped) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->type); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in,out] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(const pe_action_t *init_action, const pe_action_t *action, pe_action_wrapper_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pe_action_tracking)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->details->uname : "", init_action->uuid, init_action->node? init_action->node->details->uname : ""); return true; } pe__set_action_flags(input->action, pe_action_tracking); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->details->uname : "", input->action->uuid, input->action->node? input->action->node->details->uname : "", input->type, init_action->uuid, init_action->node? init_action->node->details->uname : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pe_action_wrapper_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pe__clear_action_flags(input->action, pe_action_tracking); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in,out] data_set Cluster working set containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(const pe_action_t *action, pe_working_set_t *data_set) { int synapse_priority = 0; xmlNode *syn = create_xml_node(data_set->graph, "synapse"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if (action->rsc != NULL) { synapse_priority = action->rsc->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in,out] data Action to possibly add * \param[in,out] user_data Cluster working set * * \note This will de-duplicate the action inputs, meaning that the * pe_action_wrapper_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ static void add_action_to_graph(gpointer data, gpointer user_data) { pe_action_t *action = (pe_action_t *) data; pe_working_set_t *data_set = (pe_working_set_t *) user_data; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk_is_set(action->flags, pe_action_dedup)) { pcmk__deduplicate_action_inputs(action); pe__set_action_flags(action, pe_action_dedup); } if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or || !should_add_action_to_graph(action)) { // shouldn't be added return; } pe__set_action_flags(action, pe_action_dumped); crm_trace("Adding action %d (%s%s%s) to graph", action->id, action->uuid, ((action->node == NULL)? "" : " on "), ((action->node == NULL)? "" : action->node->details->uname)); syn = create_graph_synapse(action, data_set); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); create_graph_action(set, action, false, data_set); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = create_xml_node(in, "trigger"); input->state = pe_link_dumped; create_graph_action(input_xml, input->action, true, data_set); } } } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const char *filename) { if (was_processing_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (was_processing_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc) { GList *iter = NULL; CRM_ASSERT(rsc != NULL); pe_rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); // First add the resource's own actions g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster); // Then recursively add its children's actions (appropriate to variant) for (iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child_rsc = (pe_resource_t *) iter->data; child_rsc->cmds->add_actions_to_graph(child_rsc); } } /*! * \internal * \brief Create a transition graph with all cluster actions needed * * \param[in,out] data_set Cluster working set */ void pcmk__create_graph(pe_working_set_t *data_set) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; transition_id++; crm_trace("Creating transition graph %d", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { crm_xml_add(data_set->graph, "migration-limit", value); } if (data_set->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) data_set->recheck_by); crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ // Add resource actions to graph for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; pe_rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(data_set); // Add non-resource (node) actions for (iter = data_set->actions; iter != NULL; iter = iter->next) { pe_action_t *action = (pe_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) && !pcmk_any_flags_set(action->flags, pe_action_optional|pe_action_runnable) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) || (data_set->no_quorum_policy == no_quorum_ignore)) { const bool managed = pcmk_is_set(action->rsc->flags, pe_rsc_managed); const bool failed = pcmk_is_set(action->rsc->flags, pe_rsc_failed); crm_crit("Cannot %s %s because of %s:%s%s (%s)", action->node->details->unclean? "fence" : "shut down", pe__node_name(action->node), action->rsc->id, (managed? " blocked" : " unmanaged"), (failed? " failed" : ""), action->uuid); } } add_action_to_graph((gpointer) action, (gpointer) data_set); } crm_log_xml_trace(data_set->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c index e72f7594d3..949f6baa25 100644 --- a/lib/pacemaker/pcmk_sched_primitive.c +++ b/lib/pacemaker/pcmk_sched_primitive.c @@ -1,1645 +1,1645 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // uint8_t, uint32_t #include #include #include "libpacemaker_private.h" static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional); static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* This array lists the immediate next role when transitioning from one role * to a target role. For example, when going from Stopped to Promoted, the * next role is Unpromoted, because the resource must be started before it * can be promoted. The current state then becomes Started, which is fed * into this array again, giving a next role of Promoted. * * Current role Immediate next role Final target role * ------------ ------------------- ----------------- */ /* Unknown */ { RSC_ROLE_UNKNOWN, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STOPPED, /* Started */ RSC_ROLE_STOPPED, /* Unpromoted */ RSC_ROLE_STOPPED, /* Promoted */ }, /* Stopped */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STARTED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_UNPROMOTED, /* Promoted */ }, /* Started */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STARTED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_PROMOTED, /* Promoted */ }, /* Unpromoted */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STOPPED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_PROMOTED, /* Promoted */ }, /* Promoted */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_UNPROMOTED, /* Stopped */ RSC_ROLE_UNPROMOTED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_PROMOTED, /* Promoted */ }, }; /*! * \internal * \brief Function to schedule actions needed for a role change * * \param[in,out] rsc Resource whose role is changing * \param[in,out] node Node where resource will be in its next role * \param[in] optional Whether scheduled actions should be optional */ typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node, bool optional); static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* This array lists the function needed to transition directly from one role * to another. NULL indicates that nothing is needed. * * Current role Transition function Next role * ------------ ------------------- ---------- */ /* Unknown */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ assert_role_error, /* Started */ assert_role_error, /* Unpromoted */ assert_role_error, /* Promoted */ }, /* Stopped */ { assert_role_error, /* Unknown */ NULL, /* Stopped */ start_resource, /* Started */ start_resource, /* Unpromoted */ assert_role_error, /* Promoted */ }, /* Started */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ NULL, /* Started */ NULL, /* Unpromoted */ promote_resource, /* Promoted */ }, /* Unpromoted */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ stop_resource, /* Started */ NULL, /* Unpromoted */ promote_resource, /* Promoted */ }, /* Promoted */ { assert_role_error, /* Unknown */ demote_resource, /* Stopped */ demote_resource, /* Started */ demote_resource, /* Unpromoted */ NULL, /* Promoted */ }, }; /*! * \internal * \brief Get a list of a resource's allowed nodes sorted by node score * * \param[in] rsc Resource to check * * \return List of allowed nodes sorted by node score */ static GList * sorted_allowed_nodes(const pe_resource_t *rsc) { if (rsc->allowed_nodes != NULL) { GList *nodes = g_hash_table_get_values(rsc->allowed_nodes); if (nodes != NULL) { return pcmk__sort_nodes(nodes, pe__current_node(rsc)); } } return NULL; } /*! * \internal * \brief Assign a resource to its best allowed node, if possible * * \param[in,out] rsc Resource to choose a node for * \param[in] prefer If not \c NULL, prefer this node when all else * equal * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a * node, set next role to stopped and update * existing actions * * \return true if \p rsc could be assigned to a node, otherwise false * * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ static bool assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer, bool stop_if_fail) { GList *nodes = NULL; pe_node_t *chosen = NULL; pe_node_t *best = NULL; const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc); if (prefer == NULL) { prefer = most_free_node; } if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { // We've already finished assignment of resources to nodes return rsc->allocated_to != NULL; } // Sort allowed nodes by score nodes = sorted_allowed_nodes(rsc); if (nodes != NULL) { best = (pe_node_t *) nodes->data; // First node has best score } if ((prefer != NULL) && (nodes != NULL)) { // Get the allowed node version of prefer chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen == NULL) { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", pe__node_name(prefer), rsc->id); /* Favor the preferred node as long as its score is at least as good as * the best allowed node's. * * An alternative would be to favor the preferred node even if the best * node is better, when the best node's score is less than INFINITY. */ } else if (chosen->weight < best->weight) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", pe__node_name(chosen), rsc->id); chosen = NULL; } else if (!pcmk__node_available(chosen, true, false)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", pe__node_name(chosen), rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Chose preferred node %s for %s " "(ignoring %d candidates)", pe__node_name(chosen), rsc->id, g_list_length(nodes)); } } if ((chosen == NULL) && (best != NULL)) { /* Either there is no preferred node, or the preferred node is not * suitable, but another node is allowed to run the resource. */ chosen = best; if (!pe_rsc_is_unique_clone(rsc->parent) && (chosen->weight > 0) // Zero not acceptable && pcmk__node_available(chosen, false, false)) { /* If the resource is already running on a node, prefer that node if * it is just as good as the chosen node. * * We don't do this for unique clone instances, because * pcmk__assign_instances() has already assigned instances to their * running nodes when appropriate, and if we get here, we don't want * remaining unassigned instances to prefer a node that's already * running another instance. */ pe_node_t *running = pe__current_node(rsc); if (running == NULL) { // Nothing to do } else if (!pcmk__node_available(running, true, false)) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, pe__node_name(running)); } else { int nodes_with_best_score = 1; for (GList *iter = nodes->next; iter; iter = iter->next) { pe_node_t *allowed = (pe_node_t *) iter->data; if (allowed->weight != chosen->weight) { // The nodes are sorted by score, so no more are equal break; } if (pe__same_node(allowed, running)) { // Scores are equal, so prefer the current node chosen = allowed; } nodes_with_best_score++; } if (nodes_with_best_score > 1) { uint8_t log_level = LOG_INFO; if (chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "Chose %s for %s from %d nodes with score %s", pe__node_name(chosen), rsc->id, nodes_with_best_score, pcmk_readable_score(chosen->weight)); } } } pe_rsc_trace(rsc, "Chose %s for %s from %d candidates", pe__node_name(chosen), rsc->id, g_list_length(nodes)); } pcmk__assign_resource(rsc, chosen, false, stop_if_fail); g_list_free(nodes); return rsc->allocated_to != NULL; } /*! * \internal * \brief Apply a "this with" colocation to a node's allowed node scores * * \param[in,out] colocation Colocation to apply * \param[in,out] rsc Resource being assigned */ static void apply_this_with(pcmk__colocation_t *colocation, pe_resource_t *rsc) { GHashTable *archive = NULL; pe_resource_t *other = colocation->primary; // In certain cases, we will need to revert the node scores if ((colocation->dependent_role >= RSC_ROLE_PROMOTED) || ((colocation->score < 0) && (colocation->score > -INFINITY))) { archive = pcmk__copy_node_table(rsc->allowed_nodes); } if (pcmk_is_set(other->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc, "%s: Assigning colocation %s primary %s first" "(score=%d role=%s)", rsc->id, colocation->id, other->id, colocation->score, role2text(colocation->dependent_role)); other->cmds->assign(other, NULL, true); } // Apply the colocation score to this resource's allowed node scores rsc->cmds->apply_coloc_score(rsc, other, colocation, true); if ((archive != NULL) && !pcmk__any_node_available(rsc->allowed_nodes)) { pe_rsc_info(rsc, "%s: Reverting scores from colocation with %s " "because no nodes allowed", rsc->id, other->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive != NULL) { g_hash_table_destroy(archive); } } /*! * \internal * \brief Update a Pacemaker Remote node once its connection has been assigned * * \param[in] connection Connection resource that has been assigned */ static void remote_connection_assigned(const pe_resource_t *connection) { pe_node_t *remote_node = pe_find_node(connection->cluster->nodes, connection->id); CRM_CHECK(remote_node != NULL, return); if ((connection->allocated_to != NULL) && (connection->next_role != RSC_ROLE_STOPPED)) { crm_trace("Pacemaker Remote node %s will be online", remote_node->details->id); remote_node->details->online = TRUE; if (remote_node->details->unseen) { // Avoid unnecessary fence, since we will attempt connection remote_node->details->unclean = FALSE; } } else { crm_trace("Pacemaker Remote node %s will be shut down " "(%sassigned connection's next role is %s)", remote_node->details->id, ((connection->allocated_to == NULL)? "un" : ""), role2text(connection->next_role)); remote_node->details->shutdown = TRUE; } } /*! * \internal * \brief Assign a primitive resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a * node, set next role to stopped and update * existing actions * * \return Node that \p rsc is assigned to, if assigned entirely to one node * * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ pe_node_t * pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer, bool stop_if_fail) { GList *this_with_colocations = NULL; GList *with_this_colocations = NULL; GList *iter = NULL; pcmk__colocation_t *colocation = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native)); // Never assign a child without parent being assigned first if ((rsc->parent != NULL) && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "%s: Assigning parent %s first", rsc->id, rsc->parent->id); rsc->parent->cmds->assign(rsc->parent, prefer, stop_if_fail); } if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { // Assignment has already been done const char *node_name = "no node"; if (rsc->allocated_to != NULL) { node_name = pe__node_name(rsc->allocated_to); } pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name); return rsc->allocated_to; } // Ensure we detect assignment loops if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id); return NULL; } pe__set_resource_flags(rsc, pe_rsc_allocating); pe__show_node_scores(true, rsc, "Pre-assignment", rsc->allowed_nodes, rsc->cluster); this_with_colocations = pcmk__this_with_colocations(rsc); with_this_colocations = pcmk__with_this_colocations(rsc); // Apply mandatory colocations first, to satisfy as many as possible for (iter = this_with_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score <= -CRM_SCORE_INFINITY) || (colocation->score >= CRM_SCORE_INFINITY)) { apply_this_with(colocation, rsc); } } for (iter = with_this_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score <= -CRM_SCORE_INFINITY) || (colocation->score >= CRM_SCORE_INFINITY)) { pcmk__add_dependent_scores(colocation, rsc); } } pe__show_node_scores(true, rsc, "Mandatory-colocations", rsc->allowed_nodes, rsc->cluster); // Then apply optional colocations for (iter = this_with_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score > -CRM_SCORE_INFINITY) && (colocation->score < CRM_SCORE_INFINITY)) { apply_this_with(colocation, rsc); } } for (iter = with_this_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score > -CRM_SCORE_INFINITY) && (colocation->score < CRM_SCORE_INFINITY)) { pcmk__add_dependent_scores(colocation, rsc); } } g_list_free(this_with_colocations); g_list_free(with_this_colocations); if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Banning %s from all nodes because it will be stopped", rsc->id); resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, rsc->cluster); } else if ((rsc->next_role > rsc->role) && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum) && (rsc->cluster->no_quorum_policy == no_quorum_freeze)) { crm_notice("Resource %s cannot be elevated from %s to %s due to " "no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze"); } pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); // Unmanage resource if fencing is enabled but no device is configured if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled) && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) { pe__clear_resource_flags(rsc, pe_rsc_managed); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { // Unmanaged resources stay on their current node const char *reason = NULL; pe_node_t *assign_to = NULL; pe__set_next_role(rsc, rsc->role, "unmanaged"); assign_to = pe__current_node(rsc); if (assign_to == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_PROMOTED) { reason = "promoted"; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { reason = "failed"; } else { reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id, (assign_to? assign_to->details->uname : "no node"), reason); pcmk__assign_resource(rsc, assign_to, true, stop_if_fail); } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) { // Must stop at some point, but be consistent with stop_if_fail if (stop_if_fail) { pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id); } pcmk__assign_resource(rsc, NULL, true, stop_if_fail); } else if (!assign_best_node(rsc, prefer, stop_if_fail)) { // Assignment failed if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if ((rsc->running_on != NULL) && stop_if_fail) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } pe__clear_resource_flags(rsc, pe_rsc_allocating); if (rsc->is_remote_node) { remote_connection_assigned(rsc); } return rsc->allocated_to; } /*! * \internal * \brief Schedule actions to bring resource down and back to current role * * \param[in,out] rsc Resource to restart * \param[in,out] current Node that resource should be brought down on * \param[in] need_stop Whether the resource must be stopped * \param[in] need_promote Whether the resource must be promoted * * \return Role that resource would have after scheduled actions are taken */ static void schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current, bool need_stop, bool need_promote) { enum rsc_role_e role = rsc->role; enum rsc_role_e next_role; rsc_transition_fn fn = NULL; pe__set_resource_flags(rsc, pe_rsc_restarting); // Bring resource down to a stop on its current node while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s", (need_stop? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, current, !need_stop); role = next_role; } // Bring resource up to its next role on its next node while ((rsc->role <= rsc->next_role) && (role != rsc->role) && !pcmk_is_set(rsc->flags, pe_rsc_block)) { bool required = need_stop; next_role = rsc_state_matrix[role][rsc->role]; if ((next_role == RSC_ROLE_PROMOTED) && need_promote) { required = true; } pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s", (required? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, rsc->allocated_to, !required); role = next_role; } pe__clear_resource_flags(rsc, pe_rsc_restarting); } /*! * \internal * \brief If a resource's next role is not explicitly specified, set a default * * \param[in,out] rsc Resource to set next role for * * \return "explicit" if next role was explicitly set, otherwise "implicit" */ static const char * set_default_next_role(pe_resource_t *rsc) { if (rsc->next_role != RSC_ROLE_UNKNOWN) { return "explicit"; } if (rsc->allocated_to == NULL) { pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment"); } else { pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment"); } return "implicit"; } /*! * \internal * \brief Create an action to represent an already pending start * * \param[in,out] rsc Resource to create start action for */ static void create_pending_start(pe_resource_t *rsc) { pe_action_t *start = NULL; pe_rsc_trace(rsc, "Creating action for %s to represent already pending start", rsc->id); start = start_action(rsc, rsc->allocated_to, TRUE); pe__set_action_flags(start, pe_action_print_always); } /*! * \internal * \brief Schedule actions needed to take a resource to its next role * * \param[in,out] rsc Resource to schedule actions for */ static void schedule_role_transition_actions(pe_resource_t *rsc) { enum rsc_role_e role = rsc->role; while (role != rsc->next_role) { enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role]; rsc_transition_fn fn = NULL; pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)", rsc->id, role2text(role), role2text(next_role), role2text(rsc->next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, rsc->allocated_to, false); role = next_role; } } /*! * \internal * \brief Create all actions needed for a given primitive resource * * \param[in,out] rsc Primitive resource to create actions for */ void pcmk__primitive_create_actions(pe_resource_t *rsc) { bool need_stop = false; bool need_promote = false; bool is_moving = false; bool allow_migrate = false; bool multiply_active = false; pe_node_t *current = NULL; unsigned int num_all_active = 0; unsigned int num_clean_active = 0; const char *next_role_source = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native)); next_role_source = set_default_next_role(rsc); pe_rsc_trace(rsc, "Creating all actions for %s transition from %s to %s " "(%s) on %s", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next_role_source, pe__node_name(rsc->allocated_to)); current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active); g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration, rsc); if ((current != NULL) && (rsc->allocated_to != NULL) && !pe__same_node(current, rsc->allocated_to) && (rsc->next_role >= RSC_ROLE_STARTED)) { pe_rsc_trace(rsc, "Moving %s from %s to %s", rsc->id, pe__node_name(current), pe__node_name(rsc->allocated_to)); is_moving = true; allow_migrate = pcmk__rsc_can_migrate(rsc, current); // This is needed even if migrating (though I'm not sure why ...) need_stop = true; } // Check whether resource is partially migrated and/or multiply active if ((rsc->partial_migration_source != NULL) && (rsc->partial_migration_target != NULL) && allow_migrate && (num_all_active == 2) && pe__same_node(current, rsc->partial_migration_source) && pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) { /* A partial migration is in progress, and the migration target remains * the same as when the migration began. */ pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue", rsc->id, pe__node_name(rsc->partial_migration_source), pe__node_name(rsc->partial_migration_target)); } else if ((rsc->partial_migration_source != NULL) || (rsc->partial_migration_target != NULL)) { // A partial migration is in progress but can't be continued if (num_all_active > 2) { // The resource is migrating *and* multiply active! crm_notice("Forcing recovery of %s because it is migrating " "from %s to %s and possibly active elsewhere", rsc->id, pe__node_name(rsc->partial_migration_source), pe__node_name(rsc->partial_migration_target)); } else { // The migration source or target isn't available crm_notice("Forcing recovery of %s because it can no longer " "migrate from %s to %s", rsc->id, pe__node_name(rsc->partial_migration_source), pe__node_name(rsc->partial_migration_target)); } need_stop = true; rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = false; } else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) { multiply_active = (num_all_active > 1); } else { /* If a resource has "requires" set to nothing or quorum, don't consider * it active on unclean nodes (similar to how all resources behave when * stonith-enabled is false). We can start such resources elsewhere * before fencing completes, and if we considered the resource active on * the failed node, we would attempt recovery for being active on * multiple nodes. */ multiply_active = (num_clean_active > 1); } if (multiply_active) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Resource was (possibly) incorrectly multiply active pe_proc_err("%s resource %s might be active on %u nodes (%s)", pcmk__s(class, "Untyped"), rsc->id, num_all_active, recovery2text(rsc->recovery_type)); crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ" "#Resource_is_Too_Active for more information"); switch (rsc->recovery_type) { case pcmk_multiply_active_restart: need_stop = true; break; case pcmk_multiply_active_unexpected: need_stop = true; // stop_resource() will skip expected node pe__set_resource_flags(rsc, pe_rsc_stop_unexpected); break; default: break; } } else { pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected); } if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { create_pending_start(rsc); } if (is_moving) { // Remaining tests are only for resources staying where they are } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (pcmk_is_set(rsc->flags, pe_rsc_stop)) { need_stop = true; pe_rsc_trace(rsc, "Recovering %s", rsc->id); } else { pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); if (rsc->next_role == RSC_ROLE_PROMOTED) { need_promote = true; } } } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id); need_stop = true; } else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL) && (rsc->allocated_to != NULL)) { pe_action_t *start = NULL; pe_rsc_trace(rsc, "Creating start action for promoted resource %s", rsc->id); start = start_action(rsc, rsc->allocated_to, TRUE); if (!pcmk_is_set(start->flags, pe_action_optional)) { // Recovery of a promoted resource pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id); need_stop = true; } } // Create any actions needed to bring resource down and back up to same role schedule_restart_actions(rsc, current, need_stop, need_promote); // Create any actions needed to take resource from this role to the next schedule_role_transition_actions(rsc); pcmk__create_recurring_actions(rsc); if (allow_migrate) { pcmk__create_migration_actions(rsc, current); } } /*! * \internal * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes * * \param[in] rsc Resource to check */ static void rsc_avoids_remote_nodes(const pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (node->details->remote_rsc != NULL) { node->weight = -INFINITY; } } } /*! * \internal * \brief Return allowed nodes as (possibly sorted) list * * Convert a resource's hash table of allowed nodes to a list. If printing to * stdout, sort the list, to keep action ID numbers consistent for regression * test output (while avoiding the performance hit on a live cluster). * * \param[in] rsc Resource to check for allowed nodes * * \return List of resource's allowed nodes * \note Callers should take care not to rely on the list being sorted. */ static GList * allowed_nodes_as_list(const pe_resource_t *rsc) { GList *allowed_nodes = NULL; if (rsc->allowed_nodes) { allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes); } if (!pcmk__is_daemon) { allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name); } return allowed_nodes; } /*! * \internal * \brief Create implicit constraints needed for a primitive resource * * \param[in,out] rsc Primitive resource to create implicit constraints for */ void pcmk__primitive_internal_constraints(pe_resource_t *rsc) { GList *allowed_nodes = NULL; bool check_unfencing = false; bool check_utilization = false; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native)); if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping implicit constraints for unmanaged resource %s", rsc->id); return; } // Whether resource requires unfencing check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device) && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing) && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing); // Whether a non-default placement strategy is used check_utilization = (g_hash_table_size(rsc->utilization) > 0) && !pcmk__str_eq(rsc->cluster->placement_strategy, "default", pcmk__str_casei); // Order stops before starts (i.e. restart) pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL, pe_order_optional|pe_order_implies_then|pe_order_restart, rsc->cluster); // Promotable ordering: demote before stop, start before promote if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pe_rsc_promotable) || (rsc->role > RSC_ROLE_UNPROMOTED)) { pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL, pe_order_promoted_implies_first, rsc->cluster); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0), NULL, pe_order_runnable_left, rsc->cluster); } // Don't clear resource history if probing on same node - pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0), + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0), NULL, pe_order_same_node|pe_order_then_cancels_first, rsc->cluster); // Certain checks need allowed nodes if (check_unfencing || check_utilization || (rsc->container != NULL)) { allowed_nodes = allowed_nodes_as_list(rsc); } if (check_unfencing) { g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc); } if (check_utilization) { pcmk__create_utilization_constraints(rsc, allowed_nodes); } if (rsc->container != NULL) { pe_resource_t *remote_rsc = NULL; if (rsc->is_remote_node) { // rsc is the implicit remote connection for a guest or bundle node /* Guest resources are not allowed to run on Pacemaker Remote nodes, * to avoid nesting remotes. However, bundles are allowed. */ if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { rsc_avoids_remote_nodes(rsc->container); } /* If someone cleans up a guest or bundle node's container, we will * likely schedule a (re-)probe of the container and recovery of the * connection. Order the connection stop after the container probe, * so that if we detect the container running, we will trigger a new * transition and avoid the unnecessary recovery. */ pcmk__order_resource_actions(rsc->container, PCMK_ACTION_MONITOR, rsc, PCMK_ACTION_STOP, pe_order_optional); /* A user can specify that a resource must start on a Pacemaker Remote * node by explicitly configuring it with the container=NODENAME * meta-attribute. This is of questionable merit, since location * constraints can accomplish the same thing. But we support it, so here * we check whether a resource (that is not itself a remote connection) * has container set to a remote node or guest node resource. */ } else if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else { remote_rsc = pe__resource_contains_guest_node(rsc->cluster, rsc->container); } if (remote_rsc != NULL) { /* Force the resource on the Pacemaker Remote node instead of * colocating the resource with the container resource. */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { /* This resource is either a filler for a container that does NOT * represent a Pacemaker Remote node, or a Pacemaker Remote * connection resource for a guest node or bundle. */ int score; crm_trace("Order and colocate %s relative to its container %s", rsc->id, rsc->container->id); pcmk__new_ordering(rsc->container, pcmk__op_key(rsc->container->id, PCMK_ACTION_START, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL, pe_order_implies_then|pe_order_runnable_left, rsc->cluster); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL, rsc->container, pcmk__op_key(rsc->container->id, PCMK_ACTION_STOP, 0), NULL, pe_order_implies_first, rsc->cluster); if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { score = 10000; /* Highly preferred but not essential */ } else { score = INFINITY; /* Force them to run on the same host */ } pcmk__new_colocation("#resource-with-container", NULL, score, rsc, rsc->container, NULL, NULL, pcmk__coloc_influence); } } if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { /* Remote connections and fencing devices are not allowed to run on * Pacemaker Remote nodes */ rsc_avoids_remote_nodes(rsc); } g_list_free(allowed_nodes); } /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { enum pcmk__coloc_affects filter_results; CRM_ASSERT((dependent != NULL) && (primary != NULL) && (colocation != NULL)); if (for_dependent) { // Always process on behalf of primary resource primary->cmds->apply_coloc_score(dependent, primary, colocation, false); return; } filter_results = pcmk__colocation_affects(dependent, primary, colocation, false); pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)", ((colocation->score > 0)? "Colocating" : "Anti-colocating"), dependent->id, primary->id, colocation->id, colocation->score, filter_results); switch (filter_results) { case pcmk__coloc_affects_role: pcmk__apply_coloc_to_priority(dependent, primary, colocation); break; case pcmk__coloc_affects_location: pcmk__apply_coloc_to_scores(dependent, primary, colocation); break; default: // pcmk__coloc_affects_nothing return; } } /* Primitive implementation of * resource_alloc_functions_t:with_this_colocations() */ void pcmk__with_primitive_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list) { CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native) && (list != NULL)); if (rsc == orig_rsc) { /* For the resource itself, add all of its own colocations and relevant * colocations from its parent (if any). */ pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc); if (rsc->parent != NULL) { rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, list); } } else { // For an ancestor, add only explicitly configured constraints for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) { pcmk__colocation_t *colocation = iter->data; if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) { pcmk__add_with_this(list, colocation, orig_rsc); } } } } /* Primitive implementation of * resource_alloc_functions_t:this_with_colocations() */ void pcmk__primitive_with_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list) { CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native) && (list != NULL)); if (rsc == orig_rsc) { /* For the resource itself, add all of its own colocations and relevant * colocations from its parent (if any). */ pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc); if (rsc->parent != NULL) { rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, list); } } else { // For an ancestor, add only explicitly configured constraints for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) { pcmk__colocation_t *colocation = iter->data; if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) { pcmk__add_this_with(list, colocation, orig_rsc); } } } } /*! * \internal * \brief Return action flags for a given primitive resource action * * \param[in,out] action Action to get flags for * \param[in] node If not NULL, limit effects to this node (ignored) * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node) { CRM_ASSERT(action != NULL); return (uint32_t) action->flags; } /*! * \internal * \brief Check whether a node is a multiply active resource's expected node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p rsc is multiply active with multiple-active set to * stop_unexpected, and \p node is the node where it will remain active * \note This assumes that the resource's next role cannot be changed to stopped * after this is called, which should be reasonable if status has already * been unpacked and resources have been assigned to nodes. */ static bool is_expected_node(const pe_resource_t *rsc, const pe_node_t *node) { return pcmk_all_flags_set(rsc->flags, pe_rsc_stop_unexpected|pe_rsc_restarting) && (rsc->next_role > RSC_ROLE_STOPPED) && pe__same_node(rsc->allocated_to, node); } /*! * \internal * \brief Schedule actions needed to stop a resource wherever it is active * * \param[in,out] rsc Resource being stopped * \param[in] node Node where resource is being stopped (ignored) * \param[in] optional Whether actions should be optional */ static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { pe_node_t *current = (pe_node_t *) iter->data; pe_action_t *stop = NULL; if (is_expected_node(rsc, current)) { /* We are scheduling restart actions for a multiply active resource * with multiple-active=stop_unexpected, and this is where it should * not be stopped. */ pe_rsc_trace(rsc, "Skipping stop of multiply active resource %s " "on expected node %s", rsc->id, pe__node_name(current)); continue; } if (rsc->partial_migration_target != NULL) { // Continue migration if node originally was and remains target if (pe__same_node(current, rsc->partial_migration_target) && pe__same_node(current, rsc->allocated_to)) { pe_rsc_trace(rsc, "Skipping stop of %s on %s " "because partial migration there will continue", rsc->id, pe__node_name(current)); continue; } else { pe_rsc_trace(rsc, "Forcing stop of %s on %s " "because migration target changed", rsc->id, pe__node_name(current)); optional = false; } } pe_rsc_trace(rsc, "Scheduling stop of %s on %s", rsc->id, pe__node_name(current)); stop = stop_action(rsc, current, optional); if (rsc->allocated_to == NULL) { pe_action_set_reason(stop, "node availability", true); } else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting |pe_rsc_stop_unexpected)) { /* We are stopping a multiply active resource on a node that is * not its expected node, and we are still scheduling restart * actions, so the stop is for being multiply active. */ pe_action_set_reason(stop, "being multiply active", true); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe__clear_action_flags(stop, pe_action_runnable); } if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) { pcmk__schedule_cleanup(rsc, current, optional); } if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { pe_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true, NULL, false, rsc->cluster); order_actions(stop, unfence, pe_order_implies_first); if (!pcmk__node_unfenced(current)) { pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, pe__node_name(current)); } } } } /*! * \internal * \brief Schedule actions needed to start a resource on a node * * \param[in,out] rsc Resource being started * \param[in,out] node Node where resource should be started * \param[in] optional Whether actions should be optional */ static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { pe_action_t *start = NULL; CRM_ASSERT(node != NULL); pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)", (optional? "optional" : "required"), rsc->id, pe__node_name(node), node->weight); start = start_action(rsc, node, TRUE); pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then); if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) { pe__clear_action_flags(start, pe_action_optional); } if (is_expected_node(rsc, node)) { /* This could be a problem if the start becomes necessary for other * reasons later. */ pe_rsc_trace(rsc, "Start of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, pe__node_name(node)); pe__set_action_flags(start, pe_action_pseudo); } } /*! * \internal * \brief Schedule actions needed to promote a resource on a node * * \param[in,out] rsc Resource being promoted * \param[in] node Node where resource should be promoted * \param[in] optional Whether actions should be optional */ static void promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { GList *iter = NULL; GList *action_list = NULL; bool runnable = true; CRM_ASSERT(node != NULL); // Any start must be runnable for promotion to be runnable action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true); for (iter = action_list; iter != NULL; iter = iter->next) { pe_action_t *start = (pe_action_t *) iter->data; if (!pcmk_is_set(start->flags, pe_action_runnable)) { runnable = false; } } g_list_free(action_list); if (runnable) { pe_action_t *promote = promote_action(rsc, node, optional); pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s", (optional? "optional" : "required"), rsc->id, pe__node_name(node)); if (is_expected_node(rsc, node)) { /* This could be a problem if the promote becomes necessary for * other reasons later. */ pe_rsc_trace(rsc, "Promotion of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, pe__node_name(node)); pe__set_action_flags(promote, pe_action_pseudo); } } else { pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable", rsc->id, pe__node_name(node)); action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE, true); for (iter = action_list; iter != NULL; iter = iter->next) { pe_action_t *promote = (pe_action_t *) iter->data; pe__clear_action_flags(promote, pe_action_runnable); } g_list_free(action_list); } } /*! * \internal * \brief Schedule actions needed to demote a resource wherever it is active * * \param[in,out] rsc Resource being demoted * \param[in] node Node where resource should be demoted (ignored) * \param[in] optional Whether actions should be optional */ static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { /* Since this will only be called for a primitive (possibly as an instance * of a collective resource), the resource is multiply active if it is * running on more than one node, so we want to demote on all of them as * part of recovery, regardless of which one is the desired node. */ for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { pe_node_t *current = (pe_node_t *) iter->data; if (is_expected_node(rsc, current)) { pe_rsc_trace(rsc, "Skipping demote of multiply active resource %s " "on expected node %s", rsc->id, pe__node_name(current)); } else { pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s", (optional? "optional" : "required"), rsc->id, pe__node_name(current)); demote_action(rsc, current, optional); } } } static void assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional) { CRM_ASSERT(false); } /*! * \internal * \brief Schedule cleanup of a resource * * \param[in,out] rsc Resource to clean up * \param[in] node Node to clean up on * \param[in] optional Whether clean-up should be optional */ void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional) { /* If the cleanup is required, its orderings are optional, because they're * relevant only if both actions are required. Conversely, if the cleanup is * optional, the orderings make the then action required if the first action * becomes required. */ uint32_t flag = optional? pe_order_implies_then : pe_order_optional; CRM_CHECK((rsc != NULL) && (node != NULL), return); if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed", rsc->id, pe__node_name(node)); return; } if (node->details->unclean || !node->details->online) { pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable", rsc->id, pe__node_name(node)); return; } crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node)); delete_action(rsc, node, optional); // stop -> clean-up -> start pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, rsc, PCMK_ACTION_DELETE, flag); pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE, rsc, PCMK_ACTION_START, flag); } /*! * \internal * \brief Add primitive meta-attributes relevant to graph actions to XML * * \param[in] rsc Primitive resource whose meta-attributes should be added * \param[in,out] xml Transition graph action attributes XML to add to */ void pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml) { char *name = NULL; char *value = NULL; const pe_resource_t *parent = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native) && (xml != NULL)); /* Clone instance numbers get set internally as meta-attributes, and are * needed in the transition graph (for example, to tell unique clone * instances apart). */ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); if (value != NULL) { name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } // Not sure if this one is really needed ... value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value != NULL) { name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } /* The container meta-attribute can be set on the primitive itself or one of * its parents (for example, a group inside a container resource), so check * them all, and keep the highest one found. */ for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container != NULL) { crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER, parent->container->id); } } /* Bundle replica children will get their external-ip set internally as a * meta-attribute. The graph action needs it, but under a different naming * convention than other meta-attributes. */ value = g_hash_table_lookup(rsc->meta, "external-ip"); if (value != NULL) { crm_xml_add(xml, "pcmk_external_ip", value); } } // Primitive implementation of resource_alloc_functions_t:add_utilization() void pcmk__primitive_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native) && (orig_rsc != NULL) && (utilization != NULL)); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization", orig_rsc->id, rsc->id); pcmk__release_node_capacity(utilization, rsc); } /*! * \internal * \brief Get epoch time of node's shutdown attribute (or now if none) * * \param[in,out] node Node to check * * \return Epoch time corresponding to shutdown attribute if set or now if not */ static time_t shutdown_time(pe_node_t *node) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); time_t result = 0; if (shutdown != NULL) { long long result_ll; if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) { result = (time_t) result_ll; } } return (result == 0)? get_effective_time(node->details->data_set) : result; } /*! * \internal * \brief Ban a resource from a node if it's not locked to the node * * \param[in] data Node to check * \param[in,out] user_data Resource to check */ static void ban_if_not_locked(gpointer data, gpointer user_data) { const pe_node_t *node = (const pe_node_t *) data; pe_resource_t *rsc = (pe_resource_t *) user_data; if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) { resource_location(rsc, node, -CRM_SCORE_INFINITY, XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster); } } // Primitive implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__primitive_shutdown_lock(pe_resource_t *rsc) { const char *class = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native)); class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Fence devices and remote connections can't be locked if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) || pe__resource_is_remote_conn(rsc, rsc->cluster)) { return; } if (rsc->lock_node != NULL) { // The lock was obtained from resource history if (rsc->running_on != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. */ pe_rsc_info(rsc, "Cancelling shutdown lock because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster); rsc->lock_node = NULL; rsc->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->running_on)) { pe_node_t *node = rsc->running_on->data; if (node->details->shutdown) { if (node->details->unclean) { pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, pe__node_name(node)); } else { rsc->lock_node = node; rsc->lock_time = shutdown_time(node); } } } if (rsc->lock_node == NULL) { // No lock needed return; } if (rsc->cluster->shutdown_lock > 0) { time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock; pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, pe__node_name(rsc->lock_node), (long long) lock_expiration); pe__update_recheck_time(++lock_expiration, rsc->cluster); } else { pe_rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, pe__node_name(rsc->lock_node)); } // If resource is locked to one node, ban it from all other nodes g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc); } diff --git a/lib/pengine/pe_actions.c b/lib/pengine/pe_actions.c index 847f86a371..d5ed8359a7 100644 --- a/lib/pengine/pe_actions.c +++ b/lib/pengine/pe_actions.c @@ -1,1697 +1,1698 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "pe_status_private.h" static void unpack_operation(pe_action_t *action, const xmlNode *xml_obj, const pe_resource_t *container, pe_working_set_t *data_set, guint interval_ms); static void add_singleton(pe_working_set_t *data_set, pe_action_t *action) { if (data_set->singletons == NULL) { data_set->singletons = pcmk__strkey_table(NULL, NULL); } g_hash_table_insert(data_set->singletons, action->uuid, action); } static pe_action_t * lookup_singleton(pe_working_set_t *data_set, const char *action_uuid) { if (data_set->singletons == NULL) { return NULL; } return g_hash_table_lookup(data_set->singletons, action_uuid); } /*! * \internal * \brief Find an existing action that matches arguments * * \param[in] key Action key to match * \param[in] rsc Resource to match (if any) * \param[in] node Node to match (if any) * \param[in] data_set Cluster working set * * \return Existing action that matches arguments (or NULL if none) */ static pe_action_t * find_existing_action(const char *key, const pe_resource_t *rsc, const pe_node_t *node, const pe_working_set_t *data_set) { GList *matches = NULL; pe_action_t *action = NULL; /* When rsc is NULL, it would be quicker to check data_set->singletons, * but checking all data_set->actions takes the node into account. */ matches = find_actions(((rsc == NULL)? data_set->actions : rsc->actions), key, node); if (matches == NULL) { return NULL; } CRM_LOG_ASSERT(!pcmk__list_of_multiple(matches)); action = matches->data; g_list_free(matches); return action; } static xmlNode * find_rsc_op_entry_helper(const pe_resource_t *rsc, const char *key, gboolean include_disabled) { guint interval_ms = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *interval_spec = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { bool enabled = false; name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && !enabled) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); match_key = pcmk__op_key(rsc->id, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = pcmk__op_key(rsc->clone_name, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if ((strstr(key, PCMK_ACTION_MIGRATE_TO) != NULL) || (strstr(key, PCMK_ACTION_MIGRATE_FROM) != NULL)) { local_key = pcmk__op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = pcmk__op_key(rsc->id, PCMK_ACTION_NOTIFY, 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(const pe_resource_t *rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } /*! * \internal * \brief Create a new action object * * \param[in] key Action key * \param[in] task Action name * \param[in,out] rsc Resource that action is for (if any) * \param[in] node Node that action is on (if any) * \param[in] optional Whether action should be considered optional * \param[in] for_graph Whether action should be recorded in transition graph * \param[in,out] data_set Cluster working set * * \return Newly allocated action * \note This function takes ownership of \p key. It is the caller's * responsibility to free the return value with pe_free_action(). */ static pe_action_t * new_action(char *key, const char *task, pe_resource_t *rsc, const pe_node_t *node, bool optional, bool for_graph, pe_working_set_t *data_set) { pe_action_t *action = calloc(1, sizeof(pe_action_t)); CRM_ASSERT(action != NULL); action->rsc = rsc; action->task = strdup(task); CRM_ASSERT(action->task != NULL); action->uuid = key; action->extra = pcmk__strkey_table(free, free); action->meta = pcmk__strkey_table(free, free); if (node) { action->node = pe__copy_node(node); } - if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { + if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_casei)) { // Resource history deletion for a node can be done on the DC pe__set_action_flags(action, pe_action_dc); } pe__set_action_flags(action, pe_action_runnable); if (optional) { pe__set_action_flags(action, pe_action_optional); } else { pe__clear_action_flags(action, pe_action_optional); } if (rsc != NULL) { guint interval_ms = 0; action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); parse_op_key(key, NULL, NULL, &interval_ms); unpack_operation(action, action->op_entry, rsc->container, data_set, interval_ms); } if (for_graph) { pe_rsc_trace(rsc, "Created %s action %d (%s): %s for %s on %s", (optional? "optional" : "required"), data_set->action_id, key, task, ((rsc == NULL)? "no resource" : rsc->id), pe__node_name(node)); action->id = data_set->action_id++; data_set->actions = g_list_prepend(data_set->actions, action); if (rsc == NULL) { add_singleton(data_set, action); } else { rsc->actions = g_list_prepend(rsc->actions, action); } } return action; } /*! * \internal * \brief Evaluate node attribute values for an action * * \param[in,out] action Action to unpack attributes for * \param[in,out] data_set Cluster working set */ static void unpack_action_node_attributes(pe_action_t *action, pe_working_set_t *data_set) { if (!pcmk_is_set(action->flags, pe_action_have_node_attrs) && (action->op_entry != NULL)) { pe_rule_eval_data_t rule_data = { .node_hash = action->node->details->attrs, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pe__set_action_flags(action, pe_action_have_node_attrs); pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS, &rule_data, action->extra, NULL, FALSE, data_set); } } /*! * \internal * \brief Update an action's optional flag * * \param[in,out] action Action to update * \param[in] optional Requested optional status */ static void update_action_optional(pe_action_t *action, gboolean optional) { // Force a non-recurring action to be optional if its resource is unmanaged if ((action->rsc != NULL) && (action->node != NULL) && !pcmk_is_set(action->flags, pe_action_pseudo) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) && (g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS) == NULL)) { pe_rsc_debug(action->rsc, "%s on %s is optional (%s is unmanaged)", action->uuid, pe__node_name(action->node), action->rsc->id); pe__set_action_flags(action, pe_action_optional); // We shouldn't clear runnable here because ... something // Otherwise require the action if requested } else if (!optional) { pe__clear_action_flags(action, pe_action_optional); } } static enum pe_quorum_policy effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set) { enum pe_quorum_policy policy = data_set->no_quorum_policy; if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { policy = no_quorum_ignore; } else if (data_set->no_quorum_policy == no_quorum_demote) { switch (rsc->role) { case RSC_ROLE_PROMOTED: case RSC_ROLE_UNPROMOTED: if (rsc->next_role > RSC_ROLE_UNPROMOTED) { pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED, "no-quorum-policy=demote"); } policy = no_quorum_ignore; break; default: policy = no_quorum_stop; break; } } return policy; } /*! * \internal * \brief Update a resource action's runnable flag * * \param[in,out] action Action to update * \param[in] for_graph Whether action should be recorded in transition graph * \param[in,out] data_set Cluster working set * * \note This may also schedule fencing if a stop is unrunnable. */ static void update_resource_action_runnable(pe_action_t *action, bool for_graph, pe_working_set_t *data_set) { if (pcmk_is_set(action->flags, pe_action_pseudo)) { return; } if (action->node == NULL) { pe_rsc_trace(action->rsc, "%s is unrunnable (unallocated)", action->uuid); pe__clear_action_flags(action, pe_action_runnable); } else if (!pcmk_is_set(action->flags, pe_action_dc) && !(action->node->details->online) && (!pe__is_guest_node(action->node) || action->node->details->remote_requires_reset)) { pe__clear_action_flags(action, pe_action_runnable); do_crm_log((for_graph? LOG_WARNING: LOG_TRACE), "%s on %s is unrunnable (node is offline)", action->uuid, pe__node_name(action->node)); if (pcmk_is_set(action->rsc->flags, pe_rsc_managed) && for_graph && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei) && !(action->node->details->unclean)) { pe_fence_node(data_set, action->node, "stop is unrunnable", false); } } else if (!pcmk_is_set(action->flags, pe_action_dc) && action->node->details->pending) { pe__clear_action_flags(action, pe_action_runnable); do_crm_log((for_graph? LOG_WARNING: LOG_TRACE), "Action %s on %s is unrunnable (node is pending)", action->uuid, pe__node_name(action->node)); } else if (action->needs == rsc_req_nothing) { pe_action_set_reason(action, NULL, TRUE); if (pe__is_guest_node(action->node) && !pe_can_fence(data_set, action->node)) { /* An action that requires nothing usually does not require any * fencing in order to be runnable. However, there is an exception: * such an action cannot be completed if it is on a guest node whose * host is unclean and cannot be fenced. */ pe_rsc_debug(action->rsc, "%s on %s is unrunnable " "(node's host cannot be fenced)", action->uuid, pe__node_name(action->node)); pe__clear_action_flags(action, pe_action_runnable); } else { pe_rsc_trace(action->rsc, "%s on %s does not require fencing or quorum", action->uuid, pe__node_name(action->node)); pe__set_action_flags(action, pe_action_runnable); } } else { switch (effective_quorum_policy(action->rsc, data_set)) { case no_quorum_stop: pe_rsc_debug(action->rsc, "%s on %s is unrunnable (no quorum)", action->uuid, pe__node_name(action->node)); pe__clear_action_flags(action, pe_action_runnable); pe_action_set_reason(action, "no quorum", true); break; case no_quorum_freeze: if (!action->rsc->fns->active(action->rsc, TRUE) || (action->rsc->next_role > action->rsc->role)) { pe_rsc_debug(action->rsc, "%s on %s is unrunnable (no quorum)", action->uuid, pe__node_name(action->node)); pe__clear_action_flags(action, pe_action_runnable); pe_action_set_reason(action, "quorum freeze", true); } break; default: //pe_action_set_reason(action, NULL, TRUE); pe__set_action_flags(action, pe_action_runnable); break; } } } /*! * \internal * \brief Update a resource object's flags for a new action on it * * \param[in,out] rsc Resource that action is for (if any) * \param[in] action New action */ static void update_resource_flags_for_action(pe_resource_t *rsc, const pe_action_t *action) { /* @COMPAT pe_rsc_starting and pe_rsc_stopping are not actually used * within Pacemaker, and should be deprecated and eventually removed */ if (pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei)) { pe__set_resource_flags(rsc, pe_rsc_stopping); } else if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_casei)) { if (pcmk_is_set(action->flags, pe_action_runnable)) { pe__set_resource_flags(rsc, pe_rsc_starting); } else { pe__clear_resource_flags(rsc, pe_rsc_starting); } } } static bool valid_stop_on_fail(const char *value) { return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL); } static const char * unpack_operation_on_fail(pe_action_t * action) { const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval_spec = NULL; const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei) && !valid_stop_on_fail(value)) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop " "action to default value because '%s' is not " "allowed for stop", action->rsc->id, value); return NULL; } else if (pcmk__str_eq(action->task, PCMK_ACTION_DEMOTE, pcmk__str_casei) && (value == NULL)) { // demote on_fail defaults to monitor value for promoted role if present xmlNode *operation = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = pcmk__xe_first_child(action->rsc->ops_xml); (operation != NULL) && (value == NULL); operation = pcmk__xe_next(operation)) { bool enabled = false; if (!pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && !enabled) { continue; } else if (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_casei) || !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S, RSC_ROLE_PROMOTED_LEGACY_S, NULL)) { continue; } else if (crm_parse_interval_spec(interval_spec) == 0) { continue; } else if (pcmk__str_eq(on_fail, "demote", pcmk__str_casei)) { continue; } value = on_fail; } - } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { + } else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE, + pcmk__str_casei)) { value = "ignore"; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { name = crm_element_value(action->op_entry, "name"); role = crm_element_value(action->op_entry, "role"); interval_spec = crm_element_value(action->op_entry, XML_LRM_ATTR_INTERVAL); if (!pcmk__str_eq(name, PCMK_ACTION_PROMOTE, pcmk__str_casei) && (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_casei) || !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S, RSC_ROLE_PROMOTED_LEGACY_S, NULL) || (crm_parse_interval_spec(interval_spec) == 0))) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s " "action to default value because 'demote' is not " "allowed for it", action->rsc->id, name); return NULL; } } return value; } static int unpack_timeout(const char *value) { int timeout_ms = crm_get_msec(value); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } return timeout_ms; } // true if value contains valid, non-NULL interval origin for recurring op static bool unpack_interval_origin(const char *value, const xmlNode *xml_obj, guint interval_ms, const crm_time_t *now, long long *start_delay) { long long result = 0; guint interval_sec = interval_ms / 1000; crm_time_t *origin = NULL; // Ignore unspecified values and non-recurring operations if ((value == NULL) || (interval_ms == 0) || (now == NULL)) { return false; } // Parse interval origin from text origin = crm_time_new(value); if (origin == NULL) { pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation " "'%s' because '%s' is not valid", (ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value); return false; } // Get seconds since origin (negative if origin is in the future) result = crm_time_get_seconds(now) - crm_time_get_seconds(origin); crm_time_free(origin); // Calculate seconds from closest interval to now result = result % interval_sec; // Calculate seconds remaining until next interval result = ((result <= 0)? 0 : interval_sec) - result; crm_info("Calculated a start delay of %llds for operation '%s'", result, (ID(xml_obj)? ID(xml_obj) : "(unspecified)")); if (start_delay != NULL) { *start_delay = result * 1000; // milliseconds } return true; } static int unpack_start_delay(const char *value, GHashTable *meta) { int start_delay = 0; if (value != NULL) { start_delay = crm_get_msec(value); if (start_delay < 0) { start_delay = 0; } if (meta) { g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), pcmk__itoa(start_delay)); } } return start_delay; } static xmlNode * find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled) { guint interval_ms = 0; guint min_interval_ms = G_MAXUINT; const char *name = NULL; const char *interval_spec = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { bool enabled = false; name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && !enabled) { continue; } if (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_casei)) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms && (interval_ms < min_interval_ms)) { min_interval_ms = interval_ms; op = operation; } } } return op; } /*! * \brief Unpack operation XML into an action structure * * Unpack an operation's meta-attributes (normalizing the interval, timeout, * and start delay values as integer milliseconds), requirements, and * failure policy. * * \param[in,out] action Action to unpack into * \param[in] xml_obj Operation XML (or NULL if all defaults) * \param[in] container Resource that contains affected resource, if any * \param[in,out] data_set Cluster state * \param[in] interval_ms How frequently to perform the operation */ static void unpack_operation(pe_action_t *action, const xmlNode *xml_obj, const pe_resource_t *container, pe_working_set_t *data_set, guint interval_ms) { int timeout_ms = 0; const char *value = NULL; bool is_probe = false; pe_rsc_eval_data_t rsc_rule_data = { .standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS), .provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER), .agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE) }; pe_op_eval_data_t op_rule_data = { .op_name = action->task, .interval = interval_ms }; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = &rsc_rule_data, .op_data = &op_rule_data }; CRM_CHECK(action && action->rsc, return); is_probe = pcmk_is_probe(action->task, interval_ms); // Cluster-wide pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action->meta, NULL, FALSE, data_set); // Determine probe default timeout differently if (is_probe) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); if (value) { crm_trace("\t%s: Setting default timeout to minimum-interval " "monitor's timeout '%s'", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } } if (xml_obj) { xmlAttrPtr xIter = NULL; // take precedence over defaults pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data, action->meta, NULL, TRUE, data_set); /* Anything set as an XML property has highest precedence. * This ensures we use the name and interval from the tag. */ for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = pcmk__xml_attr_value(xIter); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } g_hash_table_remove(action->meta, "id"); // Normalize interval to milliseconds if (interval_ms > 0) { g_hash_table_replace(action->meta, strdup(XML_LRM_ATTR_INTERVAL), crm_strdup_printf("%u", interval_ms)); } else { g_hash_table_remove(action->meta, XML_LRM_ATTR_INTERVAL); } /* * Timeout order of precedence: * 1. pcmk_monitor_timeout (if rsc has pcmk_ra_cap_fence_params * and task is start or a probe; pcmk_monitor_timeout works * by default for a recurring monitor) * 2. explicit op timeout on the primitive * 3. default op timeout * a. if probe, then min-interval monitor's timeout * b. else, in XML_CIB_TAG_OPCONFIG * 4. CRM_DEFAULT_OP_TIMEOUT_S * * #1 overrides general rule of XML property having highest * precedence. */ if (pcmk_is_set(pcmk_get_ra_caps(rsc_rule_data.standard), pcmk_ra_cap_fence_params) && (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_casei) || is_probe)) { GHashTable *params = pe_rsc_params(action->rsc, action->node, data_set); value = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (value) { crm_trace("\t%s: Setting timeout to pcmk_monitor_timeout '%s', " "overriding default", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } // Normalize timeout to positive milliseconds value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); timeout_ms = unpack_timeout(value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), pcmk__itoa(timeout_ms)); if (!pcmk__strcase_any_of(action->task, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, NULL)) { action->needs = rsc_req_nothing; value = "nothing (not start or promote)"; } else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing"; } else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum"; } else { action->needs = rsc_req_nothing; value = "nothing"; } pe_rsc_trace(action->rsc, "%s requires %s", action->uuid, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (pcmk__str_eq(value, "block", pcmk__str_casei)) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); value = "block"; // The above could destroy the original string } else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) { action->on_fail = action_fail_fence; value = "node fencing"; if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for " "operation '%s' to 'stop' because 'fence' is not " "valid when fencing is disabled", action->uuid); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) { action->on_fail = action_fail_standby; value = "node standby"; } else if (pcmk__strcase_any_of(value, "ignore", PCMK__VALUE_NOTHING, NULL)) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { action->on_fail = action_fail_demote; value = "demote instance"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* For remote nodes, ensure that any failure that results in dropping an * active connection to the node results in fencing of the node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (((value == NULL) || !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) && pe__resource_is_remote_conn(action->rsc, data_set) && !(pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_casei) && (interval_ms == 0)) && !pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_casei)) { if (!pcmk_is_set(action->rsc->flags, pe_rsc_managed)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop unmanaged remote node (enforcing default)"; } else { if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence remote node (default)"; } else { value = "recover remote node connection (default)"; } if (action->rsc->remote_reconnect_ms) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } } else if ((value == NULL) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei)) { if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "%s failure handling: %s", action->uuid, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); if (value) { pe_warn_once(pe_wo_role_after, "Support for role_after_failure is deprecated and will be removed in a future release"); } } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (pcmk__str_eq(action->task, PCMK_ACTION_PROMOTE, pcmk__str_casei)) { action->fail_role = RSC_ROLE_UNPROMOTED; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "%s failure results in: %s", action->uuid, role2text(action->fail_role)); value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY); if (value) { unpack_start_delay(value, action->meta); } else { long long start_delay = 0; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now, &start_delay)) { g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_strdup_printf("%lld", start_delay)); } } } /*! * \brief Create or update an action object * * \param[in,out] rsc Resource that action is for (if any) * \param[in,out] key Action key (must be non-NULL) * \param[in] task Action name (must be non-NULL) * \param[in] on_node Node that action is on (if any) * \param[in] optional Whether action should be considered optional * \param[in] save_action Whether action should be recorded in transition graph * \param[in,out] data_set Cluster working set * * \return Action object corresponding to arguments * \note This function takes ownership of (and might free) \p key. If * \p save_action is true, \p data_set will own the returned action, * otherwise it is the caller's responsibility to free the return value * with pe_free_action(). */ pe_action_t * custom_action(pe_resource_t *rsc, char *key, const char *task, const pe_node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { pe_action_t *action = NULL; CRM_ASSERT((key != NULL) && (task != NULL) && (data_set != NULL)); if (save_action) { action = find_existing_action(key, rsc, on_node, data_set); } if (action == NULL) { action = new_action(key, task, rsc, on_node, optional, save_action, data_set); } else { free(key); } update_action_optional(action, optional); if (rsc != NULL) { if (action->node != NULL) { unpack_action_node_attributes(action, data_set); } update_resource_action_runnable(action, save_action, data_set); if (save_action) { update_resource_flags_for_action(rsc, action); } } return action; } pe_action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { pe_action_t *op = lookup_singleton(data_set, name); if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); } return op; } static GList * find_unfencing_devices(GList *candidates, GList *matches) { for (GList *gIter = candidates; gIter != NULL; gIter = gIter->next) { pe_resource_t *candidate = gIter->data; if (candidate->children != NULL) { matches = find_unfencing_devices(candidate->children, matches); } else if (!pcmk_is_set(candidate->flags, pe_rsc_fence_device)) { continue; } else if (pcmk_is_set(candidate->flags, pe_rsc_needs_unfencing)) { matches = g_list_prepend(matches, candidate); } else if (pcmk__str_eq(g_hash_table_lookup(candidate->meta, PCMK_STONITH_PROVIDES), PCMK__VALUE_UNFENCING, pcmk__str_casei)) { matches = g_list_prepend(matches, candidate); } } return matches; } static int node_priority_fencing_delay(const pe_node_t *node, const pe_working_set_t *data_set) { int member_count = 0; int online_count = 0; int top_priority = 0; int lowest_priority = 0; GList *gIter = NULL; // `priority-fencing-delay` is disabled if (data_set->priority_fencing_delay <= 0) { return 0; } /* No need to request a delay if the fencing target is not a normal cluster * member, for example if it's a remote node or a guest node. */ if (node->details->type != node_member) { return 0; } // No need to request a delay if the fencing target is in our partition if (node->details->online) { return 0; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *n = gIter->data; if (n->details->type != node_member) { continue; } member_count ++; if (n->details->online) { online_count++; } if (member_count == 1 || n->details->priority > top_priority) { top_priority = n->details->priority; } if (member_count == 1 || n->details->priority < lowest_priority) { lowest_priority = n->details->priority; } } // No need to delay if we have more than half of the cluster members if (online_count > member_count / 2) { return 0; } /* All the nodes have equal priority. * Any configured corresponding `pcmk_delay_base/max` will be applied. */ if (lowest_priority == top_priority) { return 0; } if (node->details->priority < top_priority) { return 0; } return data_set->priority_fencing_delay; } pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set) { char *op_key = NULL; pe_action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } op_key = crm_strdup_printf("%s-%s-%s", PCMK_ACTION_STONITH, node->details->uname, op); stonith_op = lookup_singleton(data_set, op_key); if(stonith_op == NULL) { stonith_op = custom_action(NULL, op_key, PCMK_ACTION_STONITH, node, TRUE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); if (pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Extra work to detect device changes */ GString *digests_all = g_string_sized_new(1024); GString *digests_secure = g_string_sized_new(1024); GList *matches = find_unfencing_devices(data_set->resources, NULL); char *key = NULL; char *value = NULL; for (GList *gIter = matches; gIter != NULL; gIter = gIter->next) { pe_resource_t *match = gIter->data; const char *agent = g_hash_table_lookup(match->meta, XML_ATTR_TYPE); op_digest_cache_t *data = NULL; data = pe__compare_fencing_digest(match, agent, node, data_set); if(data->rc == RSC_DIGEST_ALL) { optional = FALSE; crm_notice("Unfencing node %s because the definition of " "%s changed", pe__node_name(node), match->id); if (!pcmk__is_daemon && data_set->priv != NULL) { pcmk__output_t *out = data_set->priv; out->info(out, "notice: Unfencing node %s because the " "definition of %s changed", pe__node_name(node), match->id); } } pcmk__g_strcat(digests_all, match->id, ":", agent, ":", data->digest_all_calc, ",", NULL); pcmk__g_strcat(digests_secure, match->id, ":", agent, ":", data->digest_secure_calc, ",", NULL); } key = strdup(XML_OP_ATTR_DIGESTS_ALL); value = strdup((const char *) digests_all->str); CRM_ASSERT((key != NULL) && (value != NULL)); g_hash_table_insert(stonith_op->meta, key, value); g_string_free(digests_all, TRUE); key = strdup(XML_OP_ATTR_DIGESTS_SECURE); value = strdup((const char *) digests_secure->str); CRM_ASSERT((key != NULL) && (value != NULL)); g_hash_table_insert(stonith_op->meta, key, value); g_string_free(digests_secure, TRUE); } } else { free(op_key); } if (data_set->priority_fencing_delay > 0 /* It's a suitable case where `priority-fencing-delay` applies. * At least add `priority-fencing-delay` field as an indicator. */ && (priority_delay /* The priority delay needs to be recalculated if this function has * been called by schedule_fencing_and_shutdowns() after node * priority has already been calculated by native_add_running(). */ || g_hash_table_lookup(stonith_op->meta, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) { /* Add `priority-fencing-delay` to the fencing op even if it's 0 for * the targeting node. So that it takes precedence over any possible * `pcmk_delay_base/max`. */ char *delay_s = pcmk__itoa(node_priority_fencing_delay(node, data_set)); g_hash_table_insert(stonith_op->meta, strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY), delay_s); } if(optional == FALSE && pe_can_fence(data_set, node)) { pe__clear_action_flags(stonith_op, pe_action_optional); pe_action_set_reason(stonith_op, reason, false); } else if(reason && stonith_op->reason == NULL) { stonith_op->reason = strdup(reason); } return stonith_op; } void pe_free_action(pe_action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */ g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->cancel_task); free(action->reason); free(action->task); free(action->uuid); free(action->node); free(action); } int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set) { xmlNode *child = NULL; GHashTable *action_meta = NULL; const char *timeout_spec = NULL; int timeout_ms = 0; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME), pcmk__str_casei)) { timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT); break; } } if (timeout_spec == NULL && data_set->op_defaults) { action_meta = pcmk__strkey_table(free, free); pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action_meta, NULL, FALSE, data_set); timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT); } // @TODO check meta-attributes // @TODO maybe use min-interval monitor timeout as default for monitors timeout_ms = crm_get_msec(timeout_spec); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } if (action_meta != NULL) { g_hash_table_destroy(action_meta); } return timeout_ms; } enum action_tasks get_complex_task(const pe_resource_t *rsc, const char *name) { enum action_tasks task = text2task(name); if ((rsc != NULL) && (rsc->variant == pe_native)) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); --task; break; default: break; } } return task; } /*! * \internal * \brief Find first matching action in a list * * \param[in] input List of actions to search * \param[in] uuid If not NULL, action must have this UUID * \param[in] task If not NULL, action must have this action name * \param[in] on_node If not NULL, action must be on this node * * \return First action in list that matches criteria, or NULL if none */ pe_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pe_node_t *on_node) { CRM_CHECK(uuid || task, return NULL); for (const GList *gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) { continue; } else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GList * find_actions(GList *input, const char *key, const pe_node_t *on_node) { GList *gIter = input; GList *result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) { continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, pe__node_name(on_node)); action->node = pe__copy_node(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, pe__node_name(on_node)); result = g_list_prepend(result, action); } } return result; } GList * find_actions_exact(GList *input, const char *key, const pe_node_t *on_node) { GList *result = NULL; CRM_CHECK(key != NULL, return NULL); if (on_node == NULL) { return NULL; } for (GList *gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if ((action->node != NULL) && pcmk__str_eq(key, action->uuid, pcmk__str_casei) && pcmk__str_eq(on_node->details->id, action->node->details->id, pcmk__str_casei)) { crm_trace("Action %s on %s matches", key, pe__node_name(on_node)); result = g_list_prepend(result, action); } } return result; } /*! * \brief Find all actions of given type for a resource * * \param[in] rsc Resource to search * \param[in] node Find only actions scheduled on this node * \param[in] task Action name to search for * \param[in] require_node If TRUE, NULL node or action node will not match * * \return List of actions found (or NULL if none) * \note If node is not NULL and require_node is FALSE, matching actions * without a node will be assigned to node. */ GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node) { GList *result = NULL; char *key = pcmk__op_key(rsc->id, task, 0); if (require_node) { result = find_actions_exact(rsc->actions, key, node); } else { result = find_actions(rsc->actions, key, node); } free(key); return result; } /*! * \internal * \brief Create an action reason string based on the action itself * * \param[in] action Action to create reason string for * \param[in] flag Action flag that was cleared * * \return Newly allocated string suitable for use as action reason * \note It is the caller's responsibility to free() the result. */ char * pe__action2reason(const pe_action_t *action, enum pe_action_flags flag) { const char *change = NULL; switch (flag) { case pe_action_runnable: change = "unrunnable"; break; case pe_action_migrate_runnable: change = "unmigrateable"; break; case pe_action_optional: change = "required"; break; default: // Bug: caller passed unsupported flag CRM_CHECK(change != NULL, change = ""); break; } return crm_strdup_printf("%s%s%s %s", change, (action->rsc == NULL)? "" : " ", (action->rsc == NULL)? "" : action->rsc->id, action->task); } void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite) { if (action->reason != NULL && overwrite) { pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, pcmk__s(reason, "(none)")); } else if (action->reason == NULL) { pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, pcmk__s(reason, "(none)")); } else { // crm_assert(action->reason != NULL && !overwrite); return; } pcmk__str_update(&action->reason, reason); } /*! * \internal * \brief Create an action to clear a resource's history from CIB * * \param[in,out] rsc Resource to clear * \param[in] node Node to clear history on * \param[in,out] data_set Cluster working set * * \return New action to clear resource history */ pe_action_t * pe__clear_resource_history(pe_resource_t *rsc, const pe_node_t *node, pe_working_set_t *data_set) { char *key = NULL; CRM_ASSERT(rsc && node); - key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0); - return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE, + key = pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0); + return custom_action(rsc, key, PCMK_ACTION_LRM_DELETE, node, FALSE, TRUE, data_set); } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID); const char *a_node = crm_element_value(xml_a, XML_LRM_ATTR_TARGET); const char *b_node = crm_element_value(xml_b, XML_LRM_ATTR_TARGET); bool same_node = true; /* @COMPAT The on_node attribute was added to last_failure as of 1.1.13 (via * 8b3ca1c) and the other entries as of 1.1.12 (via 0b07b5c). * * In case that any of the lrm_rsc_op entries doesn't have on_node * attribute, we need to explicitly tell whether the two operations are on * the same node. */ if (a_node == NULL || b_node == NULL) { same_node = same_node_default; } else { same_node = pcmk__str_eq(a_node, b_node, pcmk__str_casei); } if (same_node && pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_none)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unlikely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why it's happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (same_node && a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (same_node && b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (a_call_id >= 0 && b_call_id >= 0 && (!same_node || a_call_id == b_call_id)) { /* * The op and last_failed_op are the same * Order on last-rc-change */ time_t last_a = -1; time_t last_b = -1; crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %lld vs %lld", (long long) last_a, (long long) last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic a"); } if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (e.g. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last */ if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } gint sort_op_by_callid(gconstpointer a, gconstpointer b) { const xmlNode *xml_a = a; const xmlNode *xml_b = b; return pe__is_newer_op(xml_a, xml_b, true); } /*! * \internal * \brief Create a new pseudo-action for a resource * * \param[in,out] rsc Resource to create action for * \param[in] task Action name * \param[in] optional Whether action should be considered optional * \param[in] runnable Whethe action should be considered runnable * * \return New action object corresponding to arguments */ pe_action_t * pe__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional, bool runnable) { pe_action_t *action = NULL; CRM_ASSERT((rsc != NULL) && (task != NULL)); action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL, optional, TRUE, rsc->cluster); pe__set_action_flags(action, pe_action_pseudo); if (runnable) { pe__set_action_flags(action, pe_action_runnable); } return action; } /*! * \internal * \brief Add the expected result to an action * * \param[in,out] action Action to add expected result to * \param[in] expected_result Expected result to add * * \note This is more efficient than calling add_hash_param(). */ void pe__add_action_expected_result(pe_action_t *action, int expected_result) { char *name = NULL; CRM_ASSERT((action != NULL) && (action->meta != NULL)); name = strdup(XML_ATTR_TE_TARGET_RC); CRM_ASSERT (name != NULL); g_hash_table_insert(action->meta, name, pcmk__itoa(expected_result)); }