diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 36d7a3a7af..5587263633 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2414 +1,2406 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static void lrm_connection_destroy(void) { if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Lost connection to local executor"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { pcmk__insert_dup(user_data, (const char *) key, (const char *) value); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { pcmk__insert_dup(user_data, (const char *) key, (const char *) value); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); controld_delete_resource_history(op->rsc_id, lrm_state->node_name, NULL, crmd_cib_smart_opt()); return; } if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = pcmk__assert_alloc(1, sizeof(rsc_history_t)); entry->id = pcmk__str_copy(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = pcmk__str_copy(rsc->type); entry->rsc.standard = pcmk__str_copy(rsc->standard); entry->rsc.provider = pcmk__str_copy(rsc->provider); } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_EXEC_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_MONITOR, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = pcmk__strkey_table(free, free); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if ((entry->recurring_op_list != NULL) && !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR, pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input, const char *rsc_id, const lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return pcmk__s(op->remote_nodename, controld_globals.cluster->priv->node_name); } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = controld_get_executor_state(op_node_name(op), false); pcmk__assert(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } static void try_local_executor_connect(long long action, fsa_data_t *msg_data, lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; crm_debug("Connecting to the local executor"); // If we can connect, great rc = controld_connect_local_executor(lrm_state); if (rc == pcmk_rc_ok) { controld_set_fsa_input_flags(R_LRM_CONNECTED); crm_info("Connection to the local executor established"); return; } // Otherwise, if we can try again, set a timer to do so if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the local executor %d time%s " "(%d max): %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); controld_start_wait_timer(); crmd_fsa_stall(FALSE); return; } // Otherwise give up crm_err("Failed to connect to the executor the max allowed " "%d time%s: %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), pcmk_rc_str(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if (controld_globals.cluster->priv->node_name == NULL) { return; // Shouldn't be possible } lrm_state = controld_get_executor_state(NULL, true); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); } if (action & A_LRM_CONNECT) { try_local_executor_connect(action, msg_data, lrm_state); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; active_op_t *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) { guint removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->active_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, pcmk__plural_s(removed), when, nremaining); } } if (lrm_state->active_ops != NULL) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, pcmk__plural_s(counter), when); if ((cur_state == S_TERMINATE) || !pcmk_is_set(controld_globals.fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->active_ops != NULL) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->active_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if ((entry->last->rc == PCMK_OCF_OK) && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP, pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = pcmk__xe_create(rsc_list, PCMK__XE_LRM_RESOURCE); crm_xml_add(xml_rsc, PCMK_XA_ID, entry->id); crm_xml_add(xml_rsc, PCMK_XA_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { static const char *name = CRM_META "_" PCMK__META_CONTAINER; const char *container = g_hash_table_lookup(entry->last->params, name); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, PCMK__META_CONTAINER, container); } } controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name); controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name); } } return FALSE; } xmlNode * controld_query_executor_state(void) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; pcmk__node_status_t *peer = NULL; lrm_state_t *lrm_state = controld_get_executor_state(NULL, false); if (!lrm_state) { crm_err("Could not get executor state for local node"); return NULL; } peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, node_update_cluster|node_update_peer, NULL, __func__); if (xml_state == NULL) { return NULL; } xml_data = pcmk__xe_create(xml_state, PCMK__XE_LRM); crm_xml_add(xml_data, PCMK_XA_ID, peer->xml_id); rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } /*! * \internal * \brief Map standard Pacemaker return code to operation status and OCF code * * \param[out] event Executor event whose status and return code should be set * \param[in] rc Standard Pacemaker return code */ void controld_rc2event(lrmd_event_data_t *event, int rc) { /* This is called for cleanup requests from controller peers/clients, not * for resource actions, so no exit reason is needed. */ switch (rc) { case pcmk_rc_ok: lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); break; case EACCES: lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, PCMK_EXEC_ERROR, NULL); break; default: lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, NULL); break; } } /*! * \internal * \brief Trigger a new transition after CIB status was deleted * * If a CIB status delete was not expected (as part of the transition graph), * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" * cluster property. * * \param[in] from_sys IPC name that requested the delete * \param[in] rsc_id Resource whose status was deleted (for logging only) */ void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_none, PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, NULL, NULL); free(now_s); } } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, PCMK__XA_SRC); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; active_op_t *pending = value; if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } static void delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, GHashTableIter *rsc_iter, int rc, const char *user_name, bool from_cib) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = pcmk__str_copy(rsc_id); if (rsc_iter) { g_hash_table_iter_remove(rsc_iter); } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); } if (from_cib) { controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); } g_hash_table_foreach_remove(lrm_state->active_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { lrm_state_t *lrm_state = controld_get_executor_state(node_name, false); if (lrm_state == NULL) { return; } if (lrm_state->resource_history != NULL) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; active_op_t *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->active_ops, key); if (pending) { if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { controld_set_active_op_flags(pending, active_op_remove); crm_debug("Scheduling %s for removal", key); } if (pcmk_is_set(pending->flags, active_op_cancelled)) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } controld_set_active_op_flags(pending, active_op_cancelled); } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the active operations list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from active operations will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; active_op_t *op = value; if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->active_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in,out] lrm_state Executor connection state to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource if not already * \param[out] rsc_info Where to store information obtained from executor * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = pcmk__xe_id(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, PCMK__XA_LONG_ID); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, PCMK_XA_CLASS); const char *provider = crm_element_value(rsc_xml, PCMK_XA_PROVIDER); const char *type = crm_element_value(rsc_xml, PCMK_XA_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " QB_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc, GHashTableIter *iter, const char *sys, const char *user, ha_msg_input_t *request, bool unregister, bool from_cib) { int rc = pcmk_ok; crm_info("Removing resource %s from executor for %s%s%s", id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, PCMK_XA_REFERENCE); op = pcmk__assert_alloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = pcmk__str_copy(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Could not delete '%s' from executor for %s%s%s: %s " QB_XS " rc=%d", id, sys, (user? " as " : ""), (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode, const char *exit_reason) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, op_exitcode, op_status, exit_reason); } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ bool unregister = true; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { unregister = false; if (reprobe_all_nodes) { lrm_state_t *remote_lrm_state = controld_get_executor_state(entry->id, false); if (remote_lrm_state != NULL) { /* If reprobing all nodes, be sure to reprobe the remote * node before clearing its connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE, reprobe_all_nodes); } } } /* Don't delete from the CIB, since we'll delete the whole node's LRM * state from the CIB soon */ delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister, false); } /* Now delete the copy in the CIB */ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, cib_none); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in,out] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use * \param[in] exit_reason Human-friendly detail, if error */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action, int op_status, enum ocf_exitcode rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, PCMK_XA_OPERATION); const char *target_node = crm_element_value(action, PCMK__META_ON_NODE); xmlNode *xml_rsc = pcmk__xe_first_child(action, PCMK_XE_PRIMITIVE, NULL, NULL); if ((xml_rsc == NULL) || (pcmk__xe_id(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, PCMK__XA_OPERATION_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", pcmk__xe_id(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, pcmk__xe_id(xml_rsc), operation); if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); } crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation (replacing \p NULL with local node * name) * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(const xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, PCMK__META_ON_NODE); } if (target == NULL) { target = controld_globals.cluster->priv->node_name; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = pcmk__xe_first_child(xml, PCMK_XE_PRIMITIVE, NULL, NULL); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, pcmk__xe_id(xml_rsc), "asyncmon"); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; if (user_name && !pcmk__is_privileged(user_name)) { crm_err("%s does not have permission to fail %s", user_name, pcmk__xe_id(xml_rsc)); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_INSUFFICIENT_PRIV, "Unprivileged user cannot fail resources"); controld_ack_event_directly(from_host, from_sys, NULL, op, pcmk__xe_id(xml_rsc)); lrmd_free_event(op); return; } if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, "Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, "Cannot fail unknown resource"); } controld_ack_event_directly(from_host, from_sys, NULL, op, pcmk__xe_id(xml_rsc)); lrmd_free_event(op); } static void handle_reprobe_op(lrm_state_t *lrm_state, xmlNode *msg, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node, reprobe_all_nodes); if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { xmlNode *reply = pcmk__new_reply(msg, NULL); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } pcmk__xml_free(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; guint interval_ms = 0; gboolean in_progress = FALSE; xmlNode *params = pcmk__xe_first_child(input->xml, PCMK__XE_ATTRIBUTES, NULL, NULL); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(PCMK_XA_OPERATION); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(PCMK_META_INTERVAL); if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { free(meta_key); return FALSE; } free(meta_key); op_key = pcmk__op_key(rsc->id, op_task, interval_ms); meta_key = crm_meta_name(PCMK__XA_CALL_ID); call_id = crm_element_value(params, meta_key); free(meta_key); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); pcmk__scan_min_int(call_id, &call, 0); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } controld_delete_action_history_by_key(rsc->id, lrm_state->node_name, op_key, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ if (lrm_state->active_ops != NULL) { g_hash_table_remove(lrm_state->active_ops, op_id); } free(op_id); } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { bool unregister = true; int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, user_name, cib_dryrun|cib_sync_call); if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. */ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = false; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister, true); } // User data for asynchronous metadata execution struct metadata_cb_data { lrmd_rsc_info_t *rsc; // Copy of resource information xmlNode *input_xml; // Copy of FSA input XML }; static struct metadata_cb_data * new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) { struct metadata_cb_data *data = NULL; data = pcmk__assert_alloc(1, sizeof(struct metadata_cb_data)); data->input_xml = pcmk__xml_copy(NULL, input_xml); data->rsc = lrmd_copy_rsc_info(rsc); return data; } static void free_metadata_cb_data(struct metadata_cb_data *data) { lrmd_free_rsc_info(data->rsc); pcmk__xml_free(data->input_xml); free(data); } /*! * \internal * \brief Execute an action after metadata has been retrieved * * \param[in] pid Ignored * \param[in] result Result of metadata action * \param[in] user_data Metadata callback data */ static void metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) { struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; struct ra_metadata_s *md = NULL; lrm_state_t *lrm_state = controld_get_executor_state(lrm_op_target(data->input_xml), false); if ((lrm_state != NULL) && pcmk__result_ok(result)) { md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, result->action_stdout); } if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); } free_metadata_cb_data(data); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = lrm_op_target(input->xml); gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; // Message routed to the local node is targeting a specific, non-local node is_remote_node = !controld_is_local_node(target_node); lrm_state = controld_get_executor_state(target_node, false); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Local node has no connection to remote"); return; } pcmk__assert(lrm_state != NULL); user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL); crm_op = crm_element_value(input->msg, PCMK__XA_CRM_TASK); from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM); if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { from_host = crm_element_value(input->msg, PCMK__XA_SRC); } if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { crm_rsc_delete = TRUE; // from crm_resource } operation = PCMK_ACTION_DELETE; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, PCMK_XA_OPERATION); } CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return); crm_trace("'%s' execution request from %s as %s user", pcmk__s(crm_op, operation), pcmk__s(from_sys, "unknown subsystem"), pcmk__s(user_name, "current")); if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none) || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { const char *raw_target = NULL; if (input->xml != NULL) { // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes raw_target = crm_element_value(input->xml, PCMK__META_ON_NODE); } handle_reprobe_op(lrm_state, input->msg, from_sys, from_host, user_name, is_remote_node, (raw_target == NULL)); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = pcmk__xe_first_child(input->xml, PCMK_XE_PRIMITIVE, NULL, NULL); gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK((xml_rsc != NULL) && (pcmk__xe_id(xml_rsc) != NULL), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Not connected to remote executor"); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_debug("Not registering resource '%s' for a %s event " QB_XS " get-rc=%d (%s) transition-key=%s", pcmk__xe_id(xml_rsc), operation, rc, pcmk_strerror(rc), pcmk__xe_id(input->xml)); delete_rsc_entry(lrm_state, input, pcmk__xe_id(xml_rsc), NULL, pcmk_ok, user_name, true); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", pcmk__xe_id(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_NOT_CONFIGURED, // fatal error "Invalid resource definition"); return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " QB_XS " rc=%d", pcmk__xe_id(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_INVALID_PARAM, // hard error "Could not register resource with executor"); return; } if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { struct ra_metadata_s *md = NULL; /* Getting metadata from cache is OK except for start actions -- * always refresh from the agent for those, in case the resource * agent was updated. * * @TODO Only refresh metadata for starts if the agent actually * changed (using something like inotify, or a hash or modification * time of the agent executable). */ if (strcmp(operation, PCMK_ACTION_START) != 0) { md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); } if ((md == NULL) && crm_op_needs_metadata(rsc->standard, operation)) { /* Most likely, we'll need the agent metadata to record the * pending operation and the operation result. Get it now rather * than wait until then, so the metadata action doesn't eat into * the real action's timeout. * * @TODO Metadata is retrieved via direct execution of the * agent, which has a couple of related issues: the executor * should execute agents, not the controller; and metadata for * Pacemaker Remote nodes should be collected on those nodes, * not locally. */ struct metadata_cb_data *data = NULL; data = new_metadata_cb_data(rsc, input->xml); crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), ((rsc->provider == NULL)? "" : rsc->provider), rsc->type); (void) lrmd__metadata_async(rsc, metadata_complete, (void *) data); } else { do_lrm_rsc_op(lrm_state, rsc, input->xml, md); } } lrmd_free_rsc_info(rsc); } else { crm_err("Invalid execution request: unknown command '%s' (bug?)", crm_op); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; GHashTable *params = NULL; xmlNode *primitive = NULL; const char *class = NULL; const char *transition = NULL; pcmk__assert((rsc_id != NULL) && (operation != NULL)); op = lrmd_new_event(rsc_id, operation, 0); op->type = lrmd_event_exec_complete; op->timeout = 0; op->start_delay = 0; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = pcmk__strkey_table(free, free); pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC); op_delay = crm_meta_value(params, PCMK_META_START_DELAY); pcmk__scan_min_int(op_delay, &op->start_delay, 0); op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT); pcmk__scan_min_int(op_timeout, &op->timeout, 0); if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0, &(op->interval_ms)) != pcmk_rc_ok) { op->interval_ms = 0; } /* Use pcmk_monitor_timeout instead of meta timeout for stonith recurring monitor, if set */ primitive = pcmk__xe_first_child(rsc_op, PCMK_XE_PRIMITIVE, NULL, NULL); class = crm_element_value(primitive, PCMK_XA_CLASS); if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (op_timeout != NULL) { long long timeout_ms = crm_get_msec(op_timeout); op->timeout = (int) QB_MIN(timeout_ms, INT_MAX); } } if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = pcmk__strkey_table(free, free); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, PCMK__XA_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = pcmk__str_copy(transition); if (op->interval_ms != 0) { if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } /*! * \internal * \brief Send a (synthesized) event result * * Reply with a synthesized event result directly, as opposed to going through * the executor. * * \param[in] to_host Host to send result to * \param[in] to_sys IPC name to send result (NULL for transition engine) * \param[in] rsc Type information about resource the result is for * \param[in,out] op Event with result to send * \param[in] rsc_id ID of resource the result is for */ void controld_ack_event_directly(const char *to_host, const char *to_sys, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; pcmk__node_status_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { // op->rsc_id is a (const char *) but lrmd_free_event() frees it pcmk__assert(rsc_id != NULL); op->rsc_id = pcmk__str_copy(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = controld_get_local_node_status(); update = create_node_state_update(peer, node_update_none, NULL, __func__); iter = pcmk__xe_create(update, PCMK__XE_LRM); crm_xml_add(iter, PCMK_XA_ID, controld_globals.our_uuid); iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCES); iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCE); crm_xml_add(iter, PCMK_XA_ID, op->rsc_id); controld_add_resource_history_xml(iter, rsc, op, controld_globals.cluster->priv->node_name); /* We don't have the original message ID, so use "direct-ack" (we just need * something non-NULL for this to create a reply) * * @TODO It would be better to use the server, message ID, and task from the * original request when callers have it available */ reply = pcmk__new_message(pcmk_ipc_controld, "direct-ack", CRM_SYSTEM_LRMD, to_host, to_sys, CRM_OP_INVOKE_LRM, update); crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, PCMK_XA_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } pcmk__xml_free(update); pcmk__xml_free(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } controld_set_fsa_input_flags(R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; active_op_t *op = value; if ((op->interval_ms != 0) && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; active_op_t *op = value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } /*! * \internal * \brief Check whether recurring actions should be cancelled before an action * * \param[in] rsc_id Resource that action is for * \param[in] action Action being performed * \param[in] interval_ms Operation interval of \p action (in milliseconds) * * \return true if recurring actions should be cancelled, otherwise false */ static bool should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms) { if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0) && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) { /* Don't stop monitoring a migrating Pacemaker Remote connection * resource until the entire migration has completed. We must detect if * the connection is unexpectedly severed, even during a migration. */ return false; } // Cancel recurring actions before changing resource state return (interval_ms == 0) && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR, PCMK_ACTION_NOTIFY, NULL); } /*! * \internal * \brief Check whether an action should not be performed at this time * * \param[in] operation Action to be performed * * \return Readable description of why action should not be performed, * or NULL if it should be performed */ static const char * should_nack_action(const char *action) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN) && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); return "Not attempting start due to shutdown in progress"; } switch (controld_globals.fsa_state) { case S_NOT_DC: case S_POLICY_ENGINE: // Recalculating case S_TRANSITION_ENGINE: break; default: if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return "Controller cannot attempt actions at this time"; } break; } return NULL; } static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md) { int rc; int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; const char *operation = NULL; const char *nack_reason = NULL; CRM_CHECK((rsc != NULL) && (msg != NULL), return); operation = crm_element_value(msg, PCMK_XA_OPERATION); CRM_CHECK(!pcmk__str_empty(operation), return); transition = crm_element_value(msg, PCMK__XA_TRANSITION_KEY); if (pcmk__str_empty(transition)) { crm_log_xml_err(msg, "Missing transition number"); } if (lrm_state == NULL) { // This shouldn't be possible, but provide a failsafe just in case crm_err("Cannot execute %s of %s: No executor connection " QB_XS " transition_key=%s", operation, rsc->id, pcmk__s(transition, "")); synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, PCMK_OCF_UNKNOWN_ERROR, "No executor connection"); return; } if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. * Default to the OCF 1.1 name. */ if ((md != NULL) && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { operation = PCMK_ACTION_RELOAD; } else { operation = PCMK_ACTION_RELOAD_AGENT; } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " PCMK__OP_FMT, removed, pcmk__plural_s(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_notice("Requesting local execution of %s operation for %s on %s " QB_XS " transition_key=%s op_key=" PCMK__OP_FMT, pcmk__readable_action(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, pcmk__s(transition, ""), rsc->id, operation, op->interval_ms); nack_reason = should_nack_action(operation); if (nack_reason != NULL) { crm_notice("Discarding attempt to perform action %s on %s in state %s " "(shutdown=%s)", operation, rsc->id, fsa_state2string(controld_globals.fsa_state), pcmk__flag_text(controld_globals.fsa_input_register, R_SHUTDOWN)); lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, nack_reason); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } controld_record_pending_op(lrm_state->node_name, rsc, op); op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, op->params, &call_id); if (rc == pcmk_rc_ok) { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); active_op_t *pending = NULL; pending = pcmk__assert_alloc(1, sizeof(active_op_t)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = pcmk__str_copy(operation); pending->op_key = pcmk__str_copy(op_id); pending->rsc_id = pcmk__str_copy(rsc->id); pending->start_time = time(NULL); pending->user_data = pcmk__str_copy(op->user_data); if (crm_element_value_epoch(msg, PCMK_OPT_SHUTDOWN_LOCK, &(pending->lock_time)) != pcmk_ok) { pending->lock_time = 0; } g_hash_table_replace(lrm_state->active_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = PCMK_OCF_OK; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } else if (lrm_state_is_local(lrm_state)) { crm_err("Could not initiate %s action for resource %s locally: %s " QB_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { crm_err("Could not initiate %s action for resource %s remotely on %s: " "%s " QB_XS " rc=%d", operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); } free(op_id); lrmd_free_event(op); } -void -do_lrm_event(long long action, - enum crmd_fsa_cause cause, - enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) -{ - CRM_CHECK(FALSE, return); -} - static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = pcmk__str_copy(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } /*! * \internal * \brief Log the result of an executor action (actual or synthesized) * * \param[in] op Executor action to log result for * \param[in] op_key Operation key for action * \param[in] node_name Name of node action was performed on, if known * \param[in] confirmed Whether to log that graph action was confirmed */ static void log_executor_event(const lrmd_event_data_t *op, const char *op_key, const char *node_name, gboolean confirmed) { int log_level = LOG_ERR; GString *str = g_string_sized_new(100); // reasonable starting size pcmk__g_strcat(str, "Result of ", pcmk__readable_action(op->op_type, op->interval_ms), " operation for ", op->rsc_id, NULL); if (node_name != NULL) { pcmk__g_strcat(str, " on ", node_name, NULL); } switch (op->op_status) { case PCMK_EXEC_DONE: log_level = LOG_NOTICE; pcmk__g_strcat(str, ": ", crm_exit_str((crm_exit_t) op->rc), NULL); break; case PCMK_EXEC_TIMEOUT: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), " after ", pcmk__readable_interval(op->timeout), NULL); break; case PCMK_EXEC_CANCELLED: log_level = LOG_INFO; pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), NULL); break; default: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), NULL); break; } if ((op->exit_reason != NULL) && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL); } g_string_append(str, " " QB_XS); g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", (confirmed? "" : "un"), op->call_id, op_key); if (op->op_status == PCMK_EXEC_DONE) { g_string_append_printf(str, " rc=%d", op->rc); } do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); /* The services library has already logged the output at info or debug * level, so just raise to notice if it looks like a failure. */ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", op->rsc_id, op->op_type, op->interval_ms, node_name); crm_log_output(LOG_NOTICE, prefix, op->output); free(prefix); } } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, active_op_t *pending, const xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (compare_version(controld_globals.dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_EXEC_NOT_CONNECTED: lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, PCMK_EXEC_ERROR, op->exit_reason); break; case PCMK_EXEC_INVALID: lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, op->exit_reason); break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = pcmk__xe_first_child(action_xml, PCMK_XE_PRIMITIVE, NULL, NULL); const char *standard = crm_element_value(xml, PCMK_XA_CLASS); const char *provider = crm_element_value(xml, PCMK_XA_PROVIDER); const char *type = crm_element_value(xml, PCMK_XA_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = crm_element_value(action_xml, PCMK__META_ON_NODE); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->active_ops, op_id); } } if (op->op_status == PCMK_EXEC_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_EXEC_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_EXEC_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can time_t lock_time = (pending == NULL)? 0 : pending->lock_time; controld_update_resource_history(node_name, rsc, op, lock_time); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pcmk_is_set(pending->flags, active_op_remove)) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { controld_delete_action_history(op); } /* Directly acknowledge failed recurring actions here. The above call to * controld_delete_action_history() will not erase any corresponding * last_failure entry, which means that the DC won't confirm the * cancellation via process_op_deletion(), and the transition would * otherwise wait for the action timer to pop. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_EXEC_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->active_ops)); } } log_executor_event(op, op_key, node_name, removed); if (lrm_state) { if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA, pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL, true); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ controld_trigger_fsa(); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c index 5a16c8bad5..a0beff0b3d 100644 --- a/daemons/controld/controld_fsa.c +++ b/daemons/controld/controld_fsa.c @@ -1,733 +1,730 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include // uint64_t #include #include #include #include #include #include #include #include #include //! Triggers an FSA invocation static crm_trigger_t *fsa_trigger = NULL; #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt, args...) crm_trace( fmt, ##args) static void do_state_transition(enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data); void s_crmd_fsa_actions(fsa_data_t * fsa_data); void log_fsa_input(fsa_data_t * stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX "digraph \"g\" {"); do_dot_log(DOT_PREFIX " size = \"30,30\""); do_dot_log(DOT_PREFIX " graph ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " node ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " shape = \"ellipse\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " edge ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// special nodes"); do_dot_log(DOT_PREFIX " \"S_PENDING\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"blue\""); do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"red\""); do_dot_log(DOT_PREFIX " fontcolor = \"red\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// DC only nodes"); do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t * fsa_data, long long an_action, void (*function) (long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) { controld_clear_fsa_action_flags(an_action); crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); function(an_action, fsa_data->fsa_cause, controld_globals.fsa_state, fsa_data->fsa_input, fsa_data); } static const uint64_t startup_actions = A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED | A_CL_JOIN_QUERY; // A_LOG, A_WARN, A_ERROR void do_log(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { unsigned log_type = LOG_TRACE; if (action & A_LOG) { log_type = LOG_INFO; } else if (action & A_WARN) { log_type = LOG_WARNING; } else if (action & A_ERROR) { log_type = LOG_ERR; } do_crm_log(log_type, "Input %s received in state %s from %s", fsa_input2string(msg_data->fsa_input), fsa_state2string(cur_state), msg_data->origin); if (msg_data->data_type == fsa_dt_ha_msg) { ha_msg_input_t *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input->msg, __func__); } else if (msg_data->data_type == fsa_dt_xml) { xmlNode *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input, __func__); } else if (msg_data->data_type == fsa_dt_lrm) { lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type); do_crm_log(log_type, "Resource %s: Call ID %d returned %d (%d)." " New status if rc=0: %s", input->rsc_id, input->call_id, input->rc, input->op_status, (char *)input->user_data); } } /*! * \internal * \brief Initialize the FSA trigger */ void controld_init_fsa_trigger(void) { fsa_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); } /*! * \internal * \brief Destroy the FSA trigger */ void controld_destroy_fsa_trigger(void) { // This basically will not work, since mainloop has a reference to it mainloop_destroy_trigger(fsa_trigger); fsa_trigger = NULL; } /*! * \internal * \brief Trigger an FSA invocation * * \param[in] fn Calling function name * \param[in] line Line number where call occurred */ void controld_trigger_fsa_as(const char *fn, int line) { if (fsa_trigger != NULL) { crm_trace("%s:%d - Triggered FSA invocation", fn, line); mainloop_set_trigger(fsa_trigger); } } enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { controld_globals_t *globals = &controld_globals; fsa_data_t *fsa_data = NULL; uint64_t register_copy = controld_globals.fsa_input_register; uint64_t new_actions = A_NOTHING; enum crmd_fsa_state last_state; crm_trace("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(globals->fsa_state)); fsa_dump_actions(controld_globals.fsa_actions, "Initial"); controld_clear_global_flags(controld_fsa_is_stalled); if ((controld_globals.fsa_message_queue == NULL) && (controld_globals.fsa_actions != A_NOTHING)) { /* fake the first message so we can get into the loop */ fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __func__; fsa_data->data_type = fsa_dt_none; controld_globals.fsa_message_queue = g_list_append(controld_globals.fsa_message_queue, fsa_data); } while ((controld_globals.fsa_message_queue != NULL) && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { crm_trace("Checking messages (%d remaining)", g_list_length(controld_globals.fsa_message_queue)); fsa_data = get_message(); if(fsa_data == NULL) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ controld_set_fsa_action_flags(fsa_data->actions); fsa_dump_actions(fsa_data->actions, "Restored actions"); /* get the next batch of actions */ new_actions = controld_fsa_get_action(fsa_data->fsa_input); controld_set_fsa_action_flags(new_actions); fsa_dump_actions(new_actions, "New actions"); if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(globals->fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } /* logging : *before* the state is changed */ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = globals->fsa_state; globals->fsa_state = controld_fsa_get_next_state(fsa_data->fsa_input); /* * Remove certain actions during shutdown */ if ((globals->fsa_state == S_STOPPING) || pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { controld_clear_fsa_action_flags(startup_actions); } /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if (last_state != globals->fsa_state) { do_state_transition(last_state, globals->fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(globals->fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); } if ((controld_globals.fsa_message_queue != NULL) || (controld_globals.fsa_actions != A_NOTHING) || pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s", g_list_length(controld_globals.fsa_message_queue), (unsigned long long) controld_globals.fsa_actions, pcmk__flag_text(controld_globals.flags, controld_fsa_is_stalled)); } else { crm_trace("Exiting the FSA"); } /* cleanup inputs? */ if (register_copy != controld_globals.fsa_input_register) { uint64_t same = register_copy & controld_globals.fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added", controld_globals.fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same); } fsa_dump_actions(controld_globals.fsa_actions, "Remaining"); fsa_dump_queue(LOG_DEBUG); return globals->fsa_state; } void s_crmd_fsa_actions(fsa_data_t * fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ CRM_CHECK(fsa_data != NULL, return); while ((controld_globals.fsa_actions != A_NOTHING) && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } else if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! before anything worse happens */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_1)) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* sub-system restart */ } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_LRM_RECONNECT)) { do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_CIB_RESTART)) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_PE_RESTART)) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_TE_RESTART)) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* essential start tasks */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTUP)) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_START)) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_CONNECT)) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_READCONFIG)) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_CONNECT)) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_START)) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_START)) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* Timers */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_STOP)) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_INTEGRATE_TIMER_STOP)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_INTEGRATE_TIMER_START)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_FINALIZE_TIMER_STOP)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_FINALIZE_TIMER_START)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_MSG_ROUTE)) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if (pcmk_is_set(controld_globals.fsa_actions, A_RECOVER)) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_RESULT)) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_REQUEST)) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN_REQ)) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_VOTE)) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_COUNT)) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); - } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_EVENT)) { - do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); - /* * High priority actions */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTED)) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_QUERY)) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_START)) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions * - Membership */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TAKEOVER)) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASE)) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINAL)) { do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_CHECK)) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_START)) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_OFFER_ALL)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_OFFER_ONE)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_PROCESS_REQ)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_PROCESS_ACK)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINALIZE)) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_ANNOUNCE)) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * scheduler, and the scheduler before the transition engine. */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_HALT)) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_CANCEL)) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_INVOKE)) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_INVOKE)) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); /* Shutdown actions */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASED)) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_STOP)) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_STOP)) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN)) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_DISCONNECT)) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_DISCONNECT)) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_STOP)) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_STOP)) { do_fsa_action(fsa_data, A_STOP, do_stop); /* exit gracefully */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_0)) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { crm_err("Action %s not supported " QB_XS " %#llx", fsa_action2string(controld_globals.fsa_actions), (unsigned long long) controld_globals.fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __func__); } } } void log_fsa_input(fsa_data_t * stored_msg) { pcmk__assert(stored_msg != NULL); crm_trace("Processing queued input %d", stored_msg->id); if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_trace("FSA processing LRM callback from %s", stored_msg->origin); } else if (stored_msg->data == NULL) { crm_trace("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __func__); crm_trace("FSA processing XML message from %s", stored_msg->origin); crm_log_xml_trace(ha_input->xml, "FSA message data"); } } static void check_join_counts(fsa_data_t *msg_data) { int count; guint npeers; count = crmd_join_phase_count(controld_join_finalized); if (count > 0) { crm_err("%d cluster node%s failed to confirm join", count, pcmk__plural_s(count)); crmd_join_phase_log(LOG_NOTICE); return; } npeers = pcmk__cluster_num_active_nodes(); count = crmd_join_phase_count(controld_join_confirmed); if (count == npeers) { if (npeers == 1) { crm_debug("Sole active cluster node is fully joined"); } else { crm_debug("All %d active cluster nodes are fully joined", count); } } else if (count > npeers) { crm_err("New election needed because more nodes confirmed join " "than are in membership (%d > %u)", count, npeers); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (controld_globals.membership_id != controld_globals.peer_seq) { crm_info("New join needed because membership changed (%llu -> %llu)", controld_globals.membership_id, controld_globals.peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else { crm_warn("Only %d of %u active cluster nodes fully joined " "(%d did not respond to offer)", count, npeers, crmd_join_phase_count(controld_join_welcomed)); } } static void do_state_transition(enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data) { int level = LOG_INFO; int count = 0; gboolean clear_recovery_bit = TRUE; #if 0 uint64_t original_fsa_actions = controld_globals.fsa_actions; #endif enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_LOG_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (cur_state == S_IDLE || next_state == S_IDLE) { level = LOG_NOTICE; } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) { level = LOG_NOTICE; } else if (cur_state == S_ELECTION) { level = LOG_NOTICE; } else if (cur_state == S_STARTING) { level = LOG_NOTICE; } else if (next_state == S_RECOVERY) { level = LOG_WARNING; } do_crm_log(level, "State transition %s -> %s " QB_XS " input=%s cause=%s origin=%s", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { controld_stop_current_election_timeout(); } if (next_state == S_INTEGRATION) { controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START); } else { controld_set_fsa_action_flags(A_INTEGRATE_TIMER_STOP); } if (next_state == S_FINALIZE_JOIN) { controld_set_fsa_action_flags(A_FINALIZE_TIMER_START); } else { controld_set_fsa_action_flags(A_FINALIZE_TIMER_STOP); } if (next_state != S_PENDING) { controld_set_fsa_action_flags(A_DC_TIMER_STOP); } if (next_state != S_IDLE) { controld_stop_recheck_timer(); } if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { populate_cib_nodes(node_update_quick|node_update_all, __func__); } switch (next_state) { case S_PENDING: { cib_t *cib_conn = controld_globals.cib_conn; cib_conn->cmds->set_secondary(cib_conn, cib_none); } update_dc(NULL); break; case S_ELECTION: update_dc(NULL); break; case S_NOT_DC: controld_reset_counter_election_timer(); purge_stonith_cleanup(); if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); controld_set_fsa_action_flags(A_SHUTDOWN_REQ); } CRM_LOG_ASSERT(controld_globals.dc_name != NULL); if (controld_globals.dc_name == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } count = crmd_join_phase_count(controld_join_welcomed); if (count > 0) { crm_warn("%d cluster node%s failed to respond to join offer", count, pcmk__plural_s(count)); crmd_join_phase_log(LOG_NOTICE); } else { crm_debug("All cluster nodes (%d) responded to join offer", crmd_join_phase_count(controld_join_integrated)); } break; case S_POLICY_ENGINE: controld_reset_counter_election_timer(); CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_info("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } check_join_counts(msg_data); break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ controld_set_fsa_input_flags(R_SHUTDOWN); break; case S_IDLE: CRM_LOG_ASSERT(AM_I_DC); if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we are the DC"); controld_set_fsa_action_flags(A_SHUTDOWN_REQ); } controld_start_recheck_timer(); break; default: break; } if (clear_recovery_bit && next_state != S_PENDING) { controld_clear_fsa_action_flags(A_RECOVER); } else if (clear_recovery_bit == FALSE) { controld_set_fsa_action_flags(A_RECOVER); } #if 0 if (original_fsa_actions != controld_globals.fsa_actions) { fsa_dump_actions(original_fsa_actions ^ controld_globals.fsa_actions, "New actions"); } #endif } diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h index 7777145f0e..44857daa69 100644 --- a/daemons/controld/controld_fsa.h +++ b/daemons/controld/controld_fsa.h @@ -1,696 +1,687 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRMD_FSA__H # define CRMD_FSA__H # include # include # include # include # include # include # include /*! States the controller can be in */ enum crmd_fsa_state { S_IDLE = 0, /* Nothing happening */ S_ELECTION, /* Take part in the election algorithm as * described below */ S_INTEGRATION, /* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_FINALIZE_JOIN, /* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_NOT_DC, /* we are in non-DC mode */ S_POLICY_ENGINE, /* Determine next stable state of the cluster */ S_RECOVERY, /* Something bad happened, check everything is ok * before continuing and attempt to recover if * required */ S_RELEASE_DC, /* we were the DC, but now we arent anymore, * possibly by our own request, and we should * release all unnecessary sub-systems, finish * any pending actions, do general cleanup and * unset anything that makes us think we are * special :) */ S_STARTING, /* we are just starting out */ S_PENDING, /* we are not a full/active member yet */ S_STOPPING, /* We are in the final stages of shutting down */ S_TERMINATE, /* We are going to shutdown, this is the equiv of * "Sending TERM signal to all processes" in Linux * and in worst case scenarios could be considered * a self STONITH */ S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable * state of the cluster a reality */ S_HALT, /* Freeze - don't do anything * Something bad happened that needs the admin to fix * Wait for I_ELECTION */ /* ----------- Last input found in table is above ---------- */ S_ILLEGAL /* This is an illegal FSA state */ /* (must be last) */ }; # define MAXSTATE S_ILLEGAL /* Once we start and do some basic sanity checks, we go into the S_NOT_DC state and await instructions from the DC or input from the cluster layer which indicates the election algorithm needs to run. If the election algorithm is triggered, we enter the S_ELECTION state from where we can either go back to the S_NOT_DC state or progress to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC but aren't anymore). See the libcrmcluster API documentation for more information about the election algorithm. Once the election is complete, if we are the DC, we enter the S_INTEGRATION state which is a DC-in-waiting style state. We are the DC, but we shouldn't do anything yet because we may not have an up-to-date picture of the cluster. There may of course be times when this fails, so we should go back to the S_RECOVERY stage and check everything is ok. We may also end up here if a new node came online, since each node is authoritative about itself, and we would want to incorporate its information into the CIB. Once we have the latest CIB, we then enter the S_POLICY_ENGINE state where invoke the scheduler. It is possible that between invoking the scheduler and receiving an answer, that we receive more input. In this case, we would discard the orginal result and invoke it again. Once we are satisfied with the output from the scheduler, we enter S_TRANSITION_ENGINE and feed the scheduler's output to the Transition Engine who attempts to make the scheduler's calculation a reality. If the transition completes successfully, we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the current unstable state and try again. Of course, we may be asked to shutdown at any time, however we must progress to S_NOT_DC before doing so. Once we have handed over DC duties to another node, we can then shut down like everyone else, that is, by asking the DC for permission and waiting for it to take all our resources away. The case where we are the DC and the only node in the cluster is a special case and handled as an escalation which takes us to S_SHUTDOWN. Similarly, if any other point in the shutdown fails or stalls, this is escalated and we end up in S_TERMINATE. At any point, the controller can relay messages for its subsystems, but outbound messages (from subsystems) should probably be blocked until S_INTEGRATION (for the DC) or the join protocol has completed (for non-DC controllers). */ /*====================================== * * Inputs/Events/Stimuli to be given to the finite state machine * * Some of these a true events, and others are synthesised based on * the "register" (see below) and the contents or source of messages. * * The machine keeps processing until receiving I_NULL * *======================================*/ enum crmd_fsa_input { /* 0 */ I_NULL, /* Nothing happened */ /* 1 */ I_CIB_OP, /* An update to the CIB occurred */ I_CIB_UPDATE, /* An update to the CIB occurred */ I_DC_TIMEOUT, /* We have lost communication with the DC */ I_ELECTION, /* Someone started an election */ I_PE_CALC, /* The scheduler needs to be invoked */ I_RELEASE_DC, /* The election completed and we were not * elected, but we were the DC beforehand */ I_ELECTION_DC, /* The election completed and we were (re-)elected * DC */ I_ERROR, /* Something bad happened (more serious than * I_FAIL) and may not have been due to the action * being performed. For example, we may have lost * our connection to the CIB. */ /* 9 */ I_FAIL, /* The action failed to complete successfully */ I_INTEGRATED, I_FINALIZED, I_NODE_JOIN, /* A node has entered the cluster */ I_NOT_DC, /* We are not and were not the DC before or after * the current operation or state */ I_RECOVERED, /* The recovery process completed successfully */ I_RELEASE_FAIL, /* We could not give up DC status for some reason */ I_RELEASE_SUCCESS, /* We are no longer the DC */ I_RESTART, /* The current set of actions needs to be * restarted */ I_TE_SUCCESS, /* Some non-resource, non-cluster-layer action * is required of us, e.g. ping */ /* 20 */ I_ROUTER, /* Do our job as router and forward this to the * right place */ I_SHUTDOWN, /* We are asking to shutdown */ I_STOP, /* We have been told to shutdown */ I_TERMINATE, /* Actually exit */ I_STARTUP, I_PE_SUCCESS, /* The action completed successfully */ I_JOIN_OFFER, /* The DC is offering membership */ I_JOIN_REQUEST, /* The client is requesting membership */ I_JOIN_RESULT, /* If not the DC: The result of a join request * Else: A client is responding with its local state info */ I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen" * and until it does, we can't do anything else */ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */ - I_LRM_EVENT, - -/* 30 */ I_PENDING, I_HALT, /* ------------ Last input found in table is above ----------- */ I_ILLEGAL /* This is an illegal value for an FSA input */ /* (must be last) */ }; # define MAXINPUT I_ILLEGAL # define I_MESSAGE I_ROUTER /*====================================== * * actions * * Some of the actions below will always occur together for now, but this may * not always be the case, so they are split up so that they can easily be * called independently in the future, if necessary. * * For example, separating A_LRM_CONNECT from A_STARTUP might be useful * if we ever try to recover from a faulty or disconnected executor. * *======================================*/ /* Don't do anything */ # define A_NOTHING 0x0000000000000000ULL /* -- Startup actions -- */ /* Hook to perform any actions (other than connecting to other daemons) * that might be needed as part of the startup. */ # define A_STARTUP 0x0000000000000001ULL /* Hook to perform any actions that might be needed as part * after startup is successful. */ # define A_STARTED 0x0000000000000002ULL /* Connect to cluster layer */ # define A_HA_CONNECT 0x0000000000000004ULL # define A_HA_DISCONNECT 0x0000000000000008ULL # define A_INTEGRATE_TIMER_START 0x0000000000000010ULL # define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL # define A_FINALIZE_TIMER_START 0x0000000000000040ULL # define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL /* -- Election actions -- */ # define A_DC_TIMER_START 0x0000000000000100ULL # define A_DC_TIMER_STOP 0x0000000000000200ULL # define A_ELECTION_COUNT 0x0000000000000400ULL # define A_ELECTION_VOTE 0x0000000000000800ULL # define A_ELECTION_START 0x0000000000001000ULL /* -- Message processing -- */ /* Process the queue of requests */ # define A_MSG_PROCESS 0x0000000000002000ULL /* Send the message to the correct recipient */ # define A_MSG_ROUTE 0x0000000000004000ULL /* Send a welcome message to new node(s) */ # define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL /* -- Server Join protocol actions -- */ /* Send a welcome message to all nodes */ # define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL /* Process the remote node's ack of our join message */ # define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL /* Send out the results of the Join phase */ # define A_DC_JOIN_FINALIZE 0x0000000000040000ULL /* Send out the results of the Join phase */ # define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL /* -- Client Join protocol actions -- */ # define A_CL_JOIN_QUERY 0x0000000000100000ULL # define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL /* Request membership to the DC list */ # define A_CL_JOIN_REQUEST 0x0000000000400000ULL /* Did the DC accept or reject the request */ # define A_CL_JOIN_RESULT 0x0000000000800000ULL /* -- Recovery, DC start/stop -- */ /* Something bad happened, try to recover */ # define A_RECOVER 0x0000000001000000ULL /* Hook to perform any actions (apart from starting, the TE, scheduler, * and gathering the latest CIB) that might be necessary before * giving up the responsibilities of being the DC. */ # define A_DC_RELEASE 0x0000000002000000ULL /* */ # define A_DC_RELEASED 0x0000000004000000ULL /* Hook to perform any actions (apart from starting, the TE, scheduler, * and gathering the latest CIB) that might be necessary before * taking over the responsibilities of being the DC. */ # define A_DC_TAKEOVER 0x0000000008000000ULL /* -- Shutdown actions -- */ # define A_SHUTDOWN 0x0000000010000000ULL # define A_STOP 0x0000000020000000ULL # define A_EXIT_0 0x0000000040000000ULL # define A_EXIT_1 0x0000000080000000ULL # define A_SHUTDOWN_REQ 0x0000000100000000ULL # define A_ELECTION_CHECK 0x0000000200000000ULL # define A_DC_JOIN_FINAL 0x0000000400000000ULL /* -- CIB actions -- */ # define A_CIB_START 0x0000020000000000ULL # define A_CIB_STOP 0x0000040000000000ULL /* -- Transition Engine actions -- */ /* Attempt to reach the newly calculated cluster state. This is * only called once per transition (except if it is asked to * stop the transition or start a new one). * Once given a cluster state to reach, the TE will determine * tasks that can be performed in parallel, execute them, wait * for replies and then determine the next set until the new * state is reached or no further tasks can be taken. */ # define A_TE_INVOKE 0x0000100000000000ULL # define A_TE_START 0x0000200000000000ULL # define A_TE_STOP 0x0000400000000000ULL # define A_TE_CANCEL 0x0000800000000000ULL # define A_TE_HALT 0x0001000000000000ULL /* -- Scheduler actions -- */ /* Calculate the next state for the cluster. This is only * invoked once per needed calculation. */ # define A_PE_INVOKE 0x0002000000000000ULL # define A_PE_START 0x0004000000000000ULL # define A_PE_STOP 0x0008000000000000ULL /* -- Misc actions -- */ /* Add a system generate "block" so that resources arent moved * to or are activly moved away from the affected node. This * way we can return quickly even if busy with other things. */ # define A_NODE_BLOCK 0x0010000000000000ULL /* Update our information in the local CIB */ # define A_UPDATE_NODESTATUS 0x0020000000000000ULL # define A_READCONFIG 0x0080000000000000ULL /* -- LRM Actions -- */ // Connect to the local executor # define A_LRM_CONNECT 0x0100000000000000ULL // Disconnect from the local executor # define A_LRM_DISCONNECT 0x0200000000000000ULL # define A_LRM_INVOKE 0x0400000000000000ULL -# define A_LRM_EVENT 0x0800000000000000ULL /* -- Logging actions -- */ # define A_LOG 0x1000000000000000ULL # define A_ERROR 0x2000000000000000ULL # define A_WARN 0x4000000000000000ULL # define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP) # define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED) # define O_PE_RESTART (A_PE_START|A_PE_STOP) # define O_TE_RESTART (A_TE_START|A_TE_STOP) # define O_CIB_RESTART (A_CIB_START|A_CIB_STOP) # define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT) # define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START) /*====================================== * * "register" contents * * Things we may want to remember regardless of which state we are in. * * These also count as inputs for synthesizing I_* * *======================================*/ # define R_THE_DC 0x00000001ULL /* Are we the DC? */ # define R_STARTING 0x00000002ULL /* Are we starting up? */ # define R_SHUTDOWN 0x00000004ULL /* Are we trying to shut down? */ # define R_STAYDOWN 0x00000008ULL /* Should we restart? */ # define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */ # define R_READ_CONFIG 0x00000040ULL # define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked? # define R_CIB_CONNECTED 0x00000100ULL /* Is the CIB connected? */ # define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected? # define R_TE_CONNECTED 0x00000400ULL /* Is the Transition Engine connected? */ # define R_LRM_CONNECTED 0x00000800ULL // Is the executor connected? # define R_CIB_REQUIRED 0x00001000ULL /* Is the CIB required? */ # define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required? # define R_TE_REQUIRED 0x00004000ULL /* Is the Transition Engine required? */ # define R_ST_REQUIRED 0x00008000ULL /* Is the Stonith daemon required? */ # define R_CIB_DONE 0x00010000ULL /* Have we calculated the CIB? */ # define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */ # define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet */ // Ever received membership-layer data # define R_PEER_DATA 0x00200000ULL # define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */ # define R_REQ_PEND 0x01000000ULL /* Are there Requests waiting for processing? */ # define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler? # define R_TE_PEND 0x04000000ULL /* Has the TE been invoked and we're awaiting completion? */ # define R_RESP_PEND 0x08000000ULL /* Do we have clients waiting on a response? if so perhaps we shouldn't stop yet */ # define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all * resources in preparation for * shutting down */ # define R_IN_RECOVERY 0x80000000ULL #define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID) enum crmd_fsa_cause { C_UNKNOWN = 0, C_STARTUP, C_IPC_MESSAGE, C_HA_MESSAGE, C_CRMD_STATUS_CALLBACK, C_LRM_OP_CALLBACK, C_TIMER_POPPED, C_SHUTDOWN, C_FSA_INTERNAL, }; enum fsa_data_type { fsa_dt_none, fsa_dt_ha_msg, fsa_dt_xml, fsa_dt_lrm, }; typedef struct fsa_data_s fsa_data_t; struct fsa_data_s { int id; enum crmd_fsa_input fsa_input; enum crmd_fsa_cause fsa_cause; uint64_t actions; const char *origin; void *data; enum fsa_data_type data_type; }; #define controld_set_fsa_input_flags(flags_to_set) do { \ controld_globals.fsa_input_register \ = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "FSA input", "controller", \ controld_globals.fsa_input_register, \ (flags_to_set), #flags_to_set); \ } while (0) #define controld_clear_fsa_input_flags(flags_to_clear) do { \ controld_globals.fsa_input_register \ = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "FSA input", "controller", \ controld_globals.fsa_input_register, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define controld_set_fsa_action_flags(flags_to_set) do { \ controld_globals.fsa_actions \ = pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \ "FSA action", "controller", \ controld_globals.fsa_actions, \ (flags_to_set), #flags_to_set); \ } while (0) #define controld_clear_fsa_action_flags(flags_to_clear) do { \ controld_globals.fsa_actions \ = pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \ "FSA action", "controller", \ controld_globals.fsa_actions, \ (flags_to_clear), #flags_to_clear); \ } while (0) // This should be moved elsewhere xmlNode *controld_query_executor_state(void); const char *fsa_input2string(enum crmd_fsa_input input); const char *fsa_state2string(enum crmd_fsa_state state); const char *fsa_cause2string(enum crmd_fsa_cause cause); const char *fsa_action2string(long long action); enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause); enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input); uint64_t controld_fsa_get_action(enum crmd_fsa_input input); void controld_init_fsa_trigger(void); void controld_destroy_fsa_trigger(void); void free_max_generation(void); # define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC) # define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__) void controld_trigger_fsa_as(const char *fn, int line); /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_LOG */ void do_log(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_CIB_START, STOP, RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_PE_START, STOP, RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_TE_START, STOP, RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_VOTE */ void do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_COUNT */ void do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_CHECK */ void do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_TIMER_STOP */ void do_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_TAKEOVER */ void do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_RELEASE */ void do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_ANNOUNCE */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_REQUEST */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_RESULT */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); -/* A_LRM_EVENT */ -void do_lrm_event(long long action, enum crmd_fsa_cause cause, - enum crmd_fsa_state cur_state, - enum crmd_fsa_input cur_input, fsa_data_t *msg_data); - /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_FINAL */ void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); #endif diff --git a/daemons/controld/controld_matrix.c b/daemons/controld/controld_matrix.c index a404f0ac9f..74eb610272 100644 --- a/daemons/controld/controld_matrix.c +++ b/daemons/controld/controld_matrix.c @@ -1,1250 +1,1214 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // uint64_t #include /* * The state transition table. The rows are inputs, and * the columns are states. */ static const enum crmd_fsa_state fsa_next_states[MAXINPUT][MAXSTATE] = { /* Got an I_NULL */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_CIB_OP */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_CIB_UPDATE */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_DC_TIMEOUT */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_ELECTION, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RECOVERY, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_ELECTION, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_ELECTION, }, /* Got an I_ELECTION */ { /* S_IDLE ==> */ S_ELECTION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_ELECTION, /* S_FINALIZE_JOIN ==> */ S_ELECTION, /* S_NOT_DC ==> */ S_ELECTION, /* S_POLICY_ENGINE ==> */ S_ELECTION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_ELECTION, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_ELECTION, /* S_HALT ==> */ S_HALT, }, /* Got an I_PE_CALC */ { /* S_IDLE ==> */ S_POLICY_ENGINE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_RELEASE_DC */ { /* S_IDLE ==> */ S_RELEASE_DC, /* S_ELECTION ==> */ S_RELEASE_DC, /* S_INTEGRATION ==> */ S_RELEASE_DC, /* S_FINALIZE_JOIN ==> */ S_RELEASE_DC, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_RELEASE_DC, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RELEASE_DC, /* S_HALT ==> */ S_HALT, }, /* Got an I_ELECTION_DC */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_INTEGRATION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_INTEGRATION, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_ERROR */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_RECOVERY, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RECOVERY, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_TERMINATE, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_RECOVERY, }, /* Got an I_FAIL */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_RELEASE_DC, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STOPPING, /* S_PENDING ==> */ S_STOPPING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, /* S_HALT ==> */ S_RELEASE_DC, }, /* Got an I_INTEGRATED */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_FINALIZE_JOIN, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_FINALIZED */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_POLICY_ENGINE, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_NODE_JOIN */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_NOT_DC */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_NOT_DC, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_HALT, }, /* Got an I_RECOVERED */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_PENDING, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_RELEASE_FAIL */ { /* S_IDLE ==> */ S_STOPPING, /* S_ELECTION ==> */ S_STOPPING, /* S_INTEGRATION ==> */ S_STOPPING, /* S_FINALIZE_JOIN ==> */ S_STOPPING, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_STOPPING, /* S_RECOVERY ==> */ S_STOPPING, /* S_RELEASE_DC ==> */ S_STOPPING, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_STOPPING, /* S_HALT ==> */ S_HALT, }, /* Got an I_RELEASE_SUCCESS */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_PENDING, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_HALT, }, /* Got an I_RESTART */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_TE_SUCCESS */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_IDLE, /* S_HALT ==> */ S_HALT, }, /* Got an I_ROUTER */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_SHUTDOWN */ { /* S_IDLE ==> */ S_POLICY_ENGINE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_STOPPING, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STOPPING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, /* S_HALT ==> */ S_ELECTION, }, /* Got an I_STOP */ { /* S_IDLE ==> */ S_STOPPING, /* S_ELECTION ==> */ S_STOPPING, /* S_INTEGRATION ==> */ S_STOPPING, /* S_FINALIZE_JOIN ==> */ S_STOPPING, /* S_NOT_DC ==> */ S_STOPPING, /* S_POLICY_ENGINE ==> */ S_STOPPING, /* S_RECOVERY ==> */ S_STOPPING, /* S_RELEASE_DC ==> */ S_STOPPING, /* S_STARTING ==> */ S_STOPPING, /* S_PENDING ==> */ S_STOPPING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_STOPPING, /* S_HALT ==> */ S_STOPPING, }, /* Got an I_TERMINATE */ { /* S_IDLE ==> */ S_TERMINATE, /* S_ELECTION ==> */ S_TERMINATE, /* S_INTEGRATION ==> */ S_TERMINATE, /* S_FINALIZE_JOIN ==> */ S_TERMINATE, /* S_NOT_DC ==> */ S_TERMINATE, /* S_POLICY_ENGINE ==> */ S_TERMINATE, /* S_RECOVERY ==> */ S_TERMINATE, /* S_RELEASE_DC ==> */ S_TERMINATE, /* S_STARTING ==> */ S_TERMINATE, /* S_PENDING ==> */ S_TERMINATE, /* S_STOPPING ==> */ S_TERMINATE, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TERMINATE, /* S_HALT ==> */ S_TERMINATE, }, /* Got an I_STARTUP */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_RECOVERY, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_HALT, }, /* Got an I_PE_SUCCESS */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_JOIN_OFFER */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_PENDING, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_JOIN_REQUEST */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_JOIN_RESULT */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_PENDING, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_WAIT_FOR_EVENT */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_DC_HEARTBEAT */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, -/* Got an I_LRM_EVENT */ - { - /* S_IDLE ==> */ S_IDLE, - /* S_ELECTION ==> */ S_ELECTION, - /* S_INTEGRATION ==> */ S_INTEGRATION, - /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, - /* S_NOT_DC ==> */ S_NOT_DC, - /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, - /* S_RECOVERY ==> */ S_RECOVERY, - /* S_RELEASE_DC ==> */ S_RELEASE_DC, - /* S_STARTING ==> */ S_STARTING, - /* S_PENDING ==> */ S_PENDING, - /* S_STOPPING ==> */ S_STOPPING, - /* S_TERMINATE ==> */ S_TERMINATE, - /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, - /* S_HALT ==> */ S_HALT, - }, - /* Got an I_PENDING */ { /* S_IDLE ==> */ S_PENDING, /* S_ELECTION ==> */ S_PENDING, /* S_INTEGRATION ==> */ S_PENDING, /* S_FINALIZE_JOIN ==> */ S_PENDING, /* S_NOT_DC ==> */ S_PENDING, /* S_POLICY_ENGINE ==> */ S_PENDING, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_PENDING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_PENDING, /* S_HALT ==> */ S_HALT, }, /* Got an I_HALT */ { /* S_IDLE ==> */ S_HALT, /* S_ELECTION ==> */ S_HALT, /* S_INTEGRATION ==> */ S_HALT, /* S_FINALIZE_JOIN ==> */ S_HALT, /* S_NOT_DC ==> */ S_HALT, /* S_POLICY_ENGINE ==> */ S_HALT, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_HALT, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_HALT, /* S_HALT ==> */ S_HALT, }, }; /* * The action table. Each entry is a set of actions to take or-ed * together. Like the state table, the rows are inputs, and * the columns are states. */ /* NOTE: In the fsa, the actions are extracted then state is updated. */ static const uint64_t fsa_actions[MAXINPUT][MAXSTATE] = { /* Got an I_NULL */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_NOTHING, /* S_INTEGRATION ==> */ A_NOTHING, /* S_FINALIZE_JOIN ==> */ A_NOTHING, /* S_NOT_DC ==> */ A_NOTHING, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_NOTHING, /* S_STARTING ==> */ A_NOTHING, /* S_PENDING ==> */ A_NOTHING, /* S_STOPPING ==> */ A_NOTHING, /* S_TERMINATE ==> */ A_NOTHING, /* S_TRANSITION_ENGINE ==> */ A_NOTHING, /* S_HALT ==> */ A_NOTHING, }, /* Got an I_CIB_OP */ { /* S_IDLE ==> */ A_ERROR, /* S_ELECTION ==> */ A_ERROR, /* S_INTEGRATION ==> */ A_ERROR, /* S_FINALIZE_JOIN ==> */ A_ERROR, /* S_NOT_DC ==> */ A_ERROR, /* S_POLICY_ENGINE ==> */ A_ERROR, /* S_RECOVERY ==> */ A_ERROR, /* S_RELEASE_DC ==> */ A_ERROR, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_ERROR, /* S_STOPPING ==> */ A_ERROR, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_ERROR, /* S_HALT ==> */ A_ERROR, }, /* Got an I_CIB_UPDATE */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_LOG, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_LOG, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_DC_TIMEOUT */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_ELECTION_VOTE | A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_ELECTION_VOTE | A_WARN, /* S_STOPPING ==> */ A_NOTHING, /* S_TERMINATE ==> */ A_NOTHING, /* S_TRANSITION_ENGINE ==> */ A_TE_CANCEL | A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_ELECTION */ { /* S_IDLE ==> */ A_ELECTION_VOTE, /* S_ELECTION ==> */ A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_ELECTION_VOTE, /* S_FINALIZE_JOIN ==> */ A_ELECTION_VOTE, /* S_NOT_DC ==> */ A_ELECTION_VOTE, /* S_POLICY_ENGINE ==> */ A_ELECTION_VOTE, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_ELECTION_VOTE, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_ELECTION_VOTE, /* S_HALT ==> */ A_ELECTION_VOTE, }, /* Got an I_PE_CALC */ { /* S_IDLE ==> */ A_PE_INVOKE, /* S_ELECTION ==> */ A_NOTHING, /* S_INTEGRATION ==> */ A_NOTHING, /* S_FINALIZE_JOIN ==> */ A_NOTHING, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_PE_INVOKE, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_NOTHING, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_PE_INVOKE, /* S_HALT ==> */ A_ERROR, }, /* Got an I_RELEASE_DC */ { /* S_IDLE ==> */ O_RELEASE, /* S_ELECTION ==> */ O_RELEASE, /* S_INTEGRATION ==> */ O_RELEASE | A_WARN, /* S_FINALIZE_JOIN ==> */ O_RELEASE | A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ O_RELEASE | A_WARN, /* S_RECOVERY ==> */ O_RELEASE, /* S_RELEASE_DC ==> */ O_RELEASE | A_WARN, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ O_RELEASE | A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_ELECTION_DC */ { /* S_IDLE ==> */ A_WARN | A_ELECTION_VOTE, /* S_ELECTION ==> */ A_LOG | A_DC_TAKEOVER | A_PE_START | A_TE_START | A_DC_JOIN_OFFER_ALL | A_DC_TIMER_STOP, /* S_INTEGRATION ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL, /* S_FINALIZE_JOIN ==> */ A_WARN | A_ELECTION_VOTE | A_DC_JOIN_OFFER_ALL, /* S_NOT_DC ==> */ A_LOG | A_ELECTION_VOTE, /* S_POLICY_ENGINE ==> */ A_WARN | A_ELECTION_VOTE, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN | A_ELECTION_VOTE, /* S_STARTING ==> */ A_LOG | A_WARN, /* S_PENDING ==> */ A_LOG | A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN | A_ELECTION_VOTE, /* S_HALT ==> */ A_WARN, }, /* Got an I_ERROR */ { /* S_IDLE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, /* S_ELECTION ==> */ A_ERROR | A_RECOVER | O_RELEASE, /* S_INTEGRATION ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, /* S_FINALIZE_JOIN ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, /* S_NOT_DC ==> */ A_ERROR | A_RECOVER, /* S_POLICY_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, /* S_RECOVERY ==> */ A_ERROR | O_RELEASE, /* S_RELEASE_DC ==> */ A_ERROR | A_RECOVER, /* S_STARTING ==> */ A_ERROR | A_RECOVER, /* S_PENDING ==> */ A_ERROR | A_RECOVER, /* S_STOPPING ==> */ A_ERROR | A_EXIT_1, /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1, /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, /* S_HALT ==> */ A_ERROR | A_RECOVER | O_RELEASE | A_ELECTION_START, }, /* Got an I_FAIL */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN | A_DC_JOIN_OFFER_ALL, /* S_FINALIZE_JOIN ==> */ A_WARN | A_DC_JOIN_OFFER_ALL, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN | A_DC_JOIN_OFFER_ALL | A_TE_CANCEL, /* S_RECOVERY ==> */ A_WARN | O_RELEASE, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN | A_EXIT_1, /* S_TRANSITION_ENGINE ==> */ A_WARN | O_LRM_RECONNECT | A_PE_INVOKE | A_TE_CANCEL, /* S_HALT ==> */ A_WARN, }, /* Got an I_INTEGRATED */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_DC_JOIN_FINALIZE, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_NOTHING, /* S_HALT ==> */ A_WARN, }, /* Got an I_FINALIZED */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_FINAL | A_TE_CANCEL, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_NOTHING, /* S_HALT ==> */ A_WARN, }, /* Got an I_NODE_JOIN */ { /* S_IDLE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_DC_JOIN_OFFER_ONE, /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_TE_HALT | A_DC_JOIN_OFFER_ONE, /* S_HALT ==> */ A_WARN, }, /* Got an I_NOT_DC */ { /* S_IDLE ==> */ A_WARN | O_RELEASE, /* S_ELECTION ==> */ A_ERROR | A_ELECTION_START | A_DC_TIMER_STOP, /* S_INTEGRATION ==> */ A_ERROR | O_RELEASE, /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE, /* S_NOT_DC ==> */ A_LOG, /* S_POLICY_ENGINE ==> */ A_ERROR | O_RELEASE, /* S_RECOVERY ==> */ A_ERROR | O_RELEASE, /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_LOG | A_DC_TIMER_STOP, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_RELEASE, /* S_HALT ==> */ A_WARN, }, /* Got an I_RECOVERED */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_RELEASE_FAIL */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_WARN | A_SHUTDOWN_REQ, /* S_RELEASE_DC ==> */ A_NOTHING, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_RELEASE_SUCCESS */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_RESTART */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_LOG | A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_LOG | A_DC_JOIN_OFFER_ALL, /* S_FINALIZE_JOIN ==> */ A_LOG | A_DC_JOIN_FINALIZE, /* S_NOT_DC ==> */ A_LOG | A_NOTHING, /* S_POLICY_ENGINE ==> */ A_LOG | A_PE_INVOKE, /* S_RECOVERY ==> */ A_LOG | A_RECOVER | O_RELEASE, /* S_RELEASE_DC ==> */ A_LOG | O_RELEASE, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_LOG | A_TE_INVOKE, /* S_HALT ==> */ A_WARN, }, /* Got an I_TE_SUCCESS */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_ERROR, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_RECOVER | A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_ERROR, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_ROUTER */ { /* S_IDLE ==> */ A_MSG_ROUTE, /* S_ELECTION ==> */ A_MSG_ROUTE, /* S_INTEGRATION ==> */ A_MSG_ROUTE, /* S_FINALIZE_JOIN ==> */ A_MSG_ROUTE, /* S_NOT_DC ==> */ A_MSG_ROUTE, /* S_POLICY_ENGINE ==> */ A_MSG_ROUTE, /* S_RECOVERY ==> */ A_MSG_ROUTE, /* S_RELEASE_DC ==> */ A_MSG_ROUTE, /* S_STARTING ==> */ A_MSG_ROUTE, /* S_PENDING ==> */ A_MSG_ROUTE, /* S_STOPPING ==> */ A_MSG_ROUTE, /* S_TERMINATE ==> */ A_MSG_ROUTE, /* S_TRANSITION_ENGINE ==> */ A_MSG_ROUTE, /* S_HALT ==> */ A_WARN | A_MSG_ROUTE, }, /* Got an I_SHUTDOWN */ { /* S_IDLE ==> */ A_LOG | A_SHUTDOWN_REQ, /* S_ELECTION ==> */ A_LOG | A_SHUTDOWN_REQ | A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_LOG | A_SHUTDOWN_REQ, /* S_FINALIZE_JOIN ==> */ A_LOG | A_SHUTDOWN_REQ, /* S_NOT_DC ==> */ A_SHUTDOWN_REQ, /* S_POLICY_ENGINE ==> */ A_LOG | A_SHUTDOWN_REQ, /* S_RECOVERY ==> */ A_WARN | O_EXIT | O_RELEASE, /* S_RELEASE_DC ==> */ A_WARN | A_SHUTDOWN_REQ, /* S_STARTING ==> */ A_WARN | O_EXIT, /* S_PENDING ==> */ A_SHUTDOWN_REQ, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_WARN | A_SHUTDOWN_REQ, /* S_HALT ==> */ A_WARN | A_ELECTION_START | A_SHUTDOWN_REQ, }, /* Got an I_STOP */ { /* S_IDLE ==> */ A_ERROR | O_RELEASE | O_EXIT, /* S_ELECTION ==> */ O_RELEASE | O_EXIT, /* S_INTEGRATION ==> */ A_WARN | O_RELEASE | O_EXIT, /* S_FINALIZE_JOIN ==> */ A_ERROR | O_RELEASE | O_EXIT, /* S_NOT_DC ==> */ O_EXIT, /* S_POLICY_ENGINE ==> */ A_WARN | O_RELEASE | O_EXIT, /* S_RECOVERY ==> */ A_ERROR | O_RELEASE | O_EXIT, /* S_RELEASE_DC ==> */ A_ERROR | O_RELEASE | O_EXIT, /* S_STARTING ==> */ O_EXIT, /* S_PENDING ==> */ O_EXIT, /* S_STOPPING ==> */ O_EXIT, /* S_TERMINATE ==> */ A_ERROR | A_EXIT_1, /* S_TRANSITION_ENGINE ==> */ A_LOG | O_RELEASE | O_EXIT, /* S_HALT ==> */ O_RELEASE | O_EXIT | A_WARN, }, /* Got an I_TERMINATE */ { /* S_IDLE ==> */ A_ERROR | O_EXIT, /* S_ELECTION ==> */ A_ERROR | O_EXIT, /* S_INTEGRATION ==> */ A_ERROR | O_EXIT, /* S_FINALIZE_JOIN ==> */ A_ERROR | O_EXIT, /* S_NOT_DC ==> */ A_ERROR | O_EXIT, /* S_POLICY_ENGINE ==> */ A_ERROR | O_EXIT, /* S_RECOVERY ==> */ A_ERROR | O_EXIT, /* S_RELEASE_DC ==> */ A_ERROR | O_EXIT, /* S_STARTING ==> */ O_EXIT, /* S_PENDING ==> */ A_ERROR | O_EXIT, /* S_STOPPING ==> */ O_EXIT, /* S_TERMINATE ==> */ O_EXIT, /* S_TRANSITION_ENGINE ==> */ A_ERROR | O_EXIT, /* S_HALT ==> */ A_ERROR | O_EXIT, }, /* Got an I_STARTUP */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_LOG | A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_PE_SUCCESS */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_NOTHING, /* S_POLICY_ENGINE ==> */ A_TE_INVOKE, /* S_RECOVERY ==> */ A_RECOVER | A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_ERROR, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_JOIN_OFFER */ { /* S_IDLE ==> */ A_WARN | A_CL_JOIN_REQUEST, /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_CL_JOIN_REQUEST, /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_REQUEST, /* S_NOT_DC ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, /* S_POLICY_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST, /* S_RECOVERY ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, /* S_RELEASE_DC ==> */ A_WARN | A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_CL_JOIN_REQUEST | A_DC_TIMER_STOP, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_WARN | A_CL_JOIN_REQUEST, /* S_HALT ==> */ A_WARN, }, /* Got an I_JOIN_REQUEST */ { /* S_IDLE ==> */ A_DC_JOIN_OFFER_ONE, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_DC_JOIN_PROCESS_REQ, /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, /* S_HALT ==> */ A_WARN, }, /* Got an I_JOIN_RESULT */ { /* S_IDLE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, /* S_ELECTION ==> */ A_LOG, /* S_INTEGRATION ==> */ A_LOG | A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK, /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_RESULT | A_DC_JOIN_PROCESS_ACK, /* S_NOT_DC ==> */ A_ERROR | A_CL_JOIN_ANNOUNCE, /* S_POLICY_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_CL_JOIN_RESULT, /* S_STOPPING ==> */ A_ERROR, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_ERROR | A_TE_HALT | A_DC_JOIN_OFFER_ALL, /* S_HALT ==> */ A_WARN, }, /* Got an I_WAIT_FOR_EVENT */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_LOG, /* S_INTEGRATION ==> */ A_LOG, /* S_FINALIZE_JOIN ==> */ A_LOG, /* S_NOT_DC ==> */ A_LOG, /* S_POLICY_ENGINE ==> */ A_LOG, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_DC_HEARTBEAT */ { /* S_IDLE ==> */ A_ERROR, /* S_ELECTION ==> */ A_WARN | A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_ERROR, /* S_FINALIZE_JOIN ==> */ A_ERROR, /* S_NOT_DC ==> */ A_NOTHING, /* S_POLICY_ENGINE ==> */ A_ERROR, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_LOG | A_CL_JOIN_ANNOUNCE, /* S_STOPPING ==> */ A_NOTHING, /* S_TERMINATE ==> */ A_NOTHING, /* S_TRANSITION_ENGINE ==> */ A_ERROR, /* S_HALT ==> */ A_WARN, }, -/* Got an I_LRM_EVENT */ - { - /* S_IDLE ==> */ A_LRM_EVENT, - /* S_ELECTION ==> */ A_LRM_EVENT, - /* S_INTEGRATION ==> */ A_LRM_EVENT, - /* S_FINALIZE_JOIN ==> */ A_LRM_EVENT, - /* S_NOT_DC ==> */ A_LRM_EVENT, - /* S_POLICY_ENGINE ==> */ A_LRM_EVENT, - /* S_RECOVERY ==> */ A_LRM_EVENT, - /* S_RELEASE_DC ==> */ A_LRM_EVENT, - /* S_STARTING ==> */ A_LRM_EVENT, - /* S_PENDING ==> */ A_LRM_EVENT, - /* S_STOPPING ==> */ A_LRM_EVENT, - /* S_TERMINATE ==> */ A_LRM_EVENT, - /* S_TRANSITION_ENGINE ==> */ A_LRM_EVENT, - /* S_HALT ==> */ A_WARN, - }, - /* For everyone ending up in S_PENDING, (re)start the DC timer and wait for I_JOIN_OFFER or I_NOT_DC */ /* Got an I_PENDING */ { /* S_IDLE ==> */ O_RELEASE | O_DC_TIMER_RESTART, /* S_ELECTION ==> */ O_RELEASE | O_DC_TIMER_RESTART, /* S_INTEGRATION ==> */ O_RELEASE | O_DC_TIMER_RESTART, /* S_FINALIZE_JOIN ==> */ O_RELEASE | O_DC_TIMER_RESTART, /* S_NOT_DC ==> */ A_LOG | O_DC_TIMER_RESTART, /* S_POLICY_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN | O_DC_TIMER_RESTART, /* S_STARTING ==> */ A_LOG | A_DC_TIMER_START | A_CL_JOIN_QUERY, /* S_PENDING ==> */ A_LOG | O_DC_TIMER_RESTART, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ O_RELEASE | O_DC_TIMER_RESTART, /* S_HALT ==> */ A_WARN, }, /* Got an I_HALT */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, }; /*! * \internal * \brief Get the next FSA state given an input and the current state * * \param[in] input FSA input * * \return The next FSA state */ enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input) { return fsa_next_states[input][controld_globals.fsa_state]; } /*! * \internal * \brief Get the appropriate FSA action given an input and the current state * * \param[in] input FSA input * * \return The appropriate FSA action */ uint64_t controld_fsa_get_action(enum crmd_fsa_input input) { return fsa_actions[input][controld_globals.fsa_state]; } diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c index 89732336d6..59c2ddbee5 100644 --- a/daemons/controld/controld_utils.c +++ b/daemons/controld/controld_utils.c @@ -1,867 +1,858 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // uint64_t #include #include #include #include /*! * \internal * \brief Check whether a given name is for the local node * * \param[in] name Name to check * * \return true if \p name is the name of the local node, otherwise false */ bool controld_is_local_node(const char *name) { CRM_CHECK(controld_globals.cluster != NULL, return false); return pcmk__str_eq(name, controld_globals.cluster->priv->node_name, pcmk__str_casei); } /*! * \internal * \brief Get node status object for local node * * \return Node status object for local node */ pcmk__node_status_t * controld_get_local_node_status(void) { CRM_CHECK(controld_globals.cluster != NULL, return NULL); return pcmk__get_node(controld_globals.cluster->priv->node_id, controld_globals.cluster->priv->node_name, NULL, pcmk__node_search_cluster_member); } const char * fsa_input2string(enum crmd_fsa_input input) { const char *inputAsText = NULL; switch (input) { case I_NULL: inputAsText = "I_NULL"; break; case I_CIB_OP: inputAsText = "I_CIB_OP (unused)"; break; case I_CIB_UPDATE: inputAsText = "I_CIB_UPDATE"; break; case I_DC_TIMEOUT: inputAsText = "I_DC_TIMEOUT"; break; case I_ELECTION: inputAsText = "I_ELECTION"; break; case I_PE_CALC: inputAsText = "I_PE_CALC"; break; case I_RELEASE_DC: inputAsText = "I_RELEASE_DC"; break; case I_ELECTION_DC: inputAsText = "I_ELECTION_DC"; break; case I_ERROR: inputAsText = "I_ERROR"; break; case I_FAIL: inputAsText = "I_FAIL"; break; case I_INTEGRATED: inputAsText = "I_INTEGRATED"; break; case I_FINALIZED: inputAsText = "I_FINALIZED"; break; case I_NODE_JOIN: inputAsText = "I_NODE_JOIN"; break; case I_JOIN_OFFER: inputAsText = "I_JOIN_OFFER"; break; case I_JOIN_REQUEST: inputAsText = "I_JOIN_REQUEST"; break; case I_JOIN_RESULT: inputAsText = "I_JOIN_RESULT"; break; case I_NOT_DC: inputAsText = "I_NOT_DC"; break; case I_RECOVERED: inputAsText = "I_RECOVERED"; break; case I_RELEASE_FAIL: inputAsText = "I_RELEASE_FAIL"; break; case I_RELEASE_SUCCESS: inputAsText = "I_RELEASE_SUCCESS"; break; case I_RESTART: inputAsText = "I_RESTART"; break; case I_PE_SUCCESS: inputAsText = "I_PE_SUCCESS"; break; case I_ROUTER: inputAsText = "I_ROUTER"; break; case I_SHUTDOWN: inputAsText = "I_SHUTDOWN"; break; case I_STARTUP: inputAsText = "I_STARTUP"; break; case I_TE_SUCCESS: inputAsText = "I_TE_SUCCESS"; break; case I_STOP: inputAsText = "I_STOP"; break; case I_DC_HEARTBEAT: inputAsText = "I_DC_HEARTBEAT"; break; case I_WAIT_FOR_EVENT: inputAsText = "I_WAIT_FOR_EVENT"; break; - case I_LRM_EVENT: - inputAsText = "I_LRM_EVENT"; - break; case I_PENDING: inputAsText = "I_PENDING"; break; case I_HALT: inputAsText = "I_HALT"; break; case I_TERMINATE: inputAsText = "I_TERMINATE"; break; case I_ILLEGAL: inputAsText = "I_ILLEGAL"; break; } if (inputAsText == NULL) { crm_err("Input %d is unknown", input); inputAsText = ""; } return inputAsText; } const char * fsa_state2string(enum crmd_fsa_state state) { const char *stateAsText = NULL; switch (state) { case S_IDLE: stateAsText = "S_IDLE"; break; case S_ELECTION: stateAsText = "S_ELECTION"; break; case S_INTEGRATION: stateAsText = "S_INTEGRATION"; break; case S_FINALIZE_JOIN: stateAsText = "S_FINALIZE_JOIN"; break; case S_NOT_DC: stateAsText = "S_NOT_DC"; break; case S_POLICY_ENGINE: stateAsText = "S_POLICY_ENGINE"; break; case S_RECOVERY: stateAsText = "S_RECOVERY"; break; case S_RELEASE_DC: stateAsText = "S_RELEASE_DC"; break; case S_PENDING: stateAsText = "S_PENDING"; break; case S_STOPPING: stateAsText = "S_STOPPING"; break; case S_TERMINATE: stateAsText = "S_TERMINATE"; break; case S_TRANSITION_ENGINE: stateAsText = "S_TRANSITION_ENGINE"; break; case S_STARTING: stateAsText = "S_STARTING"; break; case S_HALT: stateAsText = "S_HALT"; break; case S_ILLEGAL: stateAsText = "S_ILLEGAL"; break; } if (stateAsText == NULL) { crm_err("State %d is unknown", state); stateAsText = ""; } return stateAsText; } const char * fsa_cause2string(enum crmd_fsa_cause cause) { const char *causeAsText = NULL; switch (cause) { case C_UNKNOWN: causeAsText = "C_UNKNOWN"; break; case C_STARTUP: causeAsText = "C_STARTUP"; break; case C_IPC_MESSAGE: causeAsText = "C_IPC_MESSAGE"; break; case C_HA_MESSAGE: causeAsText = "C_HA_MESSAGE"; break; case C_TIMER_POPPED: causeAsText = "C_TIMER_POPPED"; break; case C_SHUTDOWN: causeAsText = "C_SHUTDOWN"; break; case C_LRM_OP_CALLBACK: causeAsText = "C_LRM_OP_CALLBACK"; break; case C_CRMD_STATUS_CALLBACK: causeAsText = "C_CRMD_STATUS_CALLBACK"; break; case C_FSA_INTERNAL: causeAsText = "C_FSA_INTERNAL"; break; } if (causeAsText == NULL) { crm_err("Cause %d is unknown", cause); causeAsText = ""; } return causeAsText; } const char * fsa_action2string(long long action) { const char *actionAsText = NULL; switch (action) { case A_NOTHING: actionAsText = "A_NOTHING"; break; case A_ELECTION_START: actionAsText = "A_ELECTION_START"; break; case A_DC_JOIN_FINAL: actionAsText = "A_DC_JOIN_FINAL"; break; case A_READCONFIG: actionAsText = "A_READCONFIG"; break; case O_RELEASE: actionAsText = "O_RELEASE"; break; case A_STARTUP: actionAsText = "A_STARTUP"; break; case A_STARTED: actionAsText = "A_STARTED"; break; case A_HA_CONNECT: actionAsText = "A_HA_CONNECT"; break; case A_HA_DISCONNECT: actionAsText = "A_HA_DISCONNECT"; break; case A_LRM_CONNECT: actionAsText = "A_LRM_CONNECT"; break; - case A_LRM_EVENT: - actionAsText = "A_LRM_EVENT"; - break; case A_LRM_INVOKE: actionAsText = "A_LRM_INVOKE"; break; case A_LRM_DISCONNECT: actionAsText = "A_LRM_DISCONNECT"; break; case O_LRM_RECONNECT: actionAsText = "O_LRM_RECONNECT"; break; case A_CL_JOIN_QUERY: actionAsText = "A_CL_JOIN_QUERY"; break; case A_DC_TIMER_STOP: actionAsText = "A_DC_TIMER_STOP"; break; case A_DC_TIMER_START: actionAsText = "A_DC_TIMER_START"; break; case A_INTEGRATE_TIMER_START: actionAsText = "A_INTEGRATE_TIMER_START"; break; case A_INTEGRATE_TIMER_STOP: actionAsText = "A_INTEGRATE_TIMER_STOP"; break; case A_FINALIZE_TIMER_START: actionAsText = "A_FINALIZE_TIMER_START"; break; case A_FINALIZE_TIMER_STOP: actionAsText = "A_FINALIZE_TIMER_STOP"; break; case A_ELECTION_COUNT: actionAsText = "A_ELECTION_COUNT"; break; case A_ELECTION_VOTE: actionAsText = "A_ELECTION_VOTE"; break; case A_ELECTION_CHECK: actionAsText = "A_ELECTION_CHECK"; break; case A_CL_JOIN_ANNOUNCE: actionAsText = "A_CL_JOIN_ANNOUNCE"; break; case A_CL_JOIN_REQUEST: actionAsText = "A_CL_JOIN_REQUEST"; break; case A_CL_JOIN_RESULT: actionAsText = "A_CL_JOIN_RESULT"; break; case A_DC_JOIN_OFFER_ALL: actionAsText = "A_DC_JOIN_OFFER_ALL"; break; case A_DC_JOIN_OFFER_ONE: actionAsText = "A_DC_JOIN_OFFER_ONE"; break; case A_DC_JOIN_PROCESS_REQ: actionAsText = "A_DC_JOIN_PROCESS_REQ"; break; case A_DC_JOIN_PROCESS_ACK: actionAsText = "A_DC_JOIN_PROCESS_ACK"; break; case A_DC_JOIN_FINALIZE: actionAsText = "A_DC_JOIN_FINALIZE"; break; case A_MSG_PROCESS: actionAsText = "A_MSG_PROCESS"; break; case A_MSG_ROUTE: actionAsText = "A_MSG_ROUTE"; break; case A_RECOVER: actionAsText = "A_RECOVER"; break; case A_DC_RELEASE: actionAsText = "A_DC_RELEASE"; break; case A_DC_RELEASED: actionAsText = "A_DC_RELEASED"; break; case A_DC_TAKEOVER: actionAsText = "A_DC_TAKEOVER"; break; case A_SHUTDOWN: actionAsText = "A_SHUTDOWN"; break; case A_SHUTDOWN_REQ: actionAsText = "A_SHUTDOWN_REQ"; break; case A_STOP: actionAsText = "A_STOP "; break; case A_EXIT_0: actionAsText = "A_EXIT_0"; break; case A_EXIT_1: actionAsText = "A_EXIT_1"; break; case O_CIB_RESTART: actionAsText = "O_CIB_RESTART"; break; case A_CIB_START: actionAsText = "A_CIB_START"; break; case A_CIB_STOP: actionAsText = "A_CIB_STOP"; break; case A_TE_INVOKE: actionAsText = "A_TE_INVOKE"; break; case O_TE_RESTART: actionAsText = "O_TE_RESTART"; break; case A_TE_START: actionAsText = "A_TE_START"; break; case A_TE_STOP: actionAsText = "A_TE_STOP"; break; case A_TE_HALT: actionAsText = "A_TE_HALT"; break; case A_TE_CANCEL: actionAsText = "A_TE_CANCEL"; break; case A_PE_INVOKE: actionAsText = "A_PE_INVOKE"; break; case O_PE_RESTART: actionAsText = "O_PE_RESTART"; break; case A_PE_START: actionAsText = "A_PE_START"; break; case A_PE_STOP: actionAsText = "A_PE_STOP"; break; case A_NODE_BLOCK: actionAsText = "A_NODE_BLOCK"; break; case A_UPDATE_NODESTATUS: actionAsText = "A_UPDATE_NODESTATUS"; break; case A_LOG: actionAsText = "A_LOG "; break; case A_ERROR: actionAsText = "A_ERROR "; break; case A_WARN: actionAsText = "A_WARN "; break; /* Composite actions */ case A_DC_TIMER_START | A_CL_JOIN_QUERY: actionAsText = "A_DC_TIMER_START|A_CL_JOIN_QUERY"; break; } if (actionAsText == NULL) { crm_err("Action %.16llx is unknown", action); actionAsText = ""; } return actionAsText; } void fsa_dump_inputs(int log_level, const char *text, long long input_register) { if (input_register == A_NOTHING) { return; } if (text == NULL) { text = "Input register contents:"; } if (pcmk_is_set(input_register, R_THE_DC)) { crm_trace("%s %.16llx (R_THE_DC)", text, R_THE_DC); } if (pcmk_is_set(input_register, R_STARTING)) { crm_trace("%s %.16llx (R_STARTING)", text, R_STARTING); } if (pcmk_is_set(input_register, R_SHUTDOWN)) { crm_trace("%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN); } if (pcmk_is_set(input_register, R_STAYDOWN)) { crm_trace("%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN); } if (pcmk_is_set(input_register, R_JOIN_OK)) { crm_trace("%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK); } if (pcmk_is_set(input_register, R_READ_CONFIG)) { crm_trace("%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG); } if (pcmk_is_set(input_register, R_INVOKE_PE)) { crm_trace("%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE); } if (pcmk_is_set(input_register, R_CIB_CONNECTED)) { crm_trace("%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED); } if (pcmk_is_set(input_register, R_PE_CONNECTED)) { crm_trace("%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED); } if (pcmk_is_set(input_register, R_TE_CONNECTED)) { crm_trace("%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED); } if (pcmk_is_set(input_register, R_LRM_CONNECTED)) { crm_trace("%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED); } if (pcmk_is_set(input_register, R_CIB_REQUIRED)) { crm_trace("%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED); } if (pcmk_is_set(input_register, R_PE_REQUIRED)) { crm_trace("%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED); } if (pcmk_is_set(input_register, R_TE_REQUIRED)) { crm_trace("%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED); } if (pcmk_is_set(input_register, R_REQ_PEND)) { crm_trace("%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND); } if (pcmk_is_set(input_register, R_PE_PEND)) { crm_trace("%s %.16llx (R_PE_PEND)", text, R_PE_PEND); } if (pcmk_is_set(input_register, R_TE_PEND)) { crm_trace("%s %.16llx (R_TE_PEND)", text, R_TE_PEND); } if (pcmk_is_set(input_register, R_RESP_PEND)) { crm_trace("%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND); } if (pcmk_is_set(input_register, R_CIB_DONE)) { crm_trace("%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE); } if (pcmk_is_set(input_register, R_HAVE_CIB)) { crm_trace("%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB); } if (pcmk_is_set(input_register, R_MEMBERSHIP)) { crm_trace("%s %.16llx (R_MEMBERSHIP)", text, R_MEMBERSHIP); } if (pcmk_is_set(input_register, R_PEER_DATA)) { crm_trace("%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA); } if (pcmk_is_set(input_register, R_IN_RECOVERY)) { crm_trace("%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY); } } void fsa_dump_actions(uint64_t action, const char *text) { if (pcmk_is_set(action, A_READCONFIG)) { crm_trace("Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text); } if (pcmk_is_set(action, A_STARTUP)) { crm_trace("Action %.16llx (A_STARTUP) %s", A_STARTUP, text); } if (pcmk_is_set(action, A_STARTED)) { crm_trace("Action %.16llx (A_STARTED) %s", A_STARTED, text); } if (pcmk_is_set(action, A_HA_CONNECT)) { crm_trace("Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text); } if (pcmk_is_set(action, A_HA_DISCONNECT)) { crm_trace("Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text); } if (pcmk_is_set(action, A_LRM_CONNECT)) { crm_trace("Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text); } - if (pcmk_is_set(action, A_LRM_EVENT)) { - crm_trace("Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text); - } if (pcmk_is_set(action, A_LRM_INVOKE)) { crm_trace("Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text); } if (pcmk_is_set(action, A_LRM_DISCONNECT)) { crm_trace("Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text); } if (pcmk_is_set(action, A_DC_TIMER_STOP)) { crm_trace("Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text); } if (pcmk_is_set(action, A_DC_TIMER_START)) { crm_trace("Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text); } if (pcmk_is_set(action, A_INTEGRATE_TIMER_START)) { crm_trace("Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text); } if (pcmk_is_set(action, A_INTEGRATE_TIMER_STOP)) { crm_trace("Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text); } if (pcmk_is_set(action, A_FINALIZE_TIMER_START)) { crm_trace("Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text); } if (pcmk_is_set(action, A_FINALIZE_TIMER_STOP)) { crm_trace("Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text); } if (pcmk_is_set(action, A_ELECTION_COUNT)) { crm_trace("Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text); } if (pcmk_is_set(action, A_ELECTION_VOTE)) { crm_trace("Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text); } if (pcmk_is_set(action, A_ELECTION_CHECK)) { crm_trace("Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text); } if (pcmk_is_set(action, A_CL_JOIN_ANNOUNCE)) { crm_trace("Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text); } if (pcmk_is_set(action, A_CL_JOIN_REQUEST)) { crm_trace("Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text); } if (pcmk_is_set(action, A_CL_JOIN_RESULT)) { crm_trace("Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text); } if (pcmk_is_set(action, A_DC_JOIN_OFFER_ALL)) { crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text); } if (pcmk_is_set(action, A_DC_JOIN_OFFER_ONE)) { crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text); } if (pcmk_is_set(action, A_DC_JOIN_PROCESS_REQ)) { crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text); } if (pcmk_is_set(action, A_DC_JOIN_PROCESS_ACK)) { crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text); } if (pcmk_is_set(action, A_DC_JOIN_FINALIZE)) { crm_trace("Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text); } if (pcmk_is_set(action, A_MSG_PROCESS)) { crm_trace("Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text); } if (pcmk_is_set(action, A_MSG_ROUTE)) { crm_trace("Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text); } if (pcmk_is_set(action, A_RECOVER)) { crm_trace("Action %.16llx (A_RECOVER) %s", A_RECOVER, text); } if (pcmk_is_set(action, A_DC_RELEASE)) { crm_trace("Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text); } if (pcmk_is_set(action, A_DC_RELEASED)) { crm_trace("Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text); } if (pcmk_is_set(action, A_DC_TAKEOVER)) { crm_trace("Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text); } if (pcmk_is_set(action, A_SHUTDOWN)) { crm_trace("Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text); } if (pcmk_is_set(action, A_SHUTDOWN_REQ)) { crm_trace("Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text); } if (pcmk_is_set(action, A_STOP)) { crm_trace("Action %.16llx (A_STOP ) %s", A_STOP, text); } if (pcmk_is_set(action, A_EXIT_0)) { crm_trace("Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text); } if (pcmk_is_set(action, A_EXIT_1)) { crm_trace("Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text); } if (pcmk_is_set(action, A_CIB_START)) { crm_trace("Action %.16llx (A_CIB_START) %s", A_CIB_START, text); } if (pcmk_is_set(action, A_CIB_STOP)) { crm_trace("Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text); } if (pcmk_is_set(action, A_TE_INVOKE)) { crm_trace("Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text); } if (pcmk_is_set(action, A_TE_START)) { crm_trace("Action %.16llx (A_TE_START) %s", A_TE_START, text); } if (pcmk_is_set(action, A_TE_STOP)) { crm_trace("Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text); } if (pcmk_is_set(action, A_TE_CANCEL)) { crm_trace("Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text); } if (pcmk_is_set(action, A_PE_INVOKE)) { crm_trace("Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text); } if (pcmk_is_set(action, A_PE_START)) { crm_trace("Action %.16llx (A_PE_START) %s", A_PE_START, text); } if (pcmk_is_set(action, A_PE_STOP)) { crm_trace("Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text); } if (pcmk_is_set(action, A_NODE_BLOCK)) { crm_trace("Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text); } if (pcmk_is_set(action, A_UPDATE_NODESTATUS)) { crm_trace("Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text); } if (pcmk_is_set(action, A_LOG)) { crm_trace("Action %.16llx (A_LOG ) %s", A_LOG, text); } if (pcmk_is_set(action, A_ERROR)) { crm_trace("Action %.16llx (A_ERROR ) %s", A_ERROR, text); } if (pcmk_is_set(action, A_WARN)) { crm_trace("Action %.16llx (A_WARN ) %s", A_WARN, text); } } gboolean update_dc(xmlNode * msg) { char *last_dc = controld_globals.dc_name; const char *dc_version = NULL; const char *welcome_from = NULL; if (msg != NULL) { gboolean invalid = FALSE; dc_version = crm_element_value(msg, PCMK_XA_VERSION); welcome_from = crm_element_value(msg, PCMK__XA_SRC); CRM_CHECK(dc_version != NULL, return FALSE); CRM_CHECK(welcome_from != NULL, return FALSE); if (AM_I_DC && !controld_is_local_node(welcome_from)) { invalid = TRUE; } else if ((controld_globals.dc_name != NULL) && !pcmk__str_eq(welcome_from, controld_globals.dc_name, pcmk__str_casei)) { invalid = TRUE; } if (invalid) { if (AM_I_DC) { crm_err("Not updating DC to %s (%s): we are also a DC", welcome_from, dc_version); } else { crm_warn("New DC %s is not %s", welcome_from, controld_globals.dc_name); } controld_set_fsa_action_flags(A_CL_JOIN_QUERY | A_DC_TIMER_START); controld_trigger_fsa(); return FALSE; } } controld_globals.dc_name = NULL; // freed as last_dc pcmk__str_update(&(controld_globals.dc_name), welcome_from); pcmk__str_update(&(controld_globals.dc_version), dc_version); if (pcmk__str_eq(controld_globals.dc_name, last_dc, pcmk__str_casei)) { /* do nothing */ } else if (controld_globals.dc_name != NULL) { pcmk__node_status_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster_member); crm_info("Set DC to %s (%s)", controld_globals.dc_name, pcmk__s(controld_globals.dc_version, "unknown version")); pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_MEMBER); } else if (last_dc != NULL) { crm_info("Unset DC (was %s)", last_dc); } free(last_dc); return TRUE; } void crmd_peer_down(pcmk__node_status_t *peer, bool full) { if(full && peer->state == NULL) { pcmk__update_peer_state(__func__, peer, PCMK__VALUE_LOST, 0); crm_update_peer_proc(__func__, peer, crm_proc_none, NULL); } crm_update_peer_join(__func__, peer, controld_join_none); pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); } /*! * \internal * \brief Check feature set compatibility of DC and joining node * * Return true if a joining node's CRM feature set is compatible with the * current DC's. The feature sets are compatible if they have the same major * version number, and the DC's minor version number is the same or older than * the joining node's. The minor-minor version is intended solely to allow * resource agents to detect feature support, and so is ignored. * * \param[in] dc_version DC's feature set * \param[in] join_version Joining node's version */ bool feature_set_compatible(const char *dc_version, const char *join_version) { char *dc_minor = NULL; char *join_minor = NULL; long dc_v = 0; long join_v = 0; // Get DC's major version errno = 0; dc_v = strtol(dc_version, &dc_minor, 10); if (errno) { return FALSE; } // Get joining node's major version errno = 0; join_v = strtol(join_version, &join_minor, 10); if (errno) { return FALSE; } // Major version component must be identical if (dc_v != join_v) { return FALSE; } // Get DC's minor version if (*dc_minor == '.') { ++dc_minor; } errno = 0; dc_v = strtol(dc_minor, NULL, 10); if (errno) { return FALSE; } // Get joining node's minor version if (*join_minor == '.') { ++join_minor; } errno = 0; join_v = strtol(join_minor, NULL, 10); if (errno) { return FALSE; } // DC's minor version must be the same or older return dc_v <= join_v; } const char * get_node_id(xmlNode *lrm_rsc_op) { xmlNode *node = lrm_rsc_op; while ((node != NULL) && !pcmk__xe_is(node, PCMK__XE_NODE_STATE)) { node = node->parent; } CRM_CHECK(node != NULL, return NULL); return pcmk__xe_id(node); }