diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h index fac269b9db..002031433c 100644 --- a/daemons/controld/controld_fsa.h +++ b/daemons/controld/controld_fsa.h @@ -1,699 +1,698 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRMD_FSA__H # define CRMD_FSA__H # include # include # include # include # include # include # include /*! States the controller can be in */ enum crmd_fsa_state { S_IDLE = 0, /* Nothing happening */ S_ELECTION, /* Take part in the election algorithm as * described below */ S_INTEGRATION, /* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_FINALIZE_JOIN, /* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_NOT_DC, /* we are in non-DC mode */ S_POLICY_ENGINE, /* Determine next stable state of the cluster */ S_RECOVERY, /* Something bad happened, check everything is ok * before continuing and attempt to recover if * required */ S_RELEASE_DC, /* we were the DC, but now we arent anymore, * possibly by our own request, and we should * release all unnecessary sub-systems, finish * any pending actions, do general cleanup and * unset anything that makes us think we are * special :) */ S_STARTING, /* we are just starting out */ S_PENDING, /* we are not a full/active member yet */ S_STOPPING, /* We are in the final stages of shutting down */ S_TERMINATE, /* We are going to shutdown, this is the equiv of * "Sending TERM signal to all processes" in Linux * and in worst case scenarios could be considered * a self STONITH */ S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable * state of the cluster a reality */ S_HALT, /* Freeze - don't do anything * Something bad happened that needs the admin to fix * Wait for I_ELECTION */ /* ----------- Last input found in table is above ---------- */ S_ILLEGAL /* This is an illegal FSA state */ /* (must be last) */ }; # define MAXSTATE S_ILLEGAL /* Once we start and do some basic sanity checks, we go into the S_NOT_DC state and await instructions from the DC or input from the cluster layer which indicates the election algorithm needs to run. If the election algorithm is triggered, we enter the S_ELECTION state from where we can either go back to the S_NOT_DC state or progress to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC but aren't anymore). See the libcrmcluster API documentation for more information about the election algorithm. Once the election is complete, if we are the DC, we enter the S_INTEGRATION state which is a DC-in-waiting style state. We are the DC, but we shouldn't do anything yet because we may not have an up-to-date picture of the cluster. There may of course be times when this fails, so we should go back to the S_RECOVERY stage and check everything is ok. We may also end up here if a new node came online, since each node is authoritative about itself, and we would want to incorporate its information into the CIB. Once we have the latest CIB, we then enter the S_POLICY_ENGINE state where invoke the scheduler. It is possible that between invoking the scheduler and receiving an answer, that we receive more input. In this case, we would discard the orginal result and invoke it again. Once we are satisfied with the output from the scheduler, we enter S_TRANSITION_ENGINE and feed the scheduler's output to the Transition Engine who attempts to make the scheduler's calculation a reality. If the transition completes successfully, we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the current unstable state and try again. Of course, we may be asked to shutdown at any time, however we must progress to S_NOT_DC before doing so. Once we have handed over DC duties to another node, we can then shut down like everyone else, that is, by asking the DC for permission and waiting for it to take all our resources away. The case where we are the DC and the only node in the cluster is a special case and handled as an escalation which takes us to S_SHUTDOWN. Similarly, if any other point in the shutdown fails or stalls, this is escalated and we end up in S_TERMINATE. At any point, the controller can relay messages for its subsystems, but outbound messages (from subsystems) should probably be blocked until S_INTEGRATION (for the DC) or the join protocol has completed (for non-DC controllers). */ /*====================================== * * Inputs/Events/Stimuli to be given to the finite state machine * * Some of these a true events, and others are synthesised based on * the "register" (see below) and the contents or source of messages. * * The machine keeps processing until receiving I_NULL * *======================================*/ enum crmd_fsa_input { /* 0 */ I_NULL, /* Nothing happened */ /* 1 */ I_CIB_OP, /* An update to the CIB occurred */ I_CIB_UPDATE, /* An update to the CIB occurred */ I_DC_TIMEOUT, /* We have lost communication with the DC */ I_ELECTION, /* Someone started an election */ I_PE_CALC, /* The scheduler needs to be invoked */ I_RELEASE_DC, /* The election completed and we were not * elected, but we were the DC beforehand */ I_ELECTION_DC, /* The election completed and we were (re-)elected * DC */ I_ERROR, /* Something bad happened (more serious than * I_FAIL) and may not have been due to the action * being performed. For example, we may have lost * our connection to the CIB. */ /* 9 */ I_FAIL, /* The action failed to complete successfully */ I_INTEGRATED, I_FINALIZED, I_NODE_JOIN, /* A node has entered the cluster */ I_NOT_DC, /* We are not and were not the DC before or after * the current operation or state */ I_RECOVERED, /* The recovery process completed successfully */ I_RELEASE_FAIL, /* We could not give up DC status for some reason */ I_RELEASE_SUCCESS, /* We are no longer the DC */ I_RESTART, /* The current set of actions needs to be * restarted */ I_TE_SUCCESS, /* Some non-resource, non-cluster-layer action * is required of us, e.g. ping */ /* 20 */ I_ROUTER, /* Do our job as router and forward this to the * right place */ I_SHUTDOWN, /* We are asking to shutdown */ I_STOP, /* We have been told to shutdown */ I_TERMINATE, /* Actually exit */ I_STARTUP, I_PE_SUCCESS, /* The action completed successfully */ I_JOIN_OFFER, /* The DC is offering membership */ I_JOIN_REQUEST, /* The client is requesting membership */ I_JOIN_RESULT, /* If not the DC: The result of a join request * Else: A client is responding with its local state info */ I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen" * and until it does, we can't do anything else */ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */ I_LRM_EVENT, /* 30 */ I_PENDING, I_HALT, /* ------------ Last input found in table is above ----------- */ I_ILLEGAL /* This is an illegal value for an FSA input */ /* (must be last) */ }; # define MAXINPUT I_ILLEGAL # define I_MESSAGE I_ROUTER /*====================================== * * actions * * Some of the actions below will always occur together for now, but this may * not always be the case, so they are split up so that they can easily be * called independently in the future, if necessary. * * For example, separating A_LRM_CONNECT from A_STARTUP might be useful * if we ever try to recover from a faulty or disconnected executor. * *======================================*/ /* Don't do anything */ # define A_NOTHING 0x0000000000000000ULL /* -- Startup actions -- */ /* Hook to perform any actions (other than connecting to other daemons) * that might be needed as part of the startup. */ # define A_STARTUP 0x0000000000000001ULL /* Hook to perform any actions that might be needed as part * after startup is successful. */ # define A_STARTED 0x0000000000000002ULL /* Connect to cluster layer */ # define A_HA_CONNECT 0x0000000000000004ULL # define A_HA_DISCONNECT 0x0000000000000008ULL # define A_INTEGRATE_TIMER_START 0x0000000000000010ULL # define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL # define A_FINALIZE_TIMER_START 0x0000000000000040ULL # define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL /* -- Election actions -- */ # define A_DC_TIMER_START 0x0000000000000100ULL # define A_DC_TIMER_STOP 0x0000000000000200ULL # define A_ELECTION_COUNT 0x0000000000000400ULL # define A_ELECTION_VOTE 0x0000000000000800ULL # define A_ELECTION_START 0x0000000000001000ULL /* -- Message processing -- */ /* Process the queue of requests */ # define A_MSG_PROCESS 0x0000000000002000ULL /* Send the message to the correct recipient */ # define A_MSG_ROUTE 0x0000000000004000ULL /* Send a welcome message to new node(s) */ # define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL /* -- Server Join protocol actions -- */ /* Send a welcome message to all nodes */ # define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL /* Process the remote node's ack of our join message */ # define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL /* Send out the results of the Join phase */ # define A_DC_JOIN_FINALIZE 0x0000000000040000ULL /* Send out the results of the Join phase */ # define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL /* -- Client Join protocol actions -- */ # define A_CL_JOIN_QUERY 0x0000000000100000ULL # define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL /* Request membership to the DC list */ # define A_CL_JOIN_REQUEST 0x0000000000400000ULL /* Did the DC accept or reject the request */ # define A_CL_JOIN_RESULT 0x0000000000800000ULL /* -- Recovery, DC start/stop -- */ /* Something bad happened, try to recover */ # define A_RECOVER 0x0000000001000000ULL /* Hook to perform any actions (apart from starting, the TE, scheduler, * and gathering the latest CIB) that might be necessary before * giving up the responsibilities of being the DC. */ # define A_DC_RELEASE 0x0000000002000000ULL /* */ # define A_DC_RELEASED 0x0000000004000000ULL /* Hook to perform any actions (apart from starting, the TE, scheduler, * and gathering the latest CIB) that might be necessary before * taking over the responsibilities of being the DC. */ # define A_DC_TAKEOVER 0x0000000008000000ULL /* -- Shutdown actions -- */ # define A_SHUTDOWN 0x0000000010000000ULL # define A_STOP 0x0000000020000000ULL # define A_EXIT_0 0x0000000040000000ULL # define A_EXIT_1 0x0000000080000000ULL # define A_SHUTDOWN_REQ 0x0000000100000000ULL # define A_ELECTION_CHECK 0x0000000200000000ULL # define A_DC_JOIN_FINAL 0x0000000400000000ULL /* -- CIB actions -- */ # define A_CIB_START 0x0000020000000000ULL # define A_CIB_STOP 0x0000040000000000ULL /* -- Transition Engine actions -- */ /* Attempt to reach the newly calculated cluster state. This is * only called once per transition (except if it is asked to * stop the transition or start a new one). * Once given a cluster state to reach, the TE will determine * tasks that can be performed in parallel, execute them, wait * for replies and then determine the next set until the new * state is reached or no further tasks can be taken. */ # define A_TE_INVOKE 0x0000100000000000ULL # define A_TE_START 0x0000200000000000ULL # define A_TE_STOP 0x0000400000000000ULL # define A_TE_CANCEL 0x0000800000000000ULL # define A_TE_HALT 0x0001000000000000ULL /* -- Scheduler actions -- */ /* Calculate the next state for the cluster. This is only * invoked once per needed calculation. */ # define A_PE_INVOKE 0x0002000000000000ULL # define A_PE_START 0x0004000000000000ULL # define A_PE_STOP 0x0008000000000000ULL /* -- Misc actions -- */ /* Add a system generate "block" so that resources arent moved * to or are activly moved away from the affected node. This * way we can return quickly even if busy with other things. */ # define A_NODE_BLOCK 0x0010000000000000ULL /* Update our information in the local CIB */ # define A_UPDATE_NODESTATUS 0x0020000000000000ULL # define A_READCONFIG 0x0080000000000000ULL /* -- LRM Actions -- */ /* Connect to pacemaker-execd */ # define A_LRM_CONNECT 0x0100000000000000ULL /* Disconnect from pacemaker-execd */ # define A_LRM_DISCONNECT 0x0200000000000000ULL # define A_LRM_INVOKE 0x0400000000000000ULL # define A_LRM_EVENT 0x0800000000000000ULL /* -- Logging actions -- */ # define A_LOG 0x1000000000000000ULL # define A_ERROR 0x2000000000000000ULL # define A_WARN 0x4000000000000000ULL # define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP) # define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED) # define O_PE_RESTART (A_PE_START|A_PE_STOP) # define O_TE_RESTART (A_TE_START|A_TE_STOP) # define O_CIB_RESTART (A_CIB_START|A_CIB_STOP) # define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT) # define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START) /*====================================== * * "register" contents * * Things we may want to remember regardless of which state we are in. * * These also count as inputs for synthesizing I_* * *======================================*/ # define R_THE_DC 0x00000001ULL /* Are we the DC? */ # define R_STARTING 0x00000002ULL /* Are we starting up? */ # define R_SHUTDOWN 0x00000004ULL /* Are we trying to shut down? */ # define R_STAYDOWN 0x00000008ULL /* Should we restart? */ # define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */ # define R_READ_CONFIG 0x00000040ULL # define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked? # define R_CIB_CONNECTED 0x00000100ULL /* Is the CIB connected? */ # define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected? # define R_TE_CONNECTED 0x00000400ULL /* Is the Transition Engine connected? */ # define R_LRM_CONNECTED 0x00000800ULL // Is pacemaker-execd connected? # define R_CIB_REQUIRED 0x00001000ULL /* Is the CIB required? */ # define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required? # define R_TE_REQUIRED 0x00004000ULL /* Is the Transition Engine required? */ # define R_ST_REQUIRED 0x00008000ULL /* Is the Stonith daemon required? */ # define R_CIB_DONE 0x00010000ULL /* Have we calculated the CIB? */ # define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */ # define R_CIB_ASKED 0x00040000ULL /* Have we asked for an up-to-date CIB */ # define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet */ # define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet */ # define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */ # define R_REQ_PEND 0x01000000ULL /* Are there Requests waiting for processing? */ # define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler? # define R_TE_PEND 0x04000000ULL /* Has the TE been invoked and we're awaiting completion? */ # define R_RESP_PEND 0x08000000ULL /* Do we have clients waiting on a response? if so perhaps we shouldn't stop yet */ # define R_IN_TRANSITION 0x10000000ULL /* */ # define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all * resources in preparation for * shutting down */ # define R_IN_RECOVERY 0x80000000ULL #define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID) enum crmd_fsa_cause { C_UNKNOWN = 0, C_STARTUP, C_IPC_MESSAGE, C_HA_MESSAGE, C_CRMD_STATUS_CALLBACK, C_LRM_OP_CALLBACK, C_TIMER_POPPED, C_SHUTDOWN, C_FSA_INTERNAL, }; enum fsa_data_type { fsa_dt_none, fsa_dt_ha_msg, fsa_dt_xml, fsa_dt_lrm, }; typedef struct fsa_data_s fsa_data_t; struct fsa_data_s { int id; enum crmd_fsa_input fsa_input; enum crmd_fsa_cause fsa_cause; uint64_t actions; const char *origin; void *data; enum fsa_data_type data_type; }; #define controld_set_fsa_input_flags(flags_to_set) do { \ controld_globals.fsa_input_register \ = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "FSA input", "controller", \ controld_globals.fsa_input_register, \ (flags_to_set), #flags_to_set); \ } while (0) #define controld_clear_fsa_input_flags(flags_to_clear) do { \ controld_globals.fsa_input_register \ = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "FSA input", "controller", \ controld_globals.fsa_input_register, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define controld_set_fsa_action_flags(flags_to_set) do { \ controld_globals.fsa_actions \ = pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \ "FSA action", "controller", \ controld_globals.fsa_actions, \ (flags_to_set), #flags_to_set); \ } while (0) #define controld_clear_fsa_action_flags(flags_to_clear) do { \ controld_globals.fsa_actions \ = pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \ "FSA action", "controller", \ controld_globals.fsa_actions, \ (flags_to_clear), #flags_to_clear); \ } while (0) // These should be moved elsewhere -void do_update_cib_nodes(gboolean overwrite, const char *caller); int crmd_cib_smart_opt(void); xmlNode *controld_query_executor_state(void); const char *fsa_input2string(enum crmd_fsa_input input); const char *fsa_state2string(enum crmd_fsa_state state); const char *fsa_cause2string(enum crmd_fsa_cause cause); const char *fsa_action2string(long long action); enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause); enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input); uint64_t controld_fsa_get_action(enum crmd_fsa_input input); void controld_init_fsa_trigger(void); void controld_destroy_fsa_trigger(void); void free_max_generation(void); # define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC) # define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__) void controld_trigger_fsa_as(const char *fn, int line); /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_LOG */ void do_log(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_CIB_START, STOP, RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_PE_START, STOP, RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_TE_START, STOP, RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_VOTE */ void do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_COUNT */ void do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_CHECK */ void do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_TIMER_STOP */ void do_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_TAKEOVER */ void do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_RELEASE */ void do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_ANNOUNCE */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_REQUEST */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_RESULT */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_LRM_EVENT */ void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_FINAL */ void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); #endif diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index 26c989f06f..aa30a2f2eb 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -1,963 +1,962 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include static char *max_generation_from = NULL; static xmlNodePtr max_generation_xml = NULL; /*! * \internal * \brief Nodes from which a CIB sync has failed since the peer joined * * This table is of the form (node_name -> join_id). \p node_name is * the name of a client node from which a CIB \p sync_from() call has failed in * \p do_dc_join_finalize() since the client joined the cluster as a peer. * \p join_id is the ID of the join round in which the \p sync_from() failed, * and is intended for use in nack log messages. */ static GHashTable *failed_sync_nodes = NULL; void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); /* Numeric counter used to identify join rounds (an unsigned int would be * appropriate, except we get and set it in XML as int) */ static int current_join_id = 0; /*! * \internal * \brief Destroy the hash table containing failed sync nodes */ void controld_destroy_failed_sync_table(void) { if (failed_sync_nodes != NULL) { g_hash_table_destroy(failed_sync_nodes); failed_sync_nodes = NULL; } } /*! * \internal * \brief Remove a node from the failed sync nodes table if present * * \param[in] node_name Node name to remove */ void controld_remove_failed_sync_node(const char *node_name) { if (failed_sync_nodes != NULL) { g_hash_table_remove(failed_sync_nodes, (gchar *) node_name); } } /*! * \internal * \brief Add to a hash table a node whose CIB failed to sync * * \param[in] node_name Name of node whose CIB failed to sync * \param[in] join_id Join round when the failure occurred */ static void record_failed_sync_node(const char *node_name, gint join_id) { if (failed_sync_nodes == NULL) { failed_sync_nodes = pcmk__strikey_table(g_free, NULL); } /* If the node is already in the table then we failed to nack it during the * filter offer step */ CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name), GINT_TO_POINTER(join_id))); } /*! * \internal * \brief Look up a node name in the failed sync table * * \param[in] node_name Name of node to look up * \param[out] join_id Where to store the join ID of when the sync failed * * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the * node name was found, or \p pcmk_rc_node_unknown otherwise. * \note \p *join_id is set to -1 if the node is not found. */ static int lookup_failed_sync_node(const char *node_name, gint *join_id) { *join_id = -1; if (failed_sync_nodes != NULL) { gpointer result = g_hash_table_lookup(failed_sync_nodes, (gchar *) node_name); if (result != NULL) { *join_id = GPOINTER_TO_INT(result); return pcmk_rc_ok; } } return pcmk_rc_node_unknown; } void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) { enum crm_join_phase last = 0; CRM_CHECK(node != NULL, return); /* Remote nodes do not participate in joins */ if (pcmk_is_set(node->flags, crm_remote_node)) { return; } last = node->join; if(phase == last) { crm_trace("Node %s join-%d phase is still %s " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(last), node->id, source); } else if ((phase <= crm_join_none) || (phase == (last + 1))) { node->join = phase; crm_trace("Node %s join-%d phase is now %s (was %s) " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(phase), crm_join_phase_str(last), node->id, source); } else { crm_warn("Rejecting join-%d phase update for node %s because " "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", current_join_id, node->uname, crm_join_phase_str(last), crm_join_phase_str(phase), node->id, source); } } static void start_join_round(void) { GHashTableIter iter; crm_node_t *peer = NULL; crm_debug("Starting new join round join-%d", current_join_id); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__func__, peer, crm_join_none); } if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } controld_clear_fsa_input_flags(R_HAVE_CIB|R_CIB_ASKED); } /*! * \internal * \brief Create a join message from the DC * * \param[in] join_op Join operation name * \param[in] host_to Recipient of message */ static xmlNode * create_dc_message(const char *join_op, const char *host_to) { xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); /* Identify which election this is a part of */ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id); /* Add a field specifying whether the DC is shutting down. This keeps the * joining node from fencing the old DC if it becomes the new DC. */ pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING, pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)); return msg; } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { xmlNode *offer = NULL; crm_node_t *member = (crm_node_t *)value; CRM_ASSERT(member != NULL); if (crm_is_peer_active(member) == FALSE) { crm_info("Not making join-%d offer to inactive node %s", current_join_id, (member->uname? member->uname : "with unknown name")); if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->uname == NULL) { crm_info("Not making join-%d offer to node uuid %s with unknown name", current_join_id, member->uuid); return; } if (controld_globals.membership_id != crm_peer_seq) { controld_globals.membership_id = crm_peer_seq; crm_info("Making join-%d offers based on membership event %llu", current_join_id, crm_peer_seq); } if(user_data && member->join > crm_join_none) { crm_info("Not making join-%d offer to already known node %s (%s)", current_join_id, member->uname, crm_join_phase_str(member->join)); return; } crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none); offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname); // Advertise our feature set so the joining node can bail if not compatible crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_info("Sending join-%d offer to %s", current_join_id, member->uname); send_cluster_message(member, crm_msg_crmd, offer, TRUE); free_xml(offer); crm_update_peer_join(__func__, member, crm_join_welcomed); } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int count; /* Reset everyone's status back to down or in_ccm in the CIB. * Any nodes that are active in the CIB but not in the cluster membership * will be seen as offline by the scheduler anyway. */ current_join_id++; start_join_round(); - // do_update_cib_nodes(TRUE, __func__); update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; int count; const char *join_to = NULL; if (msg_data->data == NULL) { crm_info("Making join-%d offers to any unconfirmed nodes " "because an unknown node joined", current_join_id); g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); check_join_state(cur_state, __func__); return; } welcome = fsa_typed_data(fsa_dt_ha_msg); if (welcome == NULL) { // fsa_typed_data() already logged an error return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if (join_to == NULL) { crm_err("Can't make join-%d offer to unknown node", current_join_id); return; } member = crm_get_peer(0, join_to); /* It is possible that a node will have been sick or starting up when the * original offer was made. However, it will either re-announce itself in * due course, or we can re-store the original offer on the client. */ crm_update_peer_join(__func__, member, crm_join_none); join_make_offer(NULL, member, NULL); /* If the offer isn't to the local node, make an offer to the local node as * well, to ensure the correct value for max_generation_from. */ if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { member = crm_get_peer(0, controld_globals.our_nodename); join_make_offer(NULL, member, NULL); } /* This was a genuine join request; cancel any existing transition and * invoke the scheduler. */ abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = crm_element_value(left, field); const char *elem_r = crm_element_value(right, field); long long int_elem_l; long long int_elem_r; pcmk__scan_ll(elem_l, &int_elem_l, -1LL); pcmk__scan_ll(elem_r, &int_elem_r, -1LL); if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; int count = 0; gint value = 0; gboolean ack_nack_bool = TRUE; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); const char *join_version = crm_element_value(join_ack->msg, XML_ATTR_CRM_VERSION); crm_node_t *join_node = NULL; if (join_from == NULL) { crm_err("Ignoring invalid join request without node name"); return; } join_node = crm_get_peer(0, join_from); crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (join_id != current_join_id) { crm_debug("Ignoring join-%d request from %s because we are on join-%d", join_id, join_from, current_join_id); check_join_state(cur_state, __func__); return; } generation = join_ack->xml; if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } if (ref == NULL) { ref = "none"; // for logging only } if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) { crm_err("Rejecting join-%d request from node %s because we failed to " "sync its CIB in join-%d " CRM_XS " ref=%s", join_id, join_from, value, ref); ack_nack_bool = FALSE; } else if (!crm_is_peer_active(join_node)) { if (match_down_event(join_from) != NULL) { /* The join request was received after the node was fenced or * otherwise shutdown in a way that we're aware of. No need to log * an error in this rare occurrence; we know the client was recently * shut down, and receiving a lingering in-flight request is not * cause for alarm. */ crm_debug("Rejecting join-%d request from inactive node %s " CRM_XS " ref=%s", join_id, join_from, ref); } else { crm_err("Rejecting join-%d request from inactive node %s " CRM_XS " ref=%s", join_id, join_from, ref); } ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Rejecting invalid join-%d request from node %s " "missing CIB generation " CRM_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if ((join_version == NULL) || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { crm_err("Rejecting join-%d request from node %s because feature set %s" " is incompatible with ours (%s) " CRM_XS " ref=%s", join_id, join_from, (join_version? join_version : "pre-3.1.0"), CRM_FEATURE_SET, ref); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { const char *validation = crm_element_value(generation, XML_ATTR_VALIDATION); if (get_schema_version(validation) < 0) { crm_err("Rejecting join-%d request from %s (with first CIB " "generation) due to unknown schema version %s " CRM_XS " ref=%s", join_id, join_from, validation, ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with first CIB " "generation) " CRM_XS " ref=%s", join_id, join_from, ref); max_generation_xml = copy_xml(generation); pcmk__str_update(&max_generation_from, join_from); } } else if ((cmp < 0) || ((cmp == 0) && pcmk__str_eq(join_from, controld_globals.our_nodename, pcmk__str_casei))) { const char *validation = crm_element_value(generation, XML_ATTR_VALIDATION); if (get_schema_version(validation) < 0) { crm_err("Rejecting join-%d request from %s (with better CIB " "generation than current best from %s) due to unknown " "schema version %s " CRM_XS " ref=%s", join_id, join_from, max_generation_from, validation, ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with better CIB " "generation than current best from %s) " CRM_XS " ref=%s", join_id, join_from, max_generation_from, ref); crm_log_xml_debug(max_generation_xml, "Old max generation"); crm_log_xml_debug(generation, "New max generation"); free_xml(max_generation_xml); max_generation_xml = copy_xml(join_ack->xml); pcmk__str_update(&max_generation_from, join_from); } } else { crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", join_id, join_from, ref); } if (!ack_nack_bool) { if (compare_version(join_version, "3.17.0") < 0) { /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely * after a nack message, don't send one */ crm_update_peer_join(__func__, join_node, crm_join_nack_quiet); } else { crm_update_peer_join(__func__, join_node, crm_join_nack); } pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK); } else { crm_update_peer_join(__func__, join_node, crm_join_integrated); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); } count = crmd_join_phase_count(crm_join_integrated); crm_debug("%d node%s currently integrated in join-%d", count, pcmk__plural_s(count), join_id); if (check_join_state(cur_state, __func__) == FALSE) { // Don't waste time by invoking the scheduler yet count = crmd_join_phase_count(crm_join_welcomed); crm_debug("Waiting on join-%d requests from %d outstanding node%s", join_id, count, pcmk__plural_s(count)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; int count_welcomed = crmd_join_phase_count(crm_join_welcomed); int count_finalizable = crmd_join_phase_count(crm_join_integrated) + crmd_join_phase_count(crm_join_nack) + crmd_join_phase_count(crm_join_nack_quiet); /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ if (count_welcomed != 0) { crm_debug("Waiting on join-%d requests from %d outstanding node%s " "before finalizing join", current_join_id, count_welcomed, pcmk__plural_s(count_welcomed)); crmd_join_phase_log(LOG_DEBUG); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (count_finalizable == 0) { crm_debug("Finalization not needed for join-%d at the current time", current_join_id); crmd_join_phase_log(LOG_DEBUG); check_join_state(controld_globals.fsa_state, __func__); return; } controld_clear_fsa_input_flags(R_HAVE_CIB); if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename, pcmk__str_null_matches|pcmk__str_casei)) { controld_set_fsa_input_flags(R_HAVE_CIB); } if (pcmk_is_set(controld_globals.fsa_input_register, R_IN_TRANSITION)) { crm_warn("Delaying join-%d finalization while transition in progress", current_join_id); crmd_join_phase_log(LOG_DEBUG); crmd_fsa_stall(FALSE); return; } if ((max_generation_from != NULL) && !pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { /* ask for the agreed best CIB */ pcmk__str_update(&sync_from, max_generation_from); controld_set_fsa_input_flags(R_CIB_ASKED); crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", current_join_id, count_finalizable, pcmk__plural_s(count_finalizable), sync_from); crm_log_xml_notice(max_generation_xml, "Requested CIB version"); } else { /* Send _our_ CIB out to everyone */ pcmk__str_update(&sync_from, controld_globals.our_nodename); crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", current_join_id, count_finalizable, pcmk__plural_s(count_finalizable)); crm_log_xml_debug(max_generation_xml, "Requested CIB version"); } crmd_join_phase_log(LOG_DEBUG); rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn, sync_from, NULL, cib_quorum_override); fsa_register_cib_callback(rc, sync_from, finalize_sync_callback); } void free_max_generation(void) { free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); controld_clear_fsa_input_flags(R_CIB_ASKED); if (rc != pcmk_ok) { const char *sync_from = (const char *) user_data; do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR), "Could not sync CIB from %s in join-%d: %s", sync_from, current_join_id, pcmk_strerror(rc)); if (rc != -pcmk_err_old_data) { record_failed_sync_node(sync_from, current_join_id); } /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __func__); } else if (!AM_I_DC) { crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) { crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN " "(%s)", current_join_id, fsa_state2string(controld_globals.fsa_state)); } else { controld_set_fsa_input_flags(R_HAVE_CIB); controld_clear_fsa_input_flags(R_CIB_ASKED); /* make sure dc_uuid is re-set to us */ if (!check_join_state(controld_globals.fsa_state, __func__)) { int count_finalizable = 0; count_finalizable = crmd_join_phase_count(crm_join_integrated) + crmd_join_phase_count(crm_join_nack) + crmd_join_phase_count(crm_join_nack_quiet); crm_debug("Notifying %d node%s of join-%d results", count_finalizable, pcmk__plural_s(count_finalizable), current_join_id); g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); } } } static void join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_debug("join-%d node history update (via CIB call %d) complete", current_join_id, call_id); check_join_state(controld_globals.fsa_state, __func__); } else { crm_err("join-%d node history update (via CIB call %d) failed: %s " "(next transition may determine resource status incorrectly)", current_join_id, call_id, pcmk_strerror(rc)); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); enum controld_section_e section = controld_section_lrm; const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); crm_node_t *peer = NULL; // Sanity checks if (join_from == NULL) { crm_warn("Ignoring message received without node identification"); return; } if (op == NULL) { crm_warn("Ignoring message received from %s without task", join_from); return; } if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring '%s' message from %s while waiting for '%s'", op, join_from, CRM_OP_JOIN_CONFIRM); return; } if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { crm_warn("Ignoring join confirmation from %s without valid join ID", join_from); return; } peer = crm_get_peer(0, join_from); if (peer->join != crm_join_finalized) { crm_info("Ignoring out-of-sequence join-%d confirmation from %s " "(currently %s not %s)", join_id, join_from, crm_join_phase_str(peer->join), crm_join_phase_str(crm_join_finalized)); return; } if (join_id != current_join_id) { crm_err("Rejecting join-%d confirmation from %s " "because currently on join-%d", join_id, join_from, current_join_id); crm_update_peer_join(__func__, peer, crm_join_nack); return; } crm_update_peer_join(__func__, peer, crm_join_confirmed); /* Update CIB with node's current executor state. A new transition will be * triggered later, when the CIB notifies us of the change. */ if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { section = controld_section_lrm_unlocked; } controld_delete_node_state(join_from, section, cib_scope_local); if (pcmk__str_eq(join_from, controld_globals.our_nodename, pcmk__str_casei)) { xmlNode *now_dc_lrmd_state = controld_query_executor_state(); if (now_dc_lrmd_state != NULL) { fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_scope_local|cib_quorum_override|cib_can_create, call_id); free_xml(now_dc_lrmd_state); crm_debug("Updating local node history for join-%d " "from query result (via CIB call %d)", join_id, call_id); } else { fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local|cib_quorum_override|cib_can_create, call_id); crm_warn("Updating local node history from join-%d confirmation " "because query failed (via CIB call %d)", join_id, call_id); } } else { fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local|cib_quorum_override|cib_can_create, call_id); crm_debug("Updating node history for %s from join-%d confirmation " "(via CIB call %d)", join_from, join_id, call_id); } fsa_register_cib_callback(call_id, NULL, join_update_complete_callback); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; crm_node_t *join_node = value; const char *join_to = join_node->uname; bool integrated = false; switch (join_node->join) { case crm_join_integrated: integrated = true; break; case crm_join_nack: case crm_join_nack_quiet: break; default: crm_trace("Not updating non-integrated and non-nacked node %s (%s) " "for join-%d", join_to, crm_join_phase_str(join_node->join), current_join_id); return; } /* Update the element with the node's name and UUID, in case they * weren't known before */ crm_trace("Updating node name and UUID in CIB for %s", join_to); tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); set_uuid(tmp1, XML_ATTR_ID, join_node); crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); free_xml(tmp1); if (join_node->join == crm_join_nack_quiet) { crm_trace("Not sending nack message to node %s with feature set older " "than 3.17.0", join_to); return; } join_node = crm_get_peer(0, join_to); if (!crm_is_peer_active(join_node)) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING); return; } // Acknowledge or nack node's join request crm_debug("%sing join-%d request from %s", integrated? "Acknowledg" : "Nack", current_join_id, join_to); acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated); if (integrated) { // No change needed for a nacked node crm_update_peer_join(__func__, join_node, crm_join_finalized); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); } send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE); free_xml(acknak); return; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; if (controld_globals.membership_id != crm_peer_seq) { crm_debug("join-%d: Membership changed from %llu to %llu " CRM_XS " highest=%llu state=%s for=%s", current_join_id, controld_globals.membership_id, crm_peer_seq, highest_seq, fsa_state2string(cur_state), source); if(highest_seq < crm_peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = crm_peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(crm_join_welcomed) == 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Integration of %d peer%s complete " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { crm_debug("join-%d: Delaying finalization until we have CIB " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); return TRUE; } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { int count = crmd_join_phase_count(crm_join_welcomed); crm_debug("join-%d: Still waiting on %d welcomed node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_integrated) != 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Still waiting on %d integrated node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_finalized) != 0) { int count = crmd_join_phase_count(crm_join_finalized); crm_debug("join-%d: Still waiting on %d finalized node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); crm_update_quorum(crm_have_quorum, TRUE); } int crmd_join_phase_count(enum crm_join_phase phase) { int count = 0; crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if(peer->join == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, crm_join_phase_str(peer->join)); } } diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h index 2eefeb6eee..58b4ee1159 100644 --- a/daemons/controld/controld_messages.h +++ b/daemons/controld/controld_messages.h @@ -1,87 +1,86 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef XML_CRM_MESSAGES__H # define XML_CRM_MESSAGES__H # include # include # include # include # include typedef struct ha_msg_input_s { xmlNode *msg; xmlNode *xml; } ha_msg_input_t; -extern ha_msg_input_t *new_ha_msg_input(xmlNode * orig); extern void delete_ha_msg_input(ha_msg_input_t * orig); extern void *fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller); # define fsa_typed_data(x) fsa_typed_data_adv(msg_data, x, __func__) extern void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from); #define register_fsa_error(cause, input, new_data) \ register_fsa_error_adv(cause, input, msg_data, new_data, __func__) void register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, uint64_t with_actions, gboolean prepend, const char *raised_from); extern void fsa_dump_queue(int log_level); extern void route_message(enum crmd_fsa_cause cause, xmlNode * input); # define crmd_fsa_stall(suppress) do { \ if(suppress == FALSE && msg_data != NULL) { \ register_fsa_input_adv( \ ((fsa_data_t*)msg_data)->fsa_cause, I_WAIT_FOR_EVENT, \ ((fsa_data_t*)msg_data)->data, action, TRUE, __func__); \ } else { \ register_fsa_input_adv( \ C_FSA_INTERNAL, I_WAIT_FOR_EVENT, \ NULL, action, TRUE, __func__); \ } \ } while(0) #define register_fsa_input(cause, input, data) \ register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__) #define register_fsa_input_before(cause, input, data) \ register_fsa_input_adv(cause, input, data, A_NOTHING, TRUE, __func__) #define register_fsa_input_later(cause, input, data) \ register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __func__) void delete_fsa_input(fsa_data_t * fsa_data); fsa_data_t *get_message(void); extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally); gboolean crmd_is_proxy_session(const char *session); void crmd_proxy_send(const char *session, xmlNode *msg); bool controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client, const char *proxy_session); extern gboolean send_request(xmlNode * msg, char **msg_reference); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); void send_remote_state_message(const char *node_name, gboolean node_up); #endif