Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1841835
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
52 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/crmd/fsa.c b/crmd/fsa.c
index 0651bdc7c8..32992bce76 100644
--- a/crmd/fsa.c
+++ b/crmd/fsa.c
@@ -1,686 +1,686 @@
-/*
+/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/lrmd.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_messages.h>
#include <crmd_fsa.h>
#include <tengine.h>
#include <fsa_proto.h>
#include <fsa_matrix.h>
/*
 * File-scope state shared by the FSA implementation.
 *
 * NOTE(review): the exact meaning of fsa_our_dc, fsa_our_dc_version and
 * fsa_our_uuid is not visible in this file; presumably they describe the
 * current DC and the local node - confirm against their writers.
 */
char *fsa_our_dc = NULL;
/* CIB connection; do_state_transition() calls set_slave() through it on entry to S_PENDING */
cib_t *fsa_cib_conn = NULL;
char *fsa_our_dc_version = NULL;
char *fsa_our_uuid = NULL;
char *fsa_our_uname = NULL;
#if SUPPORT_HEARTBEAT
ll_cluster_t *fsa_cluster_conn;
#endif
/* Timers; only election_timeout, election_trigger and recheck_timer are touched in this file */
fsa_timer_t *wait_timer = NULL;
fsa_timer_t *recheck_timer = NULL;
fsa_timer_t *election_trigger = NULL;
fsa_timer_t *election_timeout = NULL;
fsa_timer_t *transition_timer = NULL;
fsa_timer_t *integration_timer = NULL;
fsa_timer_t *finalization_timer = NULL;
fsa_timer_t *shutdown_escalation_timer = NULL;
/* Reset to FALSE on every s_crmd_fsa() entry; the processing loops stop once it is no longer FALSE */
volatile gboolean do_fsa_stall = FALSE;
/* Bit flags (R_* values such as R_SHUTDOWN) describing the daemon's condition */
volatile long long fsa_input_register = 0;
/* Bit mask of A_* actions still waiting to be executed */
volatile long long fsa_actions = A_NOTHING;
/* Current state of the automaton */
volatile enum crmd_fsa_state fsa_state = S_STARTING;
/* Defined outside this file */
extern uint highest_born_on;
extern uint num_join_invites;
extern GHashTable *welcomed_nodes;
extern GHashTable *finalized_nodes;
extern GHashTable *confirmed_nodes;
extern GHashTable *integrated_nodes;
extern void initialize_join(gboolean before);
/* do_dot_log() forwards to crm_trace(); callers prepend DOT_PREFIX to the format string */
#define DOT_PREFIX "actions:trace: "
#define do_dot_log(fmt, args...) crm_trace( fmt, ##args)
/* Prototypes for functions defined later in this file */
long long do_state_transition(long long actions,
enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state, fsa_data_t * msg_data);
void dump_rsc_info(void);
void dump_rsc_info_callback(const xmlNode * msg, int call_id, int rc,
xmlNode * output, void *user_data);
void ghash_print_node(gpointer key, gpointer value, gpointer user_data);
void s_crmd_fsa_actions(fsa_data_t * fsa_data);
void log_fsa_input(fsa_data_t * stored_msg);
void init_dotfile(void);
/*
 * Emit, via do_dot_log(), the opening of a "dot" digraph description:
 * default graph/node/edge attributes followed by per-state colouring.
 *
 * Every line is logged separately with DOT_PREFIX in front of it.
 * The closing brace of the digraph is not written here.
 */
void
init_dotfile(void)
{
/* graph-wide defaults */
do_dot_log(DOT_PREFIX "digraph \"g\" {");
do_dot_log(DOT_PREFIX " size = \"30,30\"");
do_dot_log(DOT_PREFIX " graph [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
/* defaults for every node */
do_dot_log(DOT_PREFIX " node [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " shape = \"ellipse\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
/* defaults for every edge */
do_dot_log(DOT_PREFIX " edge [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
/* S_PENDING is drawn in blue, S_TERMINATE in red */
do_dot_log(DOT_PREFIX "// special nodes");
do_dot_log(DOT_PREFIX " \"S_PENDING\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"blue\"");
do_dot_log(DOT_PREFIX " fontcolor = \"blue\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " \"S_TERMINATE\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"red\"");
do_dot_log(DOT_PREFIX " fontcolor = \"red\"");
do_dot_log(DOT_PREFIX " ]");
/* states labelled "DC only" get a green font */
do_dot_log(DOT_PREFIX "// DC only nodes");
do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]");
}
/*
 * Run one FSA action handler.
 *
 * The bits of an_action are removed from the pending set (fsa_actions)
 * before the handler is called, the action name is traced, and the handler
 * then receives the action together with the cause and input of the
 * message being processed and the current FSA state.
 */
static void
do_fsa_action(fsa_data_t * fsa_data, long long an_action,
              void (*function) (long long action,
                                enum crmd_fsa_cause cause,
                                enum crmd_fsa_state cur_state,
                                enum crmd_fsa_input cur_input, fsa_data_t * msg_data))
{
    /* mark the action as handled first */
    fsa_actions = fsa_actions & ~an_action;

    crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action));

    {
        const enum crmd_fsa_cause msg_cause = fsa_data->fsa_cause;
        const enum crmd_fsa_input msg_input = fsa_data->fsa_input;

        function(an_action, msg_cause, fsa_state, msg_input, fsa_data);
    }
}
/*
 * Actions that s_crmd_fsa() removes from the pending set once the FSA is in
 * S_STOPPING or R_SHUTDOWN is set in fsa_input_register.
 */
static long long startup_actions =
A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_CCM_CONNECT | A_HA_CONNECT | A_READCONFIG |
A_STARTED | A_CL_JOIN_QUERY;
/*
 * Drive the finite state automaton.
 *
 * Queued inputs are consumed one at a time until the queue is empty or an
 * action handler requests a stall (do_fsa_stall).  For each input:
 *   - actions stored with the input are put back into fsa_actions,
 *   - the actions for (input, current state) are looked up and added,
 *   - the logging actions are run *before* the state changes,
 *   - the new state is looked up, and do_state_transition() is given the
 *     chance to adjust the action set if the state changed,
 *   - s_crmd_fsa_actions() executes the pending actions.
 *
 * If there is nothing queued but actions are still pending, an I_NULL input
 * is queued so that those actions get processed.
 *
 * cause  is only used for the trace message emitted on entry.
 *
 * Returns the FSA state in effect when processing stops.
 */
enum crmd_fsa_state
s_crmd_fsa(enum crmd_fsa_cause cause)
{
    fsa_data_t *fsa_data = NULL;
    long long register_copy = fsa_input_register;
    long long new_actions = A_NOTHING;
    enum crmd_fsa_state last_state = fsa_state;

    crm_trace("FSA invoked with Cause: %s\tState: %s",
              fsa_cause2string(cause), fsa_state2string(fsa_state));

    fsa_dump_actions(fsa_actions, "Initial");

    do_fsa_stall = FALSE;
    if (is_message() == FALSE && fsa_actions != A_NOTHING) {
        /* fake the first message so we can get into the loop */
        fsa_data = calloc(1, sizeof(fsa_data_t));
        /* the fields below are written unconditionally: never continue
         * with a failed allocation */
        CRM_ASSERT(fsa_data != NULL);
        fsa_data->fsa_input = I_NULL;
        fsa_data->fsa_cause = C_FSA_INTERNAL;
        fsa_data->origin = __FUNCTION__;
        fsa_data->data_type = fsa_dt_none;
        fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
        fsa_data = NULL;
    }

    while (is_message() && do_fsa_stall == FALSE) {
        crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue));

        fsa_data = get_message();
        CRM_CHECK(fsa_data != NULL, continue);

        log_fsa_input(fsa_data);

        /* add any actions back to the queue */
        fsa_actions |= fsa_data->actions;
        fsa_dump_actions(fsa_data->actions, "Restored actions");

        /* get the next batch of actions */
        new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state];
        fsa_actions |= new_actions;
        fsa_dump_actions(new_actions, "New actions");

        if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) {
            crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]",
                      fsa_input2string(fsa_data->fsa_input),
                      fsa_state2string(fsa_state),
                      fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
        }

        /* logging : *before* the state is changed */
        if (is_set(fsa_actions, A_ERROR)) {
            do_fsa_action(fsa_data, A_ERROR, do_log);
        }
        if (is_set(fsa_actions, A_WARN)) {
            do_fsa_action(fsa_data, A_WARN, do_log);
        }
        if (is_set(fsa_actions, A_LOG)) {
            do_fsa_action(fsa_data, A_LOG, do_log);
        }

        /* update state variables */
        last_state = fsa_state;
        fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state];

        /*
         * Remove certain actions during shutdown
         */
        if (fsa_state == S_STOPPING || ((fsa_input_register & R_SHUTDOWN) == R_SHUTDOWN)) {
            clear_bit(fsa_actions, startup_actions);
        }

        /*
         * Hook for change of state.
         * Allows actions to be added or removed when entering a state
         */
        if (last_state != fsa_state) {
            fsa_actions = do_state_transition(fsa_actions, last_state, fsa_state, fsa_data);
        } else {
            do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s"
                       " \tInput=%s \tOrigin=%s() \tid=%d",
                       fsa_state2string(fsa_state),
                       fsa_cause2string(fsa_data->fsa_cause),
                       fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id);
        }

        /* start doing things... */
        s_crmd_fsa_actions(fsa_data);
        delete_fsa_input(fsa_data);
        fsa_data = NULL;
    }

    if (g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) {
        crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s",
                  g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall ? "true" : "false");
    } else {
        crm_trace("Exiting the FSA");
    }

    /* report which register bits were set or cleared during this run */
    if (register_copy != fsa_input_register) {
        long long same = register_copy & fsa_input_register;

        fsa_dump_inputs(LOG_DEBUG, "Added", fsa_input_register ^ same);
        fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same);
    }

    fsa_dump_actions(fsa_actions, "Remaining");
    fsa_dump_queue(LOG_DEBUG);

    return fsa_state;
}
/*
 * Execute the actions currently set in fsa_actions.
 *
 * Each loop iteration handles exactly one action: the first branch of the
 * if/else chain whose bit(s) are set.  The order of the chain is therefore
 * the priority order.  do_fsa_action() clears the action's bits before
 * calling the handler, so the loop ends when no bits remain or a handler
 * sets do_fsa_stall.
 *
 * The O_* tests compare against the full mask, i.e. they only match when
 * every bit of the combined action is pending.
 *
 * fsa_data  the input being processed; passed on to every handler.
 */
void
s_crmd_fsa_actions(fsa_data_t * fsa_data)
{
/*
* Process actions in order of priority but do only one
* action at a time to avoid complicating the ordering.
*/
CRM_CHECK(fsa_data != NULL, return);
while (fsa_actions != A_NOTHING && do_fsa_stall == FALSE) {
/* regular action processing in order of action priority
*
* Make sure all actions that connect to required systems
* are performed first
*/
if (fsa_actions & A_ERROR) {
do_fsa_action(fsa_data, A_ERROR, do_log);
} else if (fsa_actions & A_WARN) {
do_fsa_action(fsa_data, A_WARN, do_log);
} else if (fsa_actions & A_LOG) {
do_fsa_action(fsa_data, A_LOG, do_log);
/* get out of here NOW! before anything worse happens */
} else if (fsa_actions & A_EXIT_1) {
do_fsa_action(fsa_data, A_EXIT_1, do_exit);
/* sub-system restart */
} else if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) {
do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control);
} else if ((fsa_actions & O_CIB_RESTART) == O_CIB_RESTART) {
do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control);
} else if ((fsa_actions & O_PE_RESTART) == O_PE_RESTART) {
do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control);
} else if ((fsa_actions & O_TE_RESTART) == O_TE_RESTART) {
do_fsa_action(fsa_data, O_TE_RESTART, do_te_control);
/* essential start tasks */
} else if (fsa_actions & A_STARTUP) {
do_fsa_action(fsa_data, A_STARTUP, do_startup);
} else if (fsa_actions & A_CIB_START) {
do_fsa_action(fsa_data, A_CIB_START, do_cib_control);
} else if (fsa_actions & A_HA_CONNECT) {
do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control);
} else if (fsa_actions & A_READCONFIG) {
do_fsa_action(fsa_data, A_READCONFIG, do_read_config);
/* sub-system start/connect */
} else if (fsa_actions & A_LRM_CONNECT) {
do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control);
} else if (fsa_actions & A_CCM_CONNECT) {
/* the handler only runs on heartbeat clusters; the bit is cleared either way */
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
do_fsa_action(fsa_data, A_CCM_CONNECT, do_ccm_control);
}
#endif
fsa_actions &= ~A_CCM_CONNECT;
} else if (fsa_actions & A_TE_START) {
do_fsa_action(fsa_data, A_TE_START, do_te_control);
} else if (fsa_actions & A_PE_START) {
do_fsa_action(fsa_data, A_PE_START, do_pe_control);
/* Timers */
/* else if(fsa_actions & O_DC_TIMER_RESTART) {
do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */ ;
} else if (fsa_actions & A_DC_TIMER_STOP) {
do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_INTEGRATE_TIMER_STOP) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_INTEGRATE_TIMER_START) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control);
} else if (fsa_actions & A_FINALIZE_TIMER_STOP) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_FINALIZE_TIMER_START) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control);
/*
* Highest priority actions
*/
} else if (fsa_actions & A_MSG_ROUTE) {
do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route);
} else if (fsa_actions & A_RECOVER) {
do_fsa_action(fsa_data, A_RECOVER, do_recover);
} else if (fsa_actions & A_CL_JOIN_RESULT) {
do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond);
} else if (fsa_actions & A_CL_JOIN_REQUEST) {
do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond);
} else if (fsa_actions & A_SHUTDOWN_REQ) {
do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req);
} else if (fsa_actions & A_ELECTION_VOTE) {
do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote);
} else if (fsa_actions & A_ELECTION_COUNT) {
do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote);
} else if (fsa_actions & A_LRM_EVENT) {
do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event);
/*
* High priority actions
*/
} else if (fsa_actions & A_STARTED) {
do_fsa_action(fsa_data, A_STARTED, do_started);
} else if (fsa_actions & A_CL_JOIN_QUERY) {
do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query);
} else if (fsa_actions & A_DC_TIMER_START) {
do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control);
/*
* Medium priority actions
*/
} else if (fsa_actions & A_DC_TAKEOVER) {
do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover);
} else if (fsa_actions & A_DC_RELEASE) {
do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release);
} else if (fsa_actions & A_DC_JOIN_FINAL) {
do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final);
} else if (fsa_actions & A_ELECTION_CHECK) {
do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check);
} else if (fsa_actions & A_ELECTION_START) {
do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote);
} else if (fsa_actions & A_TE_HALT) {
do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke);
} else if (fsa_actions & A_TE_CANCEL) {
do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke);
} else if (fsa_actions & A_DC_JOIN_OFFER_ALL) {
do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all);
} else if (fsa_actions & A_DC_JOIN_OFFER_ONE) {
- do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_all);
+ do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one);
} else if (fsa_actions & A_DC_JOIN_PROCESS_REQ) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer);
} else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack);
/*
* Low(er) priority actions
* Make sure the CIB is always updated before invoking the
* PE, and the PE before the TE
*/
} else if (fsa_actions & A_DC_JOIN_FINALIZE) {
do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize);
} else if (fsa_actions & A_LRM_INVOKE) {
do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke);
} else if (fsa_actions & A_PE_INVOKE) {
do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke);
} else if (fsa_actions & A_TE_INVOKE) {
do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke);
} else if (fsa_actions & A_CL_JOIN_ANNOUNCE) {
do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce);
/* sub-system stop */
} else if (fsa_actions & A_DC_RELEASED) {
do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release);
} else if (fsa_actions & A_PE_STOP) {
do_fsa_action(fsa_data, A_PE_STOP, do_pe_control);
} else if (fsa_actions & A_TE_STOP) {
do_fsa_action(fsa_data, A_TE_STOP, do_te_control);
} else if (fsa_actions & A_SHUTDOWN) {
do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown);
} else if (fsa_actions & A_LRM_DISCONNECT) {
do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control);
} else if (fsa_actions & A_CCM_DISCONNECT) {
/* same pattern as A_CCM_CONNECT: heartbeat only, bit always cleared */
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
do_fsa_action(fsa_data, A_CCM_DISCONNECT, do_ccm_control);
}
#endif
fsa_actions &= ~A_CCM_DISCONNECT;
} else if (fsa_actions & A_HA_DISCONNECT) {
do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control);
} else if (fsa_actions & A_CIB_STOP) {
do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control);
} else if (fsa_actions & A_STOP) {
do_fsa_action(fsa_data, A_STOP, do_stop);
/* exit gracefully */
} else if (fsa_actions & A_EXIT_0) {
do_fsa_action(fsa_data, A_EXIT_0, do_exit);
/* Error checking and reporting */
} else {
/* NOTE(review): this branch does not clear fsa_actions itself; the loop
 * only ends if register_fsa_error_adv() resets it or stalls - confirm */
crm_err("Action %s (0x%llx) not supported ",
fsa_action2string(fsa_actions), fsa_actions);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__);
}
}
}
/*
 * Trace-log a queued FSA input before it is processed.
 *
 * CCM and LRM callbacks and inputs without attached data only get a
 * one-line message naming their origin.  Any other input is treated as
 * carrying an HA message whose XML payload is logged as well.
 *
 * stored_msg  the queued input; must not be NULL.
 */
void
log_fsa_input(fsa_data_t * stored_msg)
{
    crm_trace("Processing queued input %d", stored_msg->id);

    if (stored_msg->fsa_cause == C_CCM_CALLBACK) {
        crm_trace("FSA processing CCM callback from %s", stored_msg->origin);

    } else if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) {
        crm_trace("FSA processing LRM callback from %s", stored_msg->origin);

    } else if (stored_msg->data == NULL) {
        crm_trace("FSA processing input from %s", stored_msg->origin);

    } else {
        ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __FUNCTION__);

        crm_trace("FSA processing XML message from %s", stored_msg->origin);

        /* The typed-data accessor can hand back NULL (other callers test
         * for it), so do not dereference the result blindly. */
        if (ha_input != NULL) {
            crm_log_xml_trace(ha_input->xml, "FSA message data");
        }
    }
}
/*
 * Hook run by s_crmd_fsa() whenever the FSA moves to a different state.
 *
 * Logs the transition, stops timers that do not apply to the new state,
 * performs per-state bookkeeping and returns an adjusted copy of the
 * pending action set.
 *
 * actions     pending actions before the transition
 * cur_state   state being left
 * next_state  state being entered (expected to differ from cur_state)
 * msg_data    input that triggered the transition; its cause, input and
 *             origin are used for logging and a few decisions
 *
 * Returns actions with timer start/stop bits, A_SHUTDOWN_REQ and A_RECOVER
 * added or removed as decided below.
 */
long long
do_state_transition(long long actions,
enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state, fsa_data_t * msg_data)
{
int level = LOG_INFO;
long long tmp = actions;
gboolean clear_recovery_bit = TRUE;
enum crmd_fsa_cause cause = msg_data->fsa_cause;
enum crmd_fsa_input current_input = msg_data->fsa_input;
const char *state_from = fsa_state2string(cur_state);
const char *state_to = fsa_state2string(next_state);
const char *input = fsa_input2string(current_input);
CRM_LOG_ASSERT(cur_state != next_state);
do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]",
state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
/* pick the log level for the transition message; the first match wins */
if (cur_state == S_IDLE || next_state == S_IDLE) {
level = LOG_NOTICE;
} else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) {
level = LOG_NOTICE;
} else if (cur_state == S_ELECTION) {
level = LOG_NOTICE;
} else if (next_state == S_RECOVERY) {
level = LOG_WARNING;
}
do_crm_log(level, "State transition %s -> %s [ input=%s cause=%s origin=%s ]",
state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
/* the last two clauses might cause trouble later */
if (election_timeout != NULL && next_state != S_ELECTION && cur_state != S_RELEASE_DC) {
crm_timer_stop(election_timeout);
/* } else { */
/* crm_timer_start(election_timeout); */
}
#if 0
if ((fsa_input_register & R_SHUTDOWN)) {
set_bit(tmp, A_DC_TIMER_STOP);
}
#endif
/* the integration timer is requested only for S_INTEGRATION, otherwise stopped */
if (next_state == S_INTEGRATION) {
set_bit(tmp, A_INTEGRATE_TIMER_START);
} else {
set_bit(tmp, A_INTEGRATE_TIMER_STOP);
}
/* likewise the finalization timer and S_FINALIZE_JOIN */
if (next_state == S_FINALIZE_JOIN) {
set_bit(tmp, A_FINALIZE_TIMER_START);
} else {
set_bit(tmp, A_FINALIZE_TIMER_STOP);
}
if (next_state != S_PENDING) {
set_bit(tmp, A_DC_TIMER_STOP);
}
if (next_state != S_ELECTION) {
highest_born_on = 0;
}
if (next_state != S_IDLE) {
crm_timer_stop(recheck_timer);
}
if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) {
populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
node_update_join | node_update_expected, __FUNCTION__);
}
/* per-state bookkeeping for the state being entered */
switch (next_state) {
case S_PENDING:
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
/* fall through */
case S_ELECTION:
crm_trace("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state));
update_dc(NULL);
break;
case S_NOT_DC:
election_trigger->counter = 0;
/* free every entry of the stonith cleanup list, then the list itself */
if (stonith_cleanup_list) {
GListPtr gIter = NULL;
for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) {
char *target = gIter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we have a new DC");
set_bit(tmp, A_SHUTDOWN_REQ);
}
CRM_LOG_ASSERT(fsa_our_dc != NULL);
if (fsa_our_dc == NULL) {
crm_err("Reached S_NOT_DC without a DC" " being recorded");
}
break;
case S_RECOVERY:
/* keeps A_RECOVER set at the end of this function */
clear_recovery_bit = FALSE;
break;
case S_FINALIZE_JOIN:
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_warn("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
/* entries still in welcomed_nodes have not answered their join offer */
if (g_hash_table_size(welcomed_nodes) > 0) {
char *msg = strdup(" Welcome reply not received from");
crm_warn("%u cluster nodes failed to respond"
" to the join offer.", g_hash_table_size(welcomed_nodes));
g_hash_table_foreach(welcomed_nodes, ghash_print_node, msg);
free(msg);
} else {
crm_debug("All %d cluster nodes "
"responded to the join offer.", g_hash_table_size(integrated_nodes));
}
break;
case S_POLICY_ENGINE:
election_trigger->counter = 0;
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_info("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
/* compare the join bookkeeping with the active membership; first match wins */
if (g_hash_table_size(finalized_nodes) > 0) {
char *msg = strdup(" Confirm not received from");
crm_err("%u cluster nodes failed to confirm"
" their join.", g_hash_table_size(finalized_nodes));
g_hash_table_foreach(finalized_nodes, ghash_print_node, msg);
free(msg);
} else if (g_hash_table_size(confirmed_nodes)
== crm_active_peers()) {
crm_debug("All %u cluster nodes are"
" eligible to run resources.", crm_active_peers());
} else if (g_hash_table_size(confirmed_nodes) > crm_active_peers()) {
crm_err("We have more confirmed nodes than our membership does: %d vs. %d",
g_hash_table_size(confirmed_nodes), crm_active_peers());
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
} else if (saved_ccm_membership_id != crm_peer_seq) {
crm_info("Membership changed: %llu -> %llu - join restart",
saved_ccm_membership_id, crm_peer_seq);
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
} else {
crm_warn("Only %u of %u cluster "
"nodes are eligible to run resources - continue %d",
g_hash_table_size(confirmed_nodes),
crm_active_peers(), g_hash_table_size(welcomed_nodes));
}
/* initialize_join(FALSE); */
break;
case S_STOPPING:
case S_TERMINATE:
/* possibly redundant */
set_bit(fsa_input_register, R_SHUTDOWN);
break;
case S_IDLE:
CRM_LOG_ASSERT(AM_I_DC);
dump_rsc_info();
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we are the DC");
set_bit(tmp, A_SHUTDOWN_REQ);
}
/* the recheck timer is only started when it has a positive period */
if (recheck_timer->period_ms > 0) {
crm_debug("Starting %s", get_timer_desc(recheck_timer));
crm_timer_start(recheck_timer);
}
break;
default:
break;
}
/* A_RECOVER: forced on when entering S_RECOVERY, left untouched when
 * entering S_PENDING, cleared for every other state */
if (clear_recovery_bit && next_state != S_PENDING) {
tmp &= ~A_RECOVER;
} else if (clear_recovery_bit == FALSE) {
tmp |= A_RECOVER;
}
if (tmp != actions) {
/* fsa_dump_actions(actions ^ tmp, "New actions"); */
actions = tmp;
}
return actions;
}
/*
 * Intentionally empty: called by do_state_transition() on entry to S_IDLE
 * but currently does nothing.
 */
void
dump_rsc_info(void)
{
}
/*
 * Hash-table iteration callback: log one entry at info level.
 *
 * key        entry key, printed as a string
 * value      entry value, printed as a string
 * user_data  string printed in front of the entry
 */
void
ghash_print_node(gpointer key, gpointer value, gpointer user_data)
{
    crm_info("%s: %s %s", (const char *) user_data, (const char *) key, (const char *) value);
}
diff --git a/crmd/join_dc.c b/crmd/join_dc.c
index d8a7bc0683..ea4424fde7 100644
--- a/crmd/join_dc.c
+++ b/crmd/join_dc.c
@@ -1,669 +1,686 @@
-/*
+/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
/*
 * Bookkeeping tables for the join process, keyed by node name.
 * All four start out NULL and are (re)created by initialize_join().
 *
 *   welcomed_nodes    filled by join_make_offer() when an offer is sent;
 *                     entries are removed by do_dc_join_filter_offer()
 *   integrated_nodes  filled by do_dc_join_filter_offer()
 *   finalized_nodes,
 *   confirmed_nodes   filled outside the part of the file shown here
 */
GHashTable *welcomed_nodes = NULL;
GHashTable *integrated_nodes = NULL;
GHashTable *finalized_nodes = NULL;
GHashTable *confirmed_nodes = NULL;
char *max_epoch = NULL;
/* Sender and copy of the generation XML currently treated as the maximum; see do_dc_join_filter_offer() */
char *max_generation_from = NULL;
xmlNode *max_generation_xml = NULL;
void initialize_join(gboolean before);
gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Incremented by do_dc_join_offer_all(); appears in log messages as "join-N" */
static int current_join_id = 0;
/* Value of crm_peer_seq at the time join offers were last made */
unsigned long long saved_ccm_membership_id = 0;
/*
 * Reset the join bookkeeping.
 *
 * The four node tables (welcomed, integrated, finalized, confirmed) are
 * thrown away and replaced by empty ones that own their string keys and
 * values.
 *
 * before  when TRUE, additionally forget the recorded maximum generation
 *         and clear R_HAVE_CIB and R_CIB_ASKED in fsa_input_register.
 */
void
initialize_join(gboolean before)
{
    /* clear out/reset a bunch of stuff */
    crm_debug("join-%d: Initializing join data (flag=%s)",
              current_join_id, before ? "true" : "false");

    /* The tables are NULL until the first call; g_hash_table_destroy()
     * rejects NULL with a critical warning, so only destroy what exists. */
    if (welcomed_nodes != NULL) {
        g_hash_table_destroy(welcomed_nodes);
    }
    if (integrated_nodes != NULL) {
        g_hash_table_destroy(integrated_nodes);
    }
    if (finalized_nodes != NULL) {
        g_hash_table_destroy(finalized_nodes);
    }
    if (confirmed_nodes != NULL) {
        g_hash_table_destroy(confirmed_nodes);
    }

    if (before) {
        free(max_generation_from);      /* free(NULL) is a no-op */
        max_generation_from = NULL;

        if (max_generation_xml != NULL) {
            free_xml(max_generation_xml);
            max_generation_xml = NULL;
        }
        clear_bit(fsa_input_register, R_HAVE_CIB);
        clear_bit(fsa_input_register, R_CIB_ASKED);
    }

    welcomed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                           g_hash_destroy_str, g_hash_destroy_str);
    integrated_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                             g_hash_destroy_str, g_hash_destroy_str);
    finalized_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
    confirmed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
}
/*
 * Remove a node from every join bookkeeping table.
 *
 * Tables that have not been created yet are skipped.  A debug message is
 * logged only when the node was present in at least one table.
 *
 * uname  name of the node to forget; NULL is ignored.
 */
void
erase_node_from_join(const char *uname)
{
    gboolean w = FALSE, i = FALSE, f = FALSE, c = FALSE;

    if (uname == NULL) {
        return;
    }

    if (welcomed_nodes != NULL) {
        w = g_hash_table_remove(welcomed_nodes, uname);
    }
    if (integrated_nodes != NULL) {
        i = g_hash_table_remove(integrated_nodes, uname);
    }
    if (finalized_nodes != NULL) {
        f = g_hash_table_remove(finalized_nodes, uname);
    }
    if (confirmed_nodes != NULL) {
        c = g_hash_table_remove(confirmed_nodes, uname);
    }

    if (w || i || f || c) {
        /* message previously misspelled "integrated" as "itegrated" */
        crm_debug("Removed node %s from join calculations:"
                  " welcomed=%d integrated=%d finalized=%d confirmed=%d", uname, w, i, f, c);
    }
}
/*
 * Send a join offer to one peer and record it in welcomed_nodes.
 *
 * Has the signature of a hash-table iteration callback so that it can be
 * run over crm_peer_cache, but is also called directly.
 *
 * key        unused
 * value      the peer (crm_node_t) to make the offer to; must not be NULL
 * user_data  used purely as a flag: when non-NULL, peers already present in
 *            any of the join tables are skipped instead of being re-offered
 *
 * Inactive peers and peers without a name are skipped.
 *
 * NOTE(review): the lookups in the user_data branch do not test the tables
 * for NULL, unlike erase_node_from_join() - confirm the tables always exist
 * by the time this runs.
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
const char *join_to = NULL;
const crm_node_t *member = value;
CRM_ASSERT(member != NULL);
if (crm_is_peer_active(member) == FALSE) {
crm_trace("Not making an offer to %s: not active", member->uname);
return;
}
join_to = member->uname;
if (join_to == NULL) {
crm_err("No recipient for welcome message");
return;
}
- erase_node_from_join(join_to);
-
/* remember which membership the offers are based on */
if (saved_ccm_membership_id != crm_peer_seq) {
saved_ccm_membership_id = crm_peer_seq;
crm_info("Making join offers based on membership %llu", crm_peer_seq);
}
+ if(user_data) {
+ const char *reason = NULL;
+ if(reason == NULL && g_hash_table_lookup(welcomed_nodes, join_to)) {
+ reason = "welcomed";
+ }
+ if(reason == NULL && g_hash_table_lookup(integrated_nodes, join_to)) {
+ reason = "integrated";
+ }
+ if(reason == NULL && g_hash_table_lookup(finalized_nodes, join_to)) {
+ reason = "finalized";
+ }
+ if(reason == NULL && g_hash_table_lookup(confirmed_nodes, join_to)) {
+ reason = "confirmed";
+ }
+ if(reason) {
+ crm_info("Skipping %s: already %s", member->uname, reason);
+ return;
+ }
+ }
+
+ erase_node_from_join(join_to);
+
if (crm_is_peer_active(member)) {
xmlNode *offer = create_request(CRM_OP_JOIN_OFFER, NULL, join_to,
CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
/* stored as the table value; the table frees it */
char *join_offered = crm_itoa(current_join_id);
crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id);
/* send the welcome */
- crm_debug("join-%d: Sending offer to %s", current_join_id, join_to);
+ crm_info("join-%d: Sending offer to %s", current_join_id, join_to);
send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, offer, TRUE);
free_xml(offer);
g_hash_table_insert(welcomed_nodes, strdup(join_to), join_offered);
+ /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */
+
} else {
crm_info("Peer process on %s is not active (yet?): %.8lx %d",
join_to, (long)member->processes, g_hash_table_size(crm_peer_cache));
}
}
/* A_DC_JOIN_OFFER_ALL
 *
 * Open a new join round: bump the join id, reset all join bookkeeping
 * (including the recorded maximum generation), forget the current DC and
 * make an offer to every peer in the cache.
 *
 * Only cause and current_input are examined, and only to decide whether
 * to log that a node joined; the other arguments are unused.
 */
void
do_dc_join_offer_all(long long action,
                     enum crmd_fsa_cause cause,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    const gboolean triggered_by_join = (current_input == I_NODE_JOIN)
                                       && (cause == C_HA_MESSAGE);

    /* any nodes that are active in the CIB but not in the CCM list
     * will be seen as offline by the PE anyway
     */
    current_join_id += 1;
    initialize_join(TRUE);
    update_dc(NULL);

    if (triggered_by_join) {
        crm_info("A new node joined the cluster");
    }

    /* NULL user_data: offer to every active peer unconditionally */
    g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);

    /* the PE is not invoked yet; wait for the replies first */
    crm_info("join-%d: Waiting on %d outstanding join acks",
             current_join_id, g_hash_table_size(welcomed_nodes));
}
/* A_DC_JOIN_OFFER_ONE
 *
 * Make a join offer in response to a single node's request.
 *
 * With an attached message, the sender (F_CRM_HOST_FROM) is offered
 * membership, followed by an offer to the local node, and a transition is
 * started.  Without an attached message, offers go to every cached peer
 * that is not yet in any of the join tables.
 *
 * action and current_input are not examined here; cause is not used
 * directly either (the fsa_typed_data() macro may use the arguments
 * implicitly).
 */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_node_t *member;
ha_msg_input_t *welcome = NULL;
const char *op = NULL;
const char *join_to = NULL;
if (msg_data->data) {
welcome = fsa_typed_data(fsa_dt_ha_msg);
} else {
- crm_info("A new node joined - wait until it contacts us");
+ crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes");
+ g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
return;
}
if (welcome == NULL) {
crm_err("Attempt to send welcome message without a message to reply to!");
return;
}
join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
if (join_to == NULL) {
crm_err("Attempt to send welcome message without a host to reply to!");
return;
}
member = crm_get_peer(0, join_to);
- crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING);
-
- if (crm_is_peer_active(member) == FALSE) {
- crm_err("%s is not a fully active member of our partition", join_to);
- return;
- }
-
op = crm_element_value(welcome->msg, F_CRM_TASK);
if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) {
/* note: it _is_ possible that a node will have been
* sick or starting up when the original offer was made.
* however, it will either re-announce itself in due course
* _or_ we can re-store the original offer on the client.
*/
crm_trace("(Re-)offering membership to %s...", join_to);
}
crm_info("join-%d: Processing %s request from %s in state %s",
current_join_id, op, join_to, fsa_state2string(cur_state));
+ erase_node_from_join(join_to);
join_make_offer(NULL, member, NULL);
/* always offer to the DC (ourselves)
* this ensures the correct value for max_generation_from
*/
member = crm_get_peer(0, fsa_our_uname);
join_make_offer(NULL, member, NULL);
/* this was a genuine join request, cancel any existing
* transition and invoke the PE
*/
start_transition(fsa_state);
/* dont waste time by invoking the pe yet; */
crm_debug("Waiting on %d outstanding join acks for join-%d",
g_hash_table_size(welcomed_nodes), current_join_id);
}
/*
 * Three-way comparison of one attribute of two XML nodes.
 *
 * The attribute named by field is read from both nodes and converted with
 * crm_int_helper().
 *
 * Returns -1 if left's value is smaller, 1 if it is larger, 0 if equal.
 */
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
    const char *lhs_text = crm_element_value(left, field);
    const char *rhs_text = crm_element_value(right, field);
    const int lhs = crm_int_helper(lhs_text, NULL);
    const int rhs = crm_int_helper(rhs_text, NULL);

    return (lhs > rhs) - (lhs < rhs);
}
/* A_DC_JOIN_PROCESS_REQ */
/*
 * Process a join request that a peer sent in response to a join offer.
 *
 * The sender is taken from F_CRM_HOST_FROM and the join round from
 * F_CRM_JOIN_ID.  A request for a different round than current_join_id is
 * dropped (after re-checking the join state).  Otherwise the sender is
 * recorded in integrated_nodes as CRMD_JOINSTATE_MEMBER or, if it is not an
 * active peer or supplied no generation XML, as CRMD_JOINSTATE_NACK, and it
 * is removed from welcomed_nodes.
 *
 * The generation XML attached to the request is compared with
 * max_generation_xml; if the sender's is newer (or equal and the sender is
 * this node) it becomes the new max_generation_xml/max_generation_from.
 *
 * A request that carries no sender name is logged and ignored: the name is
 * needed as a hash key and for strdup(), neither of which accepts NULL.
 */
void
do_dc_join_filter_offer(long long action,
                        enum crmd_fsa_cause cause,
                        enum crmd_fsa_state cur_state,
                        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *generation = NULL;

    int cmp = 0;
    int join_id = -1;
    gboolean ack_nack_bool = TRUE;
    const char *ack_nack = CRMD_JOINSTATE_MEMBER;

    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
    const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);

    crm_node_t *join_node = NULL;

    if (join_from == NULL) {
        /* Nothing below can work without a sender name */
        crm_err("Ignoring invalid join request without node name");
        return;
    }

    join_node = crm_get_peer(0, join_from);

    crm_debug("Processing req from %s", join_from);

    generation = join_ack->xml;
    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);

    if (max_generation_xml != NULL && generation != NULL) {
        int lpc = 0;

        /* Compared in order; the first field that differs decides */
        const char *attributes[] = {
            XML_ATTR_GENERATION_ADMIN,
            XML_ATTR_GENERATION,
            XML_ATTR_NUMUPDATES,
        };

        for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) {
            cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
        }
    }

    if (join_id != current_join_id) {
        crm_debug("Invalid response from %s: join-%d vs. join-%d",
                  join_from, join_id, current_join_id);
        check_join_state(cur_state, __FUNCTION__);
        return;

    } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
        crm_err("Node %s is not a member", join_from);
        ack_nack_bool = FALSE;

    } else if (generation == NULL) {
        crm_err("Generation was NULL");
        ack_nack_bool = FALSE;

    } else if (max_generation_xml == NULL) {
        /* First usable generation seen in this round */
        max_generation_xml = copy_xml(generation);
        max_generation_from = strdup(join_from);

    } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
        /* cmp < 0: the current max is older than the sender's generation.
         * On a tie, our own copy is preferred.
         */
        crm_debug("%s has a better generation number than"
                  " the current max %s", join_from, max_generation_from);

        /* max_generation_xml is known to be non-NULL here: the NULL case
         * was handled by the previous branch
         */
        crm_log_xml_debug(max_generation_xml, "Max generation");
        crm_log_xml_debug(generation, "Their generation");

        free(max_generation_from);
        free_xml(max_generation_xml);

        max_generation_from = strdup(join_from);
        max_generation_xml = copy_xml(join_ack->xml);
    }

    if (ack_nack_bool == FALSE) {
        /* NACK this client */
        ack_nack = CRMD_JOINSTATE_NACK;
        crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref);
    } else {
        crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
    }

    /* record the node together with its verdict (member or nack) */
    g_hash_table_insert(integrated_nodes, strdup(join_from), strdup(ack_nack));
    crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);

    crm_debug("%u nodes have been integrated into join-%d",
              g_hash_table_size(integrated_nodes), join_id);

    /* the offer to this node is no longer outstanding */
    g_hash_table_remove(welcomed_nodes, join_from);

    if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
        /* dont waste time by invoking the PE yet; */
        crm_debug("join-%d: Still waiting on %d outstanding offers",
                  join_id, g_hash_table_size(welcomed_nodes));
    }
}
/* A_DC_JOIN_FINALIZE */
/*
 * Begin finalizing the current join round by synchronizing the CIB.
 *
 * If no node has been integrated, an I_ELECTION_DC error input is registered
 * and nothing else happens.  If a transition is still in progress
 * (R_IN_TRANSITION), the FSA is stalled.  Otherwise a CIB sync is requested
 * from the node holding the best generation (max_generation_from), or from
 * this node when that is us or unknown, and finalize_sync_callback() is
 * registered for the result.  The callback receives the strdup()'d name of
 * the sync source as its user data.
 */
void
do_dc_join_finalize(long long action,
                    enum crmd_fsa_cause cause,
                    enum crmd_fsa_state cur_state,
                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    char *sync_source = NULL;
    int sync_call = pcmk_ok;
    gboolean best_cib_is_ours = FALSE;

    /* This we can do straight away and avoid clients timing us out
     *  while we compute the latest CIB
     */
    crm_debug("Finializing join-%d for %d clients",
              current_join_id, g_hash_table_size(integrated_nodes));

    if (g_hash_table_size(integrated_nodes) == 0) {
        /* If we don't even have ourself, start again */
        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
        return;
    }

    /* R_HAVE_CIB ends up set only when the best CIB is already the local one */
    clear_bit(fsa_input_register, R_HAVE_CIB);
    best_cib_is_ours = (max_generation_from == NULL
                        || safe_str_eq(max_generation_from, fsa_our_uname));
    if (best_cib_is_ours) {
        set_bit(fsa_input_register, R_HAVE_CIB);
    }

    if (is_set(fsa_input_register, R_IN_TRANSITION)) {
        crm_warn("join-%d: We are still in a transition."
                 "  Delaying until the TE completes.", current_join_id);
        crmd_fsa_stall(FALSE);
        return;
    }

    if (is_set(fsa_input_register, R_HAVE_CIB)) {
        /* Send _our_ CIB out to everyone */
        sync_source = strdup(fsa_our_uname);

    } else {
        /* ask for the agreed best CIB */
        sync_source = strdup(max_generation_from);
        crm_log_xml_debug(max_generation_xml, "Requesting version");
        set_bit(fsa_input_register, R_CIB_ASKED);
    }

    crm_info("join-%d: Syncing the CIB from %s to the rest of the cluster",
             current_join_id, sync_source);

    sync_call = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_source, NULL,
                                              cib_quorum_override);
    fsa_register_cib_callback(sync_call, FALSE, sync_source, finalize_sync_callback);
}
/*
 * CIB callback for the sync requested by do_dc_join_finalize().
 *
 * user_data is the name of the node the sync was taken from; it is freed
 * here on every path.
 *
 * On failure the join process is restarted via an I_ELECTION_DC error input.
 * On success, provided we are still the DC in S_FINALIZE_JOIN, R_HAVE_CIB is
 * set and, unless check_join_state() returns TRUE, every entry of
 * integrated_nodes is passed to finalize_join_for() and removed.
 */
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    CRM_LOG_ASSERT(-EPERM != rc);
    clear_bit(fsa_input_register, R_CIB_ASKED);

    if (rc == pcmk_ok) {
        if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) {
            set_bit(fsa_input_register, R_HAVE_CIB);
            clear_bit(fsa_input_register, R_CIB_ASKED);

            /* make sure dc_uuid is re-set to us */
            if (check_join_state(fsa_state, __FUNCTION__) == FALSE) {
                crm_debug("Notifying %d clients of join-%d results",
                          g_hash_table_size(integrated_nodes), current_join_id);
                g_hash_table_foreach_remove(integrated_nodes, finalize_join_for, NULL);
            }

        } else {
            crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s",
                      AM_I_DC ? "DC" : "CRMd", fsa_state2string(fsa_state));
        }

    } else {
        /* Stale data is only worth a warning, anything else is an error */
        do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR),
                   "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));

        /* restart the whole join process */
        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
    }

    free(user_data);
}
/*
 * CIB callback for the status update issued by do_dc_join_ack().
 *
 * A successful update re-evaluates the join state; a failed one is logged
 * and reported to the FSA as I_ERROR.
 */
static void
join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    /* NOTE(review): not referenced by name below; presumably picked up
     * implicitly by the register_fsa_error() macro - confirm before removing
     */
    fsa_data_t *msg_data = NULL;

    if (rc != pcmk_ok) {
        crm_err("Join update %d failed", call_id);
        crm_log_xml_debug(msg, "failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    crm_debug("Join update %d complete", call_id);
    check_join_state(fsa_state, __FUNCTION__);
}
/* A_DC_JOIN_PROCESS_ACK */
/*
 * Process a node's confirmation (CRM_OP_JOIN_CONFIRM) of our join ack.
 *
 * Messages with any other operation are ignored.  The sender must have an
 * entry in finalized_nodes with state CRMD_JOINSTATE_MEMBER and must quote
 * the current join id; otherwise the message is rejected (and, if an entry
 * existed, that entry is removed).
 *
 * An accepted sender is moved from finalized_nodes to confirmed_nodes, its
 * LRM status section is erased, and the XML attached to the message is
 * written to the CIB status section.  join_update_complete_callback() is
 * registered for the result of that update.
 */
void
do_dc_join_ack(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int join_id = -1;
    int call_id = 0;
    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

    const char *join_id_s = NULL;
    const char *join_state = NULL;
    const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);

    if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) {
        crm_debug("Ignoring op=%s message from %s", op, join_from);
        return;
    }

    /* The join id is needed both as a number (validation, logging) and as
     * text (the value stored in confirmed_nodes)
     */
    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
    join_id_s = crm_element_value(join_ack->msg, F_CRM_JOIN_ID);

    /* now update them to "member" */

    crm_trace("Processing ack from %s", join_from);

    join_state = (const char *) g_hash_table_lookup(finalized_nodes, join_from);

    if (join_state == NULL) {
        crm_err("Join not in progress: ignoring join-%d from %s", join_id, join_from);
        return;
    }

    if (safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) {
        crm_err("Node %s wasnt invited to join the cluster", join_from);
        g_hash_table_remove(finalized_nodes, join_from);
        return;
    }

    if (join_id != current_join_id) {
        crm_err("Invalid response from %s: join-%d vs. join-%d",
                join_from, join_id, current_join_id);
        g_hash_table_remove(finalized_nodes, join_from);
        return;
    }

    /* Valid confirmation: finalized -> confirmed */
    g_hash_table_remove(finalized_nodes, join_from);

    if (g_hash_table_lookup(confirmed_nodes, join_from) != NULL) {
        crm_err("join-%d: hash already contains confirmation from %s", join_id, join_from);
    }

    g_hash_table_insert(confirmed_nodes, strdup(join_from), strdup(join_id_s));

    crm_info("join-%d: Updating node state to %s for %s",
             join_id, CRMD_JOINSTATE_MEMBER, join_from);

    /* update CIB with the current LRM status from the node
     * We dont need to notify the TE of these updates, a transition will
     *   be started in due time
     */
    erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
    fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
                   cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
    fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);

    crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id);
}
/*
 * Send the join verdict to one integrated node.
 *
 * Has the key/value/user_data signature of a GLib hash-table callback:
 * 'key' is the node name and 'value' its join state.  It always returns
 * TRUE (it is used with g_hash_table_foreach_remove(), so the entry is
 * dropped from the table being walked).
 *
 * A node entry for the peer is first written to the CIB nodes section.
 * If the peer is not currently active, its expected state is set to
 * CRMD_JOINSTATE_PENDING and no message is sent.  Otherwise a
 * CRM_OP_JOIN_ACKNAK message carrying the current join id is sent: an ACK
 * (and an entry in finalized_nodes) for CRMD_JOINSTATE_MEMBER, a NACK for
 * any other state.
 */
gboolean
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
    const char *join_to = (const char *)key;
    const char *join_state = (const char *)value;
    xmlNode *reply = NULL;
    xmlNode *node_entry = NULL;
    crm_node_t *join_node = NULL;
    gboolean accepted = FALSE;

    if (key == NULL || value == NULL) {
        return TRUE;
    }

    /* make sure a node entry exists for the new node */
    crm_trace("Creating node entry for %s", join_to);

    node_entry = create_xml_node(NULL, XML_CIB_TAG_NODE);
    set_uuid(node_entry, XML_ATTR_UUID, join_to);
    crm_xml_add(node_entry, XML_ATTR_UNAME, join_to);

    fsa_cib_anon_update(XML_CIB_TAG_NODES, node_entry,
                        cib_scope_local | cib_quorum_override | cib_can_create);
    free_xml(node_entry);

    join_node = crm_get_peer(0, join_to);
    if (!crm_is_peer_active(join_node)) {
        /*
         * NACK'ing nodes that the membership layer doesn't know about yet
         * simply creates more churn
         *
         * Better to leave them waiting and let the join restart when
         * the new membership event comes in
         *
         * All other NACKs (due to versions etc) should still be processed
         */
        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING);
        return TRUE;
    }

    /* send the ack/nack to the node */
    reply = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to,
                           CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(reply, F_CRM_JOIN_ID, current_join_id);

    /* set the ack/nack */
    accepted = safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER);
    if (accepted) {
        crm_debug("join-%d: ACK'ing join request from %s, state %s",
                  current_join_id, join_to, join_state);
        crm_xml_add(reply, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);

        /* we now wait for this node to confirm */
        g_hash_table_insert(finalized_nodes, strdup(join_to), strdup(CRMD_JOINSTATE_MEMBER));
        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);

    } else {
        crm_warn("join-%d: NACK'ing join request from %s, state %s",
                 current_join_id, join_to, join_state);
        crm_xml_add(reply, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE);
        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK);
    }

    send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, reply, TRUE);
    free_xml(reply);

    return TRUE;
}
/* Forward declaration: used below to log each entry of a node table */
void ghash_print_node(gpointer key, gpointer value, gpointer user_data);
/*
 * Re-evaluate the progress of the current join round and register the
 * matching FSA input, if any.
 *
 * cur_state: the FSA state to evaluate against
 * source:    name of the caller, used only in log messages
 *
 * Returns TRUE when integration has completed (S_INTEGRATION with no
 * outstanding entries in welcomed_nodes) or when finalization has to wait
 * because R_HAVE_CIB is not set; returns FALSE in every other case,
 * including the one where I_FINALIZED is registered.
 */
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));
/* A membership change since the join started takes priority over
 * everything else: ask for the join to be re-run
 */
if (saved_ccm_membership_id != crm_peer_seq) {
crm_debug("%s: Membership changed since join started: %llu -> %llu",
source, saved_ccm_membership_id, crm_peer_seq);
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
} else if (cur_state == S_INTEGRATION) {
/* Integration is done once no offered node is still outstanding */
if (g_hash_table_size(welcomed_nodes) == 0) {
crm_debug("join-%d: Integration of %d peers complete: %s",
current_join_id, g_hash_table_size(integrated_nodes), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
return TRUE;
} else if (g_hash_table_size(integrated_nodes) == 0
&& g_hash_table_size(finalized_nodes) == 0) {
/* Nothing left in either table: the round is finished */
crm_debug("join-%d complete: %s", current_join_id, source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
} else if (g_hash_table_size(integrated_nodes) != 0
&& g_hash_table_size(finalized_nodes) != 0) {
/* Both tables still have entries: log the counts and every entry */
char *msg = NULL;
- crm_err("join-%d: Waiting on %d integrated nodes"
+ crm_info("join-%d: Waiting on %d integrated nodes"
" AND %d finalized nodes",
current_join_id,
g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes));
/* msg is handed to ghash_print_node() as its user_data */
msg = strdup("Integrated node");
g_hash_table_foreach(integrated_nodes, ghash_print_node, msg);
free(msg);
msg = strdup("Finalized node");
g_hash_table_foreach(finalized_nodes, ghash_print_node, msg);
free(msg);
} else if (g_hash_table_size(integrated_nodes) != 0) {
crm_debug("join-%d: Still waiting on %d integrated nodes",
current_join_id, g_hash_table_size(integrated_nodes));
} else if (g_hash_table_size(finalized_nodes) != 0) {
crm_debug("join-%d: Still waiting on %d finalized nodes",
current_join_id, g_hash_table_size(finalized_nodes));
}
}
return FALSE;
}
/*
 * FSA action run once the join has completed: logs the step, calls
 * update_attrd() with all-NULL arguments, and calls crm_update_quorum()
 * with the current crm_have_quorum value and TRUE as its second argument.
 *
 * None of the FSA action parameters are referenced by name in the body.
 */
void
do_dc_join_final(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_debug("Ensuring DC, quorum and node attributes are up-to-date");

    /* node attributes first ... */
    update_attrd(NULL, NULL, NULL, NULL);

    /* ... then the quorum state */
    crm_update_quorum(crm_have_quorum, TRUE);
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 23, 8:19 AM (23 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1009862
Default Alt Text
(52 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment