diff --git a/crm/crmd/crmd_utils.h b/crm/crmd/crmd_utils.h index 362303b8a1..977ffaec69 100644 --- a/crm/crmd/crmd_utils.h +++ b/crm/crmd/crmd_utils.h @@ -1,74 +1,74 @@ -/* $Id: crmd_utils.h,v 1.22 2006/02/03 15:01:39 andrew Exp $ */ +/* $Id: crmd_utils.h,v 1.23 2006/04/09 14:38:04 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRMD_UTILS__H #define CRMD_UTILS__H #include #include #define CLIENT_EXIT_WAIT 30 extern void process_client_disconnect(crmd_client_t *curr_client); extern void update_local_cib_adv( crm_data_t *msg_data, gboolean do_now, const char *raised_from); #define update_local_cib(data) update_local_cib_adv(data, TRUE, __FUNCTION__) extern long long toggle_bit (long long action_list, long long action); extern long long clear_bit (long long action_list, long long action); extern long long set_bit (long long action_list, long long action); #define set_bit_inplace(word, bit) word = set_bit(word, bit) #define clear_bit_inplace(word, bit) word = clear_bit(word, bit) #define toggle_bit_inplace(word, bit) word = toggle_bit(word, bit) extern gboolean is_set(long long action_list, long long action); extern gboolean is_set_any(long long action_list, long long action); extern gboolean crm_timer_stop (fsa_timer_t *timer); extern gboolean crm_timer_start(fsa_timer_t *timer); extern gboolean crm_timer_popped(gpointer data); extern crm_data_t *create_node_state( const char *uname, const char *ha_state, const char *ccm_state, const char *crmd_state, const char *join_state, const char *exp_state, gboolean clear_shutdown, const char *src); extern void create_node_entry( const char *uuid, const char *uname, const char *type); extern gboolean stop_subsystem ( struct crm_subsystem_s *centry, gboolean force_quit); extern gboolean start_subsystem(struct crm_subsystem_s *centry); extern lrm_op_t *copy_lrm_op(const lrm_op_t *op); extern lrm_rsc_t *copy_lrm_rsc(const lrm_rsc_t *rsc); extern struct crmd_ccm_data_s *copy_ccm_data( const struct crmd_ccm_data_s *ccm_input); extern oc_ev_membership_t *copy_ccm_oc_data(const oc_ev_membership_t *oc_in) ; extern void fsa_dump_actions(long long action, const char *text); extern void fsa_dump_inputs( int log_level, const char *text, long long input_register); extern gboolean need_transition(enum crmd_fsa_state state); -extern void update_dc(HA_Message *msg); +extern void update_dc(HA_Message *msg, gboolean assert_same); #endif diff --git a/crm/crmd/election.c b/crm/crmd/election.c index ef54915e93..8fd0a2c2a9 100644 --- a/crm/crmd/election.c +++ b/crm/crmd/election.c @@ -1,442 +1,438 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *voted = NULL; uint highest_born_on = -1; static int current_election_id = 1; /* A_ELECTION_VOTE */ enum crmd_fsa_input do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean not_voting = FALSE; HA_Message *vote = NULL; /* dont vote if we're in one of these states or wanting to shut down */ switch(cur_state) { case S_RECOVERY: case S_STOPPING: case S_TERMINATE: crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state)); not_voting = TRUE; break; default: break; } if(not_voting == FALSE) { if(is_set(fsa_input_register, R_STARTING)) { not_voting = TRUE; } } if(not_voting) { if(AM_I_DC) { register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } return I_NULL; } vote = create_request( CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); current_election_id++; ha_msg_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid); ha_msg_add_int(vote, F_CRM_ELECTION_ID, current_election_id); send_request(vote, NULL); if(cur_state == S_ELECTION || cur_state == S_RELEASE_DC) { crm_timer_start(election_timeout); } else if(cur_state != S_INTEGRATION) { crm_err("Broken? Voting in state %s", fsa_state2string(cur_state)); } return I_NULL; } char *dc_hb_msg = NULL; int beat_num = 0; gboolean do_dc_heartbeat(gpointer data) { #if 0 fsa_timer_t *timer = (fsa_timer_t *)data; crm_debug_3("Sending DC Heartbeat %d", beat_num); HA_Message *msg = ha_msg_new(5); ha_msg_add(msg, F_TYPE, T_CRM); ha_msg_add(msg, F_SUBTYPE, XML_ATTR_REQUEST); ha_msg_add(msg, F_CRM_SYS_TO, CRM_SYSTEM_CRMD); ha_msg_add(msg, F_CRM_SYS_FROM, CRM_SYSTEM_DC); ha_msg_add(msg, F_CRM_TASK, CRM_OP_HBEAT); ha_msg_add_int(msg, "dc_beat_seq", beat_num); beat_num++; if(send_msg_via_ha(fsa_cluster_conn, msg) == FALSE) { /* this is bad */ crm_timer_stop(timer); /* make it not go off again */ register_fsa_input(C_HEARTBEAT_FAILED, I_SHUTDOWN, NULL); return FALSE; } #endif return TRUE; } struct election_data_s { const char *winning_uname; unsigned int winning_bornon; }; enum crmd_fsa_input do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int voted_size = g_hash_table_size(voted); int num_members = g_hash_table_size(fsa_membership_copy->members); /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if(fsa_state != S_ELECTION) { crm_debug("Ignore election check: we not in an election"); } else if(voted_size == num_members) { /* we won and everyone has voted */ crm_timer_stop(election_timeout); register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); crm_debug("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } else { crm_info("Still waiting on %d non-votes (%d total)", num_members - voted_size, num_members); } return I_NULL; } /* A_ELECTION_COUNT */ enum crmd_fsa_input do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int election_id = -1; gboolean we_loose = FALSE; enum crmd_fsa_input election_result = I_NULL; oc_node_t *our_node = NULL, *your_node = NULL; ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg); const char *op = cl_get_string(vote->msg, F_CRM_TASK); const char *vote_from = cl_get_string(vote->msg, F_CRM_HOST_FROM); const char *your_version = cl_get_string(vote->msg, F_CRM_VERSION); const char *election_owner= cl_get_string(vote->msg, F_CRM_ELECTION_OWNER); /* if the membership copy is NULL we REALLY shouldnt be voting * the question is how we managed to get here. */ CRM_CHECK(fsa_membership_copy != NULL, return I_NULL); CRM_CHECK(fsa_membership_copy->members != NULL, return I_NULL); CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname); our_node = (oc_node_t*)g_hash_table_lookup( fsa_membership_copy->members, fsa_our_uname); your_node = (oc_node_t*)g_hash_table_lookup( fsa_membership_copy->members, vote_from); if(voted == NULL) { crm_debug("Created voted hash"); voted = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } ha_msg_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id); /* update the list of nodes that have voted */ if(your_node != NULL) { char *op_copy = NULL; char *uname_copy = NULL; if(safe_str_eq(op, CRM_OP_NOVOTE)) { if(safe_str_neq(fsa_our_uuid, election_owner)) { crm_err("Recieved %s for %s (we are %s)", op, election_owner, fsa_our_uuid); } } crm_debug("Election owner: %s", election_owner); if(safe_str_eq(fsa_our_uuid, election_owner)) { if(election_id != current_election_id) { crm_debug("Ignore old novote from %s: %d vs. %d", your_node->node_uname, election_id, current_election_id); } uname_copy = crm_strdup(your_node->node_uname); op_copy = crm_strdup(op); g_hash_table_replace(voted, uname_copy, op_copy); crm_info("Updated voted hash for %s to %s", uname_copy, op_copy); } } else { crm_debug("Election ignore: The other side doesnt exist in CCM."); return I_NULL; } if(vote_from == NULL || safe_str_eq(vote_from, fsa_our_uname)) { /* dont count our own vote */ crm_info("Election ignore: our %s (%s)", op, crm_str(vote_from)); return I_NULL; } else if(safe_str_eq(op, CRM_OP_NOVOTE)) { crm_info("Election ignore: no-vote from %s", vote_from); return I_NULL; } crm_info("Election check: %s from %s", op, vote_from); if(our_node == NULL || fsa_membership_copy->last_event == OC_EV_MS_EVICTED) { crm_info("Election fail: we dont exist in CCM"); we_loose = TRUE; } else if(compare_version(your_version, CRM_FEATURE_SET) < 0) { crm_info("Election fail: version"); we_loose = TRUE; } else if(compare_version(your_version, CRM_FEATURE_SET) > 0) { crm_info("Election pass: version"); } else if(your_node->node_born_on < our_node->node_born_on) { crm_debug("Election fail: born_on"); we_loose = TRUE; } else if(your_node->node_born_on > our_node->node_born_on) { crm_debug("Election pass: born_on"); } else if(strcmp(fsa_our_uname, vote_from) > 0) { crm_debug("Election fail: uname"); we_loose = TRUE; } else { -/* CRM_DEV_ASSERT(strcmp(fsa_our_uname, vote_from) < 0); */ + CRM_CHECK(strcmp(fsa_our_uname, vote_from) != 0, ;); crm_debug("Them: %s (born=%d) Us: %s (born=%d)", vote_from, your_node->node_born_on, fsa_our_uname, our_node->node_born_on); /* cant happen... * } else if(strcmp(fsa_our_uname, vote_from) == 0) { * * default... * } else { // strcmp(fsa_our_uname, vote_from) < 0 * we win */ } if(we_loose) { cl_uuid_t vote_uuid_s; gboolean vote_sent = FALSE; char vote_uuid[UU_UNPARSE_SIZEOF]; HA_Message *novote = create_request( CRM_OP_NOVOTE, NULL, vote_from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); + + update_dc(NULL, FALSE); if(cl_get_uuid(vote->msg, F_ORIGUUID, &vote_uuid_s) == HA_OK) { cl_uuid_unparse(&vote_uuid_s, vote_uuid); } else { cl_log_message(LOG_ERR, vote->msg); } crm_timer_stop(election_timeout); crm_debug("Election lost to %s (%s/%d)", vote_from, vote_uuid, election_id); if(fsa_input_register & R_THE_DC) { crm_debug_3("Give up the DC to %s", vote_from); election_result = I_RELEASE_DC; } else { crm_debug_3("We werent the DC anyway"); election_result = I_PENDING; } ha_msg_add(novote, F_CRM_ELECTION_OWNER, vote_uuid); ha_msg_add_int(novote, F_CRM_ELECTION_ID, election_id); vote_sent = send_request(novote, NULL); CRM_DEV_ASSERT(vote_sent); fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); } else { if(cur_state == S_PENDING) { crm_debug("Election ignore: We already lost the election"); return I_NULL; } else { crm_info("Election won over %s", vote_from); election_result = I_ELECTION; } crm_debug("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } register_fsa_input(C_FSA_INTERNAL, election_result, NULL); return I_NULL; } /* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */ /* we won */ enum crmd_fsa_input do_election_timer_ctrl(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { return I_NULL; } static void feature_update_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_TAKEOVER */ enum crmd_fsa_input do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int rc = cib_ok; crm_data_t *cib = NULL; crm_data_t *fragment = NULL; crm_info("Taking over DC status for this partition"); set_bit_inplace(fsa_input_register, R_THE_DC); - crm_free(fsa_our_dc); - fsa_our_dc = crm_strdup(fsa_our_uname); - - crm_free(fsa_our_dc_version); - fsa_our_dc_version = crm_strdup(CRM_FEATURE_SET); - if(voted != NULL) { crm_debug_2("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } set_bit_inplace(fsa_input_register, R_JOIN_OK); set_bit_inplace(fsa_input_register, R_INVOKE_PE); fsa_cib_conn->cmds->set_slave_all(fsa_cib_conn, cib_none); fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_none); cib = createEmptyCib(); crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(cib, XML_ATTR_CIB_REVISION, CIB_FEATURE_SET); fragment = create_cib_fragment(cib, XML_TAG_CIB); fsa_cib_conn->cmds->update( fsa_cib_conn, NULL, fragment, NULL, cib_quorum_override); add_cib_op_callback(rc, FALSE, NULL, feature_update_callback); free_xml(cib); free_xml(fragment); return I_NULL; } /* A_DC_RELEASE */ enum crmd_fsa_input do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; if(action & A_DC_RELEASE) { crm_debug("Releasing the role of DC"); clear_bit_inplace(fsa_input_register, R_THE_DC); } else if (action & A_DC_RELEASED) { crm_info("DC role released"); #if 0 if( are there errors ) { /* we cant stay up if not healthy */ /* or perhaps I_ERROR and go to S_RECOVER? */ result = I_SHUTDOWN; } #endif register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); } else { crm_err("Unknown action %s", fsa_action2string(action)); } crm_debug_2("Am I still the DC? %s", AM_I_DC?XML_BOOLEAN_YES:XML_BOOLEAN_NO); return result; } diff --git a/crm/crmd/fsa.c b/crm/crmd/fsa.c index cccc6f0b63..bbcb7c40b1 100644 --- a/crm/crmd/fsa.c +++ b/crm/crmd/fsa.c @@ -1,797 +1,797 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include longclock_t action_start = 0; longclock_t action_stop = 0; longclock_t action_diff = 0; unsigned int action_diff_ms = 0; char *fsa_our_dc = NULL; cib_t *fsa_cib_conn = NULL; char *fsa_our_dc_version = NULL; ll_lrm_t *fsa_lrm_conn; ll_cluster_t *fsa_cluster_conn; oc_node_list_t *fsa_membership_copy; const char *fsa_our_uuid = NULL; const char *fsa_our_uname = NULL; fsa_timer_t *wait_timer = NULL; fsa_timer_t *recheck_timer = NULL; fsa_timer_t *election_trigger = NULL; fsa_timer_t *election_timeout = NULL; fsa_timer_t *integration_timer = NULL; fsa_timer_t *finalization_timer = NULL; fsa_timer_t *shutdown_escalation_timer = NULL; volatile gboolean do_fsa_stall = FALSE; volatile long long fsa_input_register = 0; volatile long long fsa_actions = A_NOTHING; volatile enum crmd_fsa_state fsa_state = S_STARTING; extern uint highest_born_on; extern uint num_join_invites; extern GHashTable *welcomed_nodes; extern GHashTable *finalized_nodes; extern GHashTable *confirmed_nodes; extern GHashTable *integrated_nodes; extern void initialize_join(gboolean before); #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt...) do_crm_log(LOG_DEBUG_2, NULL, NULL, fmt) long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data); long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input); void dump_rsc_info(void); void dump_rsc_info_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); void ghash_print_node(gpointer key, gpointer value, gpointer user_data); void s_crmd_fsa_actions(fsa_data_t *fsa_data); void log_fsa_input(fsa_data_t *stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX"digraph \"g\" {"); do_dot_log(DOT_PREFIX" size = \"30,30\""); do_dot_log(DOT_PREFIX" graph ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" node ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" shape = \"ellipse\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" edge ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// special nodes"); do_dot_log(DOT_PREFIX" \"S_PENDING\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"blue\""); do_dot_log(DOT_PREFIX" fontcolor = \"blue\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"red\""); do_dot_log(DOT_PREFIX" fontcolor = \"red\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// DC only nodes"); do_dot_log(DOT_PREFIX" \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t *fsa_data, long long an_action, enum crmd_fsa_input (*function)(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data)) { int action_log_level = LOG_DEBUG; gboolean do_time_check = TRUE; enum crmd_fsa_input result = I_NULL; if(is_set(fsa_actions, an_action) == FALSE) { crm_err("Action %s (%.16llx) was not requestsed", fsa_action2string(an_action), an_action); return; } if(an_action & A_MSG_ROUTE) { action_log_level = LOG_DEBUG_2; } else if(an_action & A_CIB_START) { do_time_check = FALSE; } fsa_actions = clear_bit(fsa_actions, an_action); crm_debug_3("Invoking action %s (%.16llx)", fsa_action2string(an_action), an_action); if(do_time_check && action_diff_max_ms > 0) { action_start = time_longclock(); } do_crm_log(action_log_level, NULL, NULL, DOT_PREFIX"\t// %s", fsa_action2string(an_action)); result = function(an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data); CRM_DEV_ASSERT(result == I_NULL); crm_debug_3("Action complete: %s (%.16llx)", fsa_action2string(an_action), an_action); if(do_time_check && action_diff_max_ms > 0) { action_stop = time_longclock(); action_diff = sub_longclock(action_stop, action_start); action_diff_ms = longclockto_ms(action_diff); if(action_diff_ms > action_diff_max_ms) { crm_err("Action %s took %dms to complete", fsa_action2string(an_action), action_diff_ms); } else if(action_diff_ms > action_diff_warn_ms) { crm_warn("Action %s took %dms to complete", fsa_action2string(an_action), action_diff_ms); } } } enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; enum crmd_fsa_state last_state = fsa_state; crm_debug_2("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); do_fsa_stall = FALSE; if(is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while(is_message() && do_fsa_stall == FALSE) { crm_debug_2("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; if(fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } if(fsa_actions & A_SHUTDOWN) { crm_log_level = LOG_DEBUG_2; } #ifdef FSA_TRACE if(new_actions != A_NOTHING) { crm_debug_2("Adding FSA actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(new_actions, "\tFSA scheduled"); } else if(fsa_data->fsa_input != I_NULL && new_actions == A_NOTHING) { crm_debug("No action specified for input,state (%s,%s)", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); } if(fsa_data->actions != A_NOTHING) { crm_debug_2("Adding input actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_data->actions,"\tInput scheduled"); } #endif /* logging : *before* the state is changed */ if(is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if(is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if(is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Hook to allow actions to removed due to certain inputs */ fsa_actions = clear_flags( fsa_actions, cause, fsa_state, fsa_data->fsa_input); /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if(last_state != fsa_state){ fsa_actions = do_state_transition( fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX"\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if(g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall?"true":"false"); } else { crm_debug_2("Exiting the FSA"); } /* cleanup inputs? */ if(register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added input:", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed input:", register_copy ^ same); } #if 0 if(fsa_state != S_STARTING && g_list_length(fsa_message_queue) && is_ipc_empty(te_subsystem->ipc) && is_ipc_empty(pe_subsystem->ipc) && is_ipc_empty(fsa_cib_conn->cmds->channel(fsa_cib_conn) && is_ipc_empty(fsa_cluster_conn->llc_ops->ipcchan(fsa_cluster_conn)) ){ static gboolean mem_needs_init = TRUE; if(mem_needs_init) { crm_debug("Reached a stable point:" " reseting memory usage stats to zero"); crm_zero_mem_stats(NULL); mem_needs_init = FALSE; } else { crm_debug("Reached a stable point:" " checking memory usage"); crm_mem_stats(NULL); } } #endif fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t *fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ while(fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { msg_queue_helper(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { return; } /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if(is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if(is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } else if(is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! before anything worse happens */ } else if(is_set(fsa_actions, A_EXIT_1)) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* essential start tasks */ } else if(is_set(fsa_actions, A_STARTUP)) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if(is_set(fsa_actions, A_CIB_START)) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if(is_set(fsa_actions, A_HA_CONNECT)) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if(is_set(fsa_actions, A_READCONFIG)) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if(is_set(fsa_actions, A_LRM_CONNECT)) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if(is_set(fsa_actions, A_CCM_CONNECT)) { do_fsa_action(fsa_data, A_CCM_CONNECT, do_ccm_control); } else if(is_set(fsa_actions, A_TE_START)) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if(is_set(fsa_actions, A_PE_START)) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* sub-system restart */ } else if(is_set(fsa_actions, O_CIB_RESTART)) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if(is_set(fsa_actions, O_PE_RESTART)) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if(is_set(fsa_actions, O_TE_RESTART)) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* Timers */ /* else if(is_set(fsa_actions, O_DC_TIMER_RESTART)) { do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */; } else if(is_set(fsa_actions, A_DC_TIMER_STOP)) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if(is_set(fsa_actions, A_INTEGRATE_TIMER_STOP)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if(is_set(fsa_actions, A_INTEGRATE_TIMER_START)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START,do_timer_control); } else if(is_set(fsa_actions, A_FINALIZE_TIMER_STOP)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if(is_set(fsa_actions, A_FINALIZE_TIMER_START)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if(is_set(fsa_actions, A_CIB_BUMPGEN)) { do_fsa_action(fsa_data, A_CIB_BUMPGEN, do_cib_invoke); } else if(is_set(fsa_actions, A_MSG_ROUTE)) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if(is_set(fsa_actions, A_RECOVER)) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if(is_set(fsa_actions, A_CL_JOIN_RESULT)) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if(is_set(fsa_actions, A_CL_JOIN_REQUEST)) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if(is_set(fsa_actions, A_SHUTDOWN_REQ)) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if(is_set(fsa_actions, A_ELECTION_VOTE)) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if(is_set(fsa_actions, A_ELECTION_COUNT)) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); /* * High priority actions * Update the cache first */ } else if(is_set(fsa_actions, A_CCM_UPDATE_CACHE)) { do_fsa_action(fsa_data, A_CCM_UPDATE_CACHE, do_ccm_update_cache); } else if(is_set(fsa_actions, A_CCM_EVENT)) { do_fsa_action(fsa_data, A_CCM_EVENT, do_ccm_event); } else if(is_set(fsa_actions, A_STARTED)) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if(is_set(fsa_actions, A_CL_JOIN_QUERY)) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if(is_set(fsa_actions, A_DC_TIMER_START)) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions */ } else if(is_set(fsa_actions, A_DC_TAKEOVER)) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if(is_set(fsa_actions, A_DC_RELEASE)) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if(is_set(fsa_actions, A_ELECTION_CHECK)) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if(is_set(fsa_actions, A_ELECTION_START)) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if(is_set(fsa_actions, A_TE_HALT)) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if(is_set(fsa_actions, A_TE_CANCEL)) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if(is_set(fsa_actions, A_DC_JOIN_OFFER_ALL)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if(is_set(fsa_actions, A_DC_JOIN_OFFER_ONE)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_all); } else if(is_set(fsa_actions, A_DC_JOIN_PROCESS_REQ)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if(is_set(fsa_actions, A_DC_JOIN_PROCESS_ACK)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * PE, and the PE before the TE */ } else if(is_set(fsa_actions, A_CIB_INVOKE_LOCAL)) { do_fsa_action(fsa_data, A_CIB_INVOKE_LOCAL, do_cib_invoke); } else if(is_set(fsa_actions, A_CIB_INVOKE)) { do_fsa_action(fsa_data, A_CIB_INVOKE, do_cib_invoke); } else if(is_set(fsa_actions, A_DC_JOIN_FINALIZE)) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if(is_set(fsa_actions, A_LRM_INVOKE)) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if(is_set(fsa_actions, A_LRM_EVENT)) { do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); } else if(is_set(fsa_actions, A_PE_INVOKE)) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if(is_set(fsa_actions, A_TE_INVOKE)) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); } else if(is_set(fsa_actions, A_CL_JOIN_ANNOUNCE)) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* sub-system stop */ } else if(is_set(fsa_actions, A_DC_RELEASED)) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if(is_set(fsa_actions, A_PE_STOP)) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if(is_set(fsa_actions, A_TE_STOP)) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if(is_set(fsa_actions, A_SHUTDOWN)) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if(is_set(fsa_actions, A_STOP)) { do_fsa_action(fsa_data, A_STOP, do_stop); } else if(is_set(fsa_actions, A_CCM_DISCONNECT)) { do_fsa_action(fsa_data, A_CCM_DISCONNECT, do_ccm_control); } else if(is_set(fsa_actions, A_LRM_DISCONNECT)) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if(is_set(fsa_actions, A_HA_DISCONNECT)) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if(is_set(fsa_actions, A_CIB_STOP)) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); /* exit gracefully */ } else if(is_set(fsa_actions, A_EXIT_0)) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { crm_err("Action %s (0x%llx) not supported ", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t *stored_msg) { crm_debug_2("Processing queued input %d", stored_msg->id); if(stored_msg->fsa_cause == C_CCM_CALLBACK) { crm_debug_3("FSA processing CCM callback from %s", stored_msg->origin); } else if(stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_debug_3("FSA processing LRM callback from %s", stored_msg->origin); } else if(stored_msg->data == NULL) { crm_debug_3("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv( stored_msg, fsa_dt_ha_msg, __FUNCTION__); crm_debug_3("FSA processing XML message from %s", stored_msg->origin); crm_log_message(LOG_MSG, ha_input->msg); crm_log_xml_debug_3(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data) { long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_DEV_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX"\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); crm_info("State transition %s -> %s [ input=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); /* the last two clauses might cause trouble later */ if(election_timeout != NULL && next_state != S_ELECTION && cur_state != S_RELEASE_DC) { crm_timer_stop(election_timeout); /* } else { */ /* crm_timer_start(election_timeout); */ } #if 0 if(is_set(fsa_input_register, R_SHUTDOWN)){ set_bit_inplace(tmp, A_DC_TIMER_STOP); } #endif if(next_state == S_INTEGRATION) { set_bit_inplace(tmp, A_INTEGRATE_TIMER_START); } else { set_bit_inplace(tmp, A_INTEGRATE_TIMER_STOP); } if(next_state == S_FINALIZE_JOIN) { set_bit_inplace(tmp, A_FINALIZE_TIMER_START); } else { set_bit_inplace(tmp, A_FINALIZE_TIMER_STOP); } if(next_state != S_PENDING) { set_bit_inplace(tmp, A_DC_TIMER_STOP); } if(next_state != S_ELECTION) { highest_born_on = 0; } if(next_state != S_IDLE) { crm_timer_stop(recheck_timer); } switch(next_state) { case S_PENDING: fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); /* fall through */ case S_ELECTION: crm_debug_2("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); - update_dc(NULL); + update_dc(NULL, FALSE); break; case S_NOT_DC: if(is_set(fsa_input_register, R_SHUTDOWN)){ crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit_inplace(tmp, A_SHUTDOWN_REQ); } CRM_DEV_ASSERT(fsa_our_dc != NULL); if(fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_DEV_ASSERT(AM_I_DC); if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(welcomed_nodes) > 0) { char *msg = crm_strdup( " Welcome reply not received from"); crm_warn("%u cluster nodes failed to respond" " to the join offer.", g_hash_table_size(welcomed_nodes)); g_hash_table_foreach( welcomed_nodes, ghash_print_node, msg); crm_free(msg); } else { crm_info("All %d cluster nodes " "responded to the join offer.", g_hash_table_size(integrated_nodes)); } break; case S_POLICY_ENGINE: CRM_DEV_ASSERT(AM_I_DC); if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(finalized_nodes) > 0) { char *msg = crm_strdup( " Confirm not received from"); crm_err("%u cluster nodes failed to confirm" " their join.", g_hash_table_size(finalized_nodes)); g_hash_table_foreach( finalized_nodes, ghash_print_node, msg); crm_free(msg); } else if(g_hash_table_size(confirmed_nodes) == fsa_membership_copy->members_size) { crm_info("All %u cluster nodes are" " eligable to run resources.", fsa_membership_copy->members_size); } else if(g_hash_table_size(confirmed_nodes) > fsa_membership_copy->members_size) { crm_err("We have more confirmed nodes than our membership does"); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else { crm_warn("Only %u of %u cluster " "nodes are eligable to run resources", g_hash_table_size(confirmed_nodes), fsa_membership_copy->members_size); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: CRM_DEV_ASSERT(AM_I_DC); dump_rsc_info(); if(is_set(fsa_input_register, R_SHUTDOWN)){ crm_info("(Re)Issuing shutdown request now" " that we are the DC"); set_bit_inplace(tmp, A_SHUTDOWN_REQ); } if(recheck_timer->period_ms > 0) { crm_timer_start(recheck_timer); } break; default: break; } if(clear_recovery_bit && next_state != S_PENDING) { tmp = clear_bit(tmp, A_RECOVER); } else if(clear_recovery_bit == FALSE) { tmp = set_bit(tmp, A_RECOVER); } if(tmp != actions) { fsa_dump_actions(actions ^ tmp, "New actions"); actions = tmp; } return actions; } long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input) { long long saved_actions = actions; long long startup_actions = A_STARTUP|A_CIB_START|A_LRM_CONNECT|A_CCM_CONNECT|A_HA_CONNECT|A_READCONFIG|A_STARTED|A_CL_JOIN_QUERY; if(cur_state == S_STOPPING || is_set(fsa_input_register, R_SHUTDOWN)) { clear_bit_inplace(actions, startup_actions); } fsa_dump_actions(actions ^ saved_actions, "Cleared Actions"); return actions; } void dump_rsc_info(void) { } void ghash_print_node(gpointer key, gpointer value, gpointer user_data) { const char *text = user_data; const char *uname = key; const char *value_s = value; crm_info("%s: %s %s", text, uname, value_s); } diff --git a/crm/crmd/join_client.c b/crm/crmd/join_client.c index 52d8329f47..c54a8a086b 100644 --- a/crm/crmd/join_client.c +++ b/crm/crmd/join_client.c @@ -1,307 +1,280 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include int reannounce_count = 0; void join_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t *orig); /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ enum crmd_fsa_input do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); sleep(1); /* give the CCM time to propogate to the DC */ crm_debug("Querying for a DC"); send_msg_via_ha(fsa_cluster_conn, req); return I_NULL; } /* A_CL_JOIN_ANNOUNCE */ /* this is kind of a workaround for the fact that we may not be around * or are otherwise unable to reply when the DC sends out A_WELCOME_ALL */ enum crmd_fsa_input do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { - ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); - /* Once we hear from the DC, we can stop the timer * * This timer was started either on startup or when a node * left the CCM list */ /* dont announce if we're in one of these states */ if(cur_state != S_PENDING) { crm_warn("Do not announce ourselves in state %s", fsa_state2string(cur_state)); return I_NULL; } if(AM_I_OPERATIONAL) { - const char *hb_from = cl_get_string( - input->msg, F_CRM_HOST_FROM); - - crm_debug("Announcing availability"); - if(hb_from == NULL) { - crm_err("Failed to determin origin of hb message"); - register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); - return I_NULL; - } - - if(fsa_our_dc == NULL) { - update_dc(input->msg); - - } else if(safe_str_neq(fsa_our_dc, hb_from)) { - /* reset the fsa_our_dc to NULL */ - crm_warn("Resetting our DC to NULL after DC_HB" - " from unrecognised node."); - update_dc(NULL); - return I_NULL; /* for now, wait for the DC's - * to settle down - */ - } - /* send as a broadcast */ - { - HA_Message *req = create_request( - CRM_OP_JOIN_ANNOUNCE, NULL, NULL, - CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); + HA_Message *req = create_request( + CRM_OP_JOIN_ANNOUNCE, NULL, NULL, + CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); - send_msg_via_ha(fsa_cluster_conn, req); - } + crm_debug("Announcing availability"); + update_dc(NULL, FALSE); + send_msg_via_ha(fsa_cluster_conn, req); } else { /* Delay announce until we have finished local startup */ crm_warn("Delaying announce until local startup is complete"); return I_NULL; } return I_NULL; } static int query_call_id = 0; /* A_CL_JOIN_REQUEST */ /* aka. accept the welcome offer */ enum crmd_fsa_input do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *welcome_from = cl_get_string(input->msg, F_CRM_HOST_FROM); #if 0 if(we are sick) { log error ; /* save the request for later? */ return I_NULL; } #endif crm_debug("Accepting join offer: join-%s", cl_get_string(input->msg, F_CRM_JOIN_ID)); /* we only ever want the last one */ if(query_call_id > 0) { crm_debug("Cancelling previous join query: %d", query_call_id); remove_cib_op_callback(query_call_id, FALSE); query_call_id = 0; } - if(fsa_our_dc == NULL) { - update_dc(input->msg); - - } else if(safe_str_neq(welcome_from, fsa_our_dc)) { + update_dc(input->msg, FALSE); + if(safe_str_neq(welcome_from, fsa_our_dc)) { /* dont do anything until DC's sort themselves out */ crm_err("Expected a welcome from %s, but %s replied", fsa_our_dc, welcome_from); return I_NULL; } CRM_DEV_ASSERT(input != NULL); query_call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, NULL, NULL, cib_scope_local); add_cib_op_callback( query_call_id, TRUE, copy_ha_msg_input(input), join_query_callback); crm_debug("Registered join query callback: %d", query_call_id); register_fsa_action(A_DC_TIMER_STOP); return I_NULL; } void join_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { crm_data_t *local_cib = NULL; ha_msg_input_t *input = user_data; crm_data_t *generation = create_xml_node( NULL, XML_CIB_TAG_GENERATION_TUPPLE); CRM_DEV_ASSERT(input != NULL); query_call_id = 0; if(rc == cib_ok) { #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(output), XML_TAG_CIB)) { local_cib = output; } else { local_cib = find_xml_node(output, XML_TAG_CIB, TRUE); } #else local_cib = output; CRM_DEV_ASSERT(safe_str_eq(crm_element_name(local_cib), XML_TAG_CIB)); #endif } if(local_cib != NULL) { HA_Message *reply = NULL; const char *join_id = ha_msg_value(input->msg, F_CRM_JOIN_ID); crm_debug("Respond to join offer join-%s", join_id); crm_debug("Acknowledging %s as our DC", cl_get_string(input->msg, F_CRM_HOST_FROM)); copy_in_properties(generation, local_cib); reply = create_request( CRM_OP_JOIN_REQUEST, generation, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); ha_msg_add(reply, F_CRM_JOIN_ID, join_id); send_msg_via_ha(fsa_cluster_conn, reply); } else { crm_err("Could not retrieve Generation to attach to our" " join acknowledgement: %s", cib_error2string(rc)); register_fsa_error_adv( C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } delete_ha_msg_input(input); free_xml(generation); } /* A_CL_JOIN_RESULT */ /* aka. this is notification that we have (or have not) been accepted */ enum crmd_fsa_input do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { crm_data_t *tmp1 = NULL; gboolean was_nack = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); int join_id = -1; const char *op = cl_get_string(input->msg,F_CRM_TASK); const char *ack_nack = cl_get_string(input->msg,CRM_OP_JOIN_ACKNAK); const char *welcome_from = cl_get_string(input->msg,F_CRM_HOST_FROM); if(safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) { crm_debug_2("Ignoring op=%s message", op); return I_NULL; } /* calculate if it was an ack or a nack */ if(crm_is_true(ack_nack)) { was_nack = FALSE; } ha_msg_value_int(input->msg, F_CRM_JOIN_ID, &join_id); if(was_nack) { - crm_err("Join join-%d with %s failed. NACK'd", join_id, welcome_from); + crm_err("Join join-%d with %s failed. NACK'd", + join_id, welcome_from); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } if(AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return I_NULL; - - } else if(fsa_our_dc == NULL) { - update_dc(input->msg); } + update_dc(input->msg, TRUE); + /* send our status section to the DC */ crm_debug("Confirming join join-%d: %s", join_id, cl_get_string(input->msg, F_CRM_TASK)); crm_debug_2("Discovering local LRM status"); tmp1 = do_lrm_query(TRUE); if(tmp1 != NULL) { HA_Message *reply = create_request( CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); ha_msg_add_int(reply, F_CRM_JOIN_ID, join_id); crm_debug("join-%d: Join complete. Sending local LRM status.", join_id); send_msg_via_ha(fsa_cluster_conn, reply); if(AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__); } free_xml(tmp1); } else { crm_err("Could send our LRM state to the DC"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return I_NULL; } diff --git a/crm/crmd/join_dc.c b/crm/crmd/join_dc.c index f712eeef02..3e60eb8cfd 100644 --- a/crm/crmd/join_dc.c +++ b/crm/crmd/join_dc.c @@ -1,602 +1,604 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include GHashTable *welcomed_nodes = NULL; GHashTable *integrated_nodes = NULL; GHashTable *finalized_nodes = NULL; GHashTable *confirmed_nodes = NULL; char *max_epoch = NULL; char *max_generation_from = NULL; crm_data_t *max_generation_xml = NULL; void initialize_join(gboolean before); gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void finalize_join(const char *caller); static int current_join_id = 0; void initialize_join(gboolean before) { /* clear out/reset a bunch of stuff */ crm_debug("join-%d: Initializing join data (flag=%s)", current_join_id, before?"true":"false"); g_hash_table_destroy(welcomed_nodes); g_hash_table_destroy(integrated_nodes); g_hash_table_destroy(finalized_nodes); g_hash_table_destroy(confirmed_nodes); if(before) { if(max_generation_from != NULL) { crm_free(max_generation_from); max_generation_from = NULL; } if(max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } clear_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *crm_online = NULL; const oc_node_t *member = (const oc_node_t*)value; if(member != NULL) { join_to = member->node_uname; } if(join_to == NULL) { crm_err("No recipient for welcome message"); return; } g_hash_table_remove(confirmed_nodes, join_to); g_hash_table_remove(finalized_nodes, join_to); g_hash_table_remove(integrated_nodes, join_to); g_hash_table_remove(welcomed_nodes, join_to); crm_online = g_hash_table_lookup(crmd_peer_state, join_to); if(safe_str_eq(crm_online, ONLINESTATUS)) { HA_Message *offer = create_request( CRM_OP_JOIN_OFFER, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); char *join_offered = crm_itoa(current_join_id); ha_msg_add_int(offer, F_CRM_JOIN_ID, current_join_id); /* send the welcome */ crm_debug("join-%d: Sending offer to %s", current_join_id, join_to); send_msg_via_ha(fsa_cluster_conn, offer); g_hash_table_insert( welcomed_nodes, crm_strdup(join_to), join_offered); } else { crm_warn("Peer process on %s is not active", join_to); } } /* A_DC_JOIN_OFFER_ALL */ enum crmd_fsa_input do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { /* reset everyones status back to down or in_ccm in the CIB * * any nodes that are active in the CIB but not in the CCM list * will be seen as offline by the PE anyway */ current_join_id++; initialize_join(TRUE); do_update_cib_nodes(TRUE, __FUNCTION__); + + update_dc(NULL, FALSE); g_hash_table_foreach( fsa_membership_copy->members, join_make_offer, NULL); /* dont waste time by invoking the PE yet; */ crm_info("join-%d: Waiting on %d outstanding join acks", current_join_id, g_hash_table_size(welcomed_nodes)); return I_NULL; } /* A_DC_JOIN_OFFER_ONE */ enum crmd_fsa_input do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { oc_node_t member; ha_msg_input_t *welcome = fsa_typed_data(fsa_dt_ha_msg); const char *join_to = NULL; if(welcome == NULL) { crm_err("Attempt to send welcome message " "without a message to reply to!"); return I_NULL; } join_to = cl_get_string(welcome->msg, F_CRM_HOST_FROM); if(join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_debug("Re-offering membership to %s...", join_to); } crm_info("join-%d: Processing annouce request from %s in state %s", current_join_id, join_to, fsa_state2string(cur_state)); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ member.node_uname = crm_strdup(fsa_our_uname); join_make_offer(NULL, &member, NULL); crm_free(member.node_uname); member.node_uname = crm_strdup(join_to); join_make_offer(NULL, &member, NULL); crm_free(member.node_uname); /* this was a genuine join request, cancel any existing * transition and invoke the PE */ if(need_transition(fsa_state)) { register_fsa_action(A_TE_CANCEL); } /* dont waste time by invoking the pe yet; */ crm_debug("Waiting on %d outstanding join acks for join-%d", g_hash_table_size(welcomed_nodes), current_join_id); return I_NULL; } /* A_DC_JOIN_PROCESS_REQ */ enum crmd_fsa_input do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { crm_data_t *generation = NULL; int join_id = -1; gboolean ack_nack_bool = TRUE; const char *ack_nack = CRMD_JOINSTATE_MEMBER; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = cl_get_string(join_ack->msg,F_CRM_HOST_FROM); const char *ref = cl_get_string(join_ack->msg,XML_ATTR_REFERENCE); gpointer join_node = g_hash_table_lookup(fsa_membership_copy->members, join_from); crm_debug("Processing req from %s", join_from); generation = join_ack->xml; ha_msg_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); crm_log_xml_debug_2(max_generation_xml, "Max generation"); crm_log_xml_debug_2(generation, "Their generation"); if(join_node == NULL) { crm_err("Node %s is not a member", join_from); ack_nack_bool = FALSE; } else if(generation == NULL) { crm_err("Generation was NULL"); ack_nack_bool = FALSE; } else if(join_id != current_join_id) { crm_debug("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); check_join_state(cur_state, __FUNCTION__); return I_NULL; } else if(max_generation_xml == NULL) { max_generation_xml = copy_xml(generation); max_generation_from = crm_strdup(join_from); } else if(cib_compare_generation(max_generation_xml, generation) < 0) { crm_debug("%s has a better generation number than" " the current max %s", join_from, max_generation_from); crm_free(max_generation_from); free_xml(max_generation_xml); max_generation_from = crm_strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } if(ack_nack_bool == FALSE) { /* NACK this client */ ack_nack = CRMD_STATE_INACTIVE; crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref); } else { crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); } /* add them to our list of CRMD_STATE_ACTIVE nodes */ g_hash_table_insert( integrated_nodes, crm_strdup(join_from), crm_strdup(ack_nack)); crm_debug("%u nodes have been integrated into join-%d", g_hash_table_size(integrated_nodes), join_id); g_hash_table_remove(welcomed_nodes, join_from); if(check_join_state(cur_state, __FUNCTION__) == FALSE) { /* dont waste time by invoking the PE yet; */ crm_debug("join-%d: Still waiting on %d outstanding offers", join_id, g_hash_table_size(welcomed_nodes)); } return I_NULL; } /* A_DC_JOIN_FINALIZE */ enum crmd_fsa_input do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum cib_errors rc = cib_ok; /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ crm_debug("Finializing join-%d for %d clients", current_join_id, g_hash_table_size(integrated_nodes)); clear_bit_inplace(fsa_input_register, R_HAVE_CIB); if(max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)){ set_bit_inplace(fsa_input_register, R_HAVE_CIB); } if(is_set(fsa_input_register, R_IN_TRANSITION)) { crm_warn("join-%d: We are still in a transition." " Delaying until the TE completes.", current_join_id); crmd_fsa_stall(NULL); return I_NULL; } if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ crm_info("join-%d: Asking %s for its copy of the CIB", current_join_id, crm_str(max_generation_from)); crm_log_xml_debug(max_generation_xml, "Requesting version"); set_bit_inplace(fsa_input_register, R_CIB_ASKED); fsa_cib_conn->call_timeout = 10; rc = fsa_cib_conn->cmds->sync_from( fsa_cib_conn, max_generation_from, NULL, cib_quorum_override); fsa_cib_conn->call_timeout = 0; /* back to the default */ add_cib_op_callback(rc, FALSE, crm_strdup(max_generation_from), finalize_sync_callback); return I_NULL; } else { /* Send _our_ CIB out to everyone */ fsa_cib_conn->cmds->sync_from( fsa_cib_conn, fsa_our_uname, NULL,cib_quorum_override); } finalize_join(__FUNCTION__); return I_NULL; } void finalize_sync_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { CRM_DEV_ASSERT(cib_not_master != rc); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); if(rc != cib_ok) { crm_log_maybe((rc==cib_old_data?LOG_WARNING:LOG_ERR), "Sync from %s resulted in an error: %s", (char*)user_data, cib_error2string(rc)); /* restart the whole join process */ register_fsa_error_adv( C_FSA_INTERNAL, I_ELECTION_DC,NULL,NULL,__FUNCTION__); } else if(AM_I_DC && fsa_state == S_FINALIZE_JOIN) { finalize_join(__FUNCTION__); } else { crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s", AM_I_DC?"DC":"CRMd", fsa_state2string(fsa_state)); } crm_free(user_data); } void finalize_join(const char *caller) { crm_data_t *cib = createEmptyCib(); set_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); set_uuid(fsa_cluster_conn, cib, XML_ATTR_DC_UUID, fsa_our_uname); crm_debug_3("Update %s in the CIB to our uuid: %s", XML_ATTR_DC_UUID, crm_element_value(cib, XML_ATTR_DC_UUID)); fsa_cib_conn->cmds->update( fsa_cib_conn, NULL, cib, NULL, cib_quorum_override); free_xml(cib); crm_debug_3("Bumping the epoch and syncing to %d clients", g_hash_table_size(finalized_nodes)); fsa_cib_conn->cmds->bump_epoch( fsa_cib_conn, cib_scope_local|cib_quorum_override); /* make sure dc_uuid is re-set to us */ if(check_join_state(fsa_state, caller) == FALSE) { crm_debug("Notifying %d clients of join-%d results", g_hash_table_size(integrated_nodes), current_join_id); g_hash_table_foreach_remove( integrated_nodes, finalize_join_for, NULL); } } static void join_update_complete_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { fsa_data_t *msg_data = NULL; if(rc == cib_ok) { crm_debug("Join update %d complete", call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update %d failed", call_id); crm_log_message(LOG_DEBUG, msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ enum crmd_fsa_input do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_id_s = NULL; const char *join_state = NULL; const char *op = cl_get_string(join_ack->msg, F_CRM_TASK); const char *join_from = cl_get_string(join_ack->msg, F_CRM_HOST_FROM); if(safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring op=%s message", op); return I_NULL; } ha_msg_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); join_id_s = ha_msg_value(join_ack->msg, F_CRM_JOIN_ID); /* now update them to "member" */ crm_debug_2("Processing ack from %s", join_from); join_state = (const char *) g_hash_table_lookup(finalized_nodes, join_from); if(join_state == NULL) { crm_err("Join not in progress: ignoring join-%d from %s", join_id, join_from); return I_NULL; } else if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_err("Node %s wasnt invited to join the cluster",join_from); g_hash_table_remove(finalized_nodes, join_from); return I_NULL; } else if(join_id != current_join_id) { crm_err("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); g_hash_table_remove(finalized_nodes, join_from); return I_NULL; } g_hash_table_remove(finalized_nodes, join_from); if(g_hash_table_lookup(confirmed_nodes, join_from) != NULL) { crm_err("join-%d: hash already contains confirmation from %s", join_id, join_from); } g_hash_table_insert( confirmed_nodes, crm_strdup(join_from), crm_strdup(join_id_s)); crm_info("join-%d: Updating node state to %s for %s)", join_id, CRMD_JOINSTATE_MEMBER, join_from); /* update CIB with the current LRM status from the node * We dont need to notify the TE of these updates, a transition will * be started in due time */ call_id = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, join_ack->xml, NULL, cib_scope_local|cib_quorum_override); add_cib_op_callback( call_id, FALSE, NULL, join_update_complete_callback); crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id); return I_NULL; } gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *join_state = NULL; HA_Message *acknak = NULL; if(key == NULL || value == NULL) { return TRUE; } join_to = (const char *)key; join_state = (const char *)value; /* make sure the node exists in the config section */ create_node_entry(join_to, join_to, NORMALNODE); /* send the ack/nack to the node */ acknak = create_request( CRM_OP_JOIN_ACKNAK, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); ha_msg_add_int(acknak, F_CRM_JOIN_ID, current_join_id); /* set the ack/nack */ if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_debug("join-%d: ACK'ing join request from %s, state %s", current_join_id, join_to, join_state); ha_msg_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); g_hash_table_insert( finalized_nodes, crm_strdup(join_to), crm_strdup(CRMD_JOINSTATE_MEMBER)); } else { crm_warn("join-%d: NACK'ing join request from %s, state %s", current_join_id, join_to, join_state); ha_msg_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE); } send_msg_via_ha(fsa_cluster_conn, acknak); return TRUE; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if(cur_state == S_INTEGRATION) { if(g_hash_table_size(welcomed_nodes) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, g_hash_table_size(integrated_nodes), source); register_fsa_input_before( C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if(cur_state == S_FINALIZE_JOIN) { if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); return TRUE; } else if(g_hash_table_size(integrated_nodes) == 0 && g_hash_table_size(finalized_nodes) == 0) { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later( C_FSA_INTERNAL, I_FINALIZED, NULL); } else if(g_hash_table_size(integrated_nodes) != 0 && g_hash_table_size(finalized_nodes) != 0) { crm_err("join-%d: Waiting on %d integrated nodes" " AND %d confirmations", current_join_id, g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes)); } else if(g_hash_table_size(integrated_nodes) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, g_hash_table_size(integrated_nodes)); } else if(g_hash_table_size(finalized_nodes) != 0) { crm_debug("join-%d: Still waiting on %d confirmations", current_join_id, g_hash_table_size(finalized_nodes)); } } return FALSE; }