Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/crmd/fsa.c b/crmd/fsa.c
index 0651bdc7c8..32992bce76 100644
--- a/crmd/fsa.c
+++ b/crmd/fsa.c
@@ -1,686 +1,686 @@
-/*
+/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/lrmd.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_messages.h>
#include <crmd_fsa.h>
#include <tengine.h>
#include <fsa_proto.h>
#include <fsa_matrix.h>
/* Name of the current DC; do_state_transition() expects it to be set on entry to S_NOT_DC */
char *fsa_our_dc = NULL;
/* CIB connection; do_state_transition() calls set_slave() through it on entry to S_PENDING */
cib_t *fsa_cib_conn = NULL;
char *fsa_our_dc_version = NULL;
char *fsa_our_uuid = NULL;
/* Local node name; used as the peer lookup key when offering membership to ourselves */
char *fsa_our_uname = NULL;
#if SUPPORT_HEARTBEAT
ll_cluster_t *fsa_cluster_conn;
#endif
/* FSA timers; all start out NULL and are not allocated in this file's visible code */
fsa_timer_t *wait_timer = NULL;
fsa_timer_t *recheck_timer = NULL;
fsa_timer_t *election_trigger = NULL;
fsa_timer_t *election_timeout = NULL;
fsa_timer_t *transition_timer = NULL;
fsa_timer_t *integration_timer = NULL;
fsa_timer_t *finalization_timer = NULL;
fsa_timer_t *shutdown_escalation_timer = NULL;
/* When TRUE, the message and action loops below stop processing */
volatile gboolean do_fsa_stall = FALSE;
/* Bitmask of R_* flags (e.g. R_SHUTDOWN) */
volatile long long fsa_input_register = 0;
/* Bitmask of A_* actions still waiting to be performed */
volatile long long fsa_actions = A_NOTHING;
/* Current state of the FSA */
volatile enum crmd_fsa_state fsa_state = S_STARTING;
/* Defined outside this file */
extern uint highest_born_on;
extern uint num_join_invites;
extern GHashTable *welcomed_nodes;
extern GHashTable *finalized_nodes;
extern GHashTable *confirmed_nodes;
extern GHashTable *integrated_nodes;
extern void initialize_join(gboolean before);
/* Prefix put on every "dot" trace line; do_dot_log() simply forwards to crm_trace() */
#define DOT_PREFIX "actions:trace: "
#define do_dot_log(fmt, args...) crm_trace( fmt, ##args)
/* Prototypes for the functions defined in this file */
long long do_state_transition(long long actions,
enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state, fsa_data_t * msg_data);
void dump_rsc_info(void);
void dump_rsc_info_callback(const xmlNode * msg, int call_id, int rc,
xmlNode * output, void *user_data);
void ghash_print_node(gpointer key, gpointer value, gpointer user_data);
void s_crmd_fsa_actions(fsa_data_t * fsa_data);
void log_fsa_input(fsa_data_t * stored_msg);
void init_dotfile(void);
/*
 * Emit the fixed preamble of a Graphviz "digraph" description via do_dot_log().
 *
 * Every line is trace-logged with DOT_PREFIX: default graph/node/edge
 * attributes first, then per-state colouring for a handful of FSA states.
 * The closing brace of the digraph is not written here.
 */
void
init_dotfile(void)
{
/* graph header and default graph attributes */
do_dot_log(DOT_PREFIX "digraph \"g\" {");
do_dot_log(DOT_PREFIX " size = \"30,30\"");
do_dot_log(DOT_PREFIX " graph [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
/* default node attributes */
do_dot_log(DOT_PREFIX " node [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " shape = \"ellipse\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
/* default edge attributes */
do_dot_log(DOT_PREFIX " edge [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
/* S_PENDING is drawn in blue, S_TERMINATE in red */
do_dot_log(DOT_PREFIX "// special nodes");
do_dot_log(DOT_PREFIX " \"S_PENDING\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"blue\"");
do_dot_log(DOT_PREFIX " fontcolor = \"blue\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " \"S_TERMINATE\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"red\"");
do_dot_log(DOT_PREFIX " fontcolor = \"red\"");
do_dot_log(DOT_PREFIX " ]");
/* states labelled "DC only" get a green font */
do_dot_log(DOT_PREFIX "// DC only nodes");
do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]");
}
/*
 * Dispatch one FSA action to its handler.
 *
 * The bits of an_action are removed from the global fsa_actions mask
 * before the handler is called, the action name is trace-logged, and the
 * handler is then invoked with the cause and input taken from fsa_data
 * together with the current global fsa_state.
 */
static void
do_fsa_action(fsa_data_t * fsa_data, long long an_action,
              void (*function) (long long action,
                                enum crmd_fsa_cause cause,
                                enum crmd_fsa_state cur_state,
                                enum crmd_fsa_input cur_input, fsa_data_t * msg_data))
{
    /* clear the action first; the handler runs with it already removed */
    fsa_actions = fsa_actions & ~an_action;

    crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action));

    (*function) (an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data);
}
/*
 * Actions that s_crmd_fsa() strips from fsa_actions once the FSA is in
 * S_STOPPING or R_SHUTDOWN is set in the input register.
 */
static long long startup_actions =
A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_CCM_CONNECT | A_HA_CONNECT | A_READCONFIG |
A_STARTED | A_CL_JOIN_QUERY;
/*
 * Main entry point of the FSA: drain the queued inputs.
 *
 * For each queued input (until the queue is empty or do_fsa_stall is set):
 *   - restore any actions saved with the input and add the actions that the
 *     crmd_fsa_actions table lists for (input, current state);
 *   - perform pending A_ERROR / A_WARN / A_LOG actions before the state changes;
 *   - look up the next state in the crmd_fsa_state table;
 *   - drop startup_actions when stopping or shutting down;
 *   - call do_state_transition() if the state changed;
 *   - run the pending actions via s_crmd_fsa_actions() and free the input.
 *
 * If no input is queued but actions are pending, a placeholder I_NULL input
 * is queued first so that the loop body runs at least once.
 *
 * \param cause  what triggered this invocation (used for trace logging only)
 * \return the FSA state on exit
 *
 * NOTE(review): the calloc() result below is used without a NULL check.
 * NOTE(review): g_list_length() is printed with %d; confirm its return type
 *               against the GLib documentation.
 */
enum crmd_fsa_state
s_crmd_fsa(enum crmd_fsa_cause cause)
{
fsa_data_t *fsa_data = NULL;
/* snapshot of the input register, compared on exit to log added/removed bits */
long long register_copy = fsa_input_register;
long long new_actions = A_NOTHING;
enum crmd_fsa_state last_state = fsa_state;
crm_trace("FSA invoked with Cause: %s\tState: %s",
fsa_cause2string(cause), fsa_state2string(fsa_state));
fsa_dump_actions(fsa_actions, "Initial");
do_fsa_stall = FALSE;
if (is_message() == FALSE && fsa_actions != A_NOTHING) {
/* fake the first message so we can get into the loop */
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->fsa_input = I_NULL;
fsa_data->fsa_cause = C_FSA_INTERNAL;
fsa_data->origin = __FUNCTION__;
fsa_data->data_type = fsa_dt_none;
fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
fsa_data = NULL;
}
while (is_message() && do_fsa_stall == FALSE) {
crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue));
fsa_data = get_message();
CRM_CHECK(fsa_data != NULL, continue);
log_fsa_input(fsa_data);
/* add any actions back to the queue */
fsa_actions |= fsa_data->actions;
fsa_dump_actions(fsa_data->actions, "Restored actions");
/* get the next batch of actions */
new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state];
fsa_actions |= new_actions;
fsa_dump_actions(new_actions, "New actions");
if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) {
crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
}
/* logging : *before* the state is changed */
if (is_set(fsa_actions, A_ERROR)) {
do_fsa_action(fsa_data, A_ERROR, do_log);
}
if (is_set(fsa_actions, A_WARN)) {
do_fsa_action(fsa_data, A_WARN, do_log);
}
if (is_set(fsa_actions, A_LOG)) {
do_fsa_action(fsa_data, A_LOG, do_log);
}
/* update state variables */
last_state = fsa_state;
fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state];
/*
* Remove certain actions during shutdown
*/
if (fsa_state == S_STOPPING || ((fsa_input_register & R_SHUTDOWN) == R_SHUTDOWN)) {
clear_bit(fsa_actions, startup_actions);
}
/*
* Hook for change of state.
* Allows actions to be added or removed when entering a state
*/
if (last_state != fsa_state) {
fsa_actions = do_state_transition(fsa_actions, last_state, fsa_state, fsa_data);
} else {
do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s"
" \tInput=%s \tOrigin=%s() \tid=%d",
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause),
fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id);
}
/* start doing things... */
s_crmd_fsa_actions(fsa_data);
delete_fsa_input(fsa_data);
fsa_data = NULL;
}
/* only log at debug level when something is left over or we stalled */
if (g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) {
crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s",
g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall ? "true" : "false");
} else {
crm_trace("Exiting the FSA");
}
/* cleanup inputs? */
if (register_copy != fsa_input_register) {
/* bits present both before and after; XOR-ing with it isolates the changes */
long long same = register_copy & fsa_input_register;
fsa_dump_inputs(LOG_DEBUG, "Added", fsa_input_register ^ same);
fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same);
}
fsa_dump_actions(fsa_actions, "Remaining");
fsa_dump_queue(LOG_DEBUG);
return fsa_state;
}
/*
 * Perform the pending FSA actions, one per loop iteration, in the fixed
 * priority order given by the if/else chain below.
 *
 * The loop runs until fsa_actions is A_NOTHING or do_fsa_stall is set.
 * Each branch hands one action to do_fsa_action(), which clears that
 * action's bits before calling the handler. The O_* composite actions are
 * matched only when all of their bits are set.
 *
 * A_CCM_CONNECT and A_CCM_DISCONNECT are only dispatched when built with
 * SUPPORT_HEARTBEAT on a heartbeat cluster; their bit is cleared either way.
 *
 * The "-" / "+" line pair in the A_DC_JOIN_OFFER_ONE branch is the patch
 * hunk: that action changes from do_dc_join_offer_all to do_dc_join_offer_one.
 *
 * NOTE(review): the final else branch does not clear fsa_actions itself;
 * presumably register_fsa_error_adv() resets it, otherwise this loop would
 * not terminate - confirm against its definition.
 */
void
s_crmd_fsa_actions(fsa_data_t * fsa_data)
{
/*
* Process actions in order of priority but do only one
* action at a time to avoid complicating the ordering.
*/
CRM_CHECK(fsa_data != NULL, return);
while (fsa_actions != A_NOTHING && do_fsa_stall == FALSE) {
/* regular action processing in order of action priority
*
* Make sure all actions that connect to required systems
* are performed first
*/
if (fsa_actions & A_ERROR) {
do_fsa_action(fsa_data, A_ERROR, do_log);
} else if (fsa_actions & A_WARN) {
do_fsa_action(fsa_data, A_WARN, do_log);
} else if (fsa_actions & A_LOG) {
do_fsa_action(fsa_data, A_LOG, do_log);
/* get out of here NOW! before anything worse happens */
} else if (fsa_actions & A_EXIT_1) {
do_fsa_action(fsa_data, A_EXIT_1, do_exit);
/* sub-system restart */
} else if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) {
do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control);
} else if ((fsa_actions & O_CIB_RESTART) == O_CIB_RESTART) {
do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control);
} else if ((fsa_actions & O_PE_RESTART) == O_PE_RESTART) {
do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control);
} else if ((fsa_actions & O_TE_RESTART) == O_TE_RESTART) {
do_fsa_action(fsa_data, O_TE_RESTART, do_te_control);
/* essential start tasks */
} else if (fsa_actions & A_STARTUP) {
do_fsa_action(fsa_data, A_STARTUP, do_startup);
} else if (fsa_actions & A_CIB_START) {
do_fsa_action(fsa_data, A_CIB_START, do_cib_control);
} else if (fsa_actions & A_HA_CONNECT) {
do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control);
} else if (fsa_actions & A_READCONFIG) {
do_fsa_action(fsa_data, A_READCONFIG, do_read_config);
/* sub-system start/connect */
} else if (fsa_actions & A_LRM_CONNECT) {
do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control);
} else if (fsa_actions & A_CCM_CONNECT) {
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
do_fsa_action(fsa_data, A_CCM_CONNECT, do_ccm_control);
}
#endif
fsa_actions &= ~A_CCM_CONNECT;
} else if (fsa_actions & A_TE_START) {
do_fsa_action(fsa_data, A_TE_START, do_te_control);
} else if (fsa_actions & A_PE_START) {
do_fsa_action(fsa_data, A_PE_START, do_pe_control);
/* Timers */
/* else if(fsa_actions & O_DC_TIMER_RESTART) {
do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */ ;
} else if (fsa_actions & A_DC_TIMER_STOP) {
do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_INTEGRATE_TIMER_STOP) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_INTEGRATE_TIMER_START) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control);
} else if (fsa_actions & A_FINALIZE_TIMER_STOP) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_FINALIZE_TIMER_START) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control);
/*
* Highest priority actions
*/
} else if (fsa_actions & A_MSG_ROUTE) {
do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route);
} else if (fsa_actions & A_RECOVER) {
do_fsa_action(fsa_data, A_RECOVER, do_recover);
} else if (fsa_actions & A_CL_JOIN_RESULT) {
do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond);
} else if (fsa_actions & A_CL_JOIN_REQUEST) {
do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond);
} else if (fsa_actions & A_SHUTDOWN_REQ) {
do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req);
} else if (fsa_actions & A_ELECTION_VOTE) {
do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote);
} else if (fsa_actions & A_ELECTION_COUNT) {
do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote);
} else if (fsa_actions & A_LRM_EVENT) {
do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event);
/*
* High priority actions
*/
} else if (fsa_actions & A_STARTED) {
do_fsa_action(fsa_data, A_STARTED, do_started);
} else if (fsa_actions & A_CL_JOIN_QUERY) {
do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query);
} else if (fsa_actions & A_DC_TIMER_START) {
do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control);
/*
* Medium priority actions
*/
} else if (fsa_actions & A_DC_TAKEOVER) {
do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover);
} else if (fsa_actions & A_DC_RELEASE) {
do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release);
} else if (fsa_actions & A_DC_JOIN_FINAL) {
do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final);
} else if (fsa_actions & A_ELECTION_CHECK) {
do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check);
} else if (fsa_actions & A_ELECTION_START) {
do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote);
} else if (fsa_actions & A_TE_HALT) {
do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke);
} else if (fsa_actions & A_TE_CANCEL) {
do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke);
} else if (fsa_actions & A_DC_JOIN_OFFER_ALL) {
do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all);
} else if (fsa_actions & A_DC_JOIN_OFFER_ONE) {
- do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_all);
+ do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one);
} else if (fsa_actions & A_DC_JOIN_PROCESS_REQ) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer);
} else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack);
/*
* Low(er) priority actions
* Make sure the CIB is always updated before invoking the
* PE, and the PE before the TE
*/
} else if (fsa_actions & A_DC_JOIN_FINALIZE) {
do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize);
} else if (fsa_actions & A_LRM_INVOKE) {
do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke);
} else if (fsa_actions & A_PE_INVOKE) {
do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke);
} else if (fsa_actions & A_TE_INVOKE) {
do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke);
} else if (fsa_actions & A_CL_JOIN_ANNOUNCE) {
do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce);
/* sub-system stop */
} else if (fsa_actions & A_DC_RELEASED) {
do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release);
} else if (fsa_actions & A_PE_STOP) {
do_fsa_action(fsa_data, A_PE_STOP, do_pe_control);
} else if (fsa_actions & A_TE_STOP) {
do_fsa_action(fsa_data, A_TE_STOP, do_te_control);
} else if (fsa_actions & A_SHUTDOWN) {
do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown);
} else if (fsa_actions & A_LRM_DISCONNECT) {
do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control);
} else if (fsa_actions & A_CCM_DISCONNECT) {
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
do_fsa_action(fsa_data, A_CCM_DISCONNECT, do_ccm_control);
}
#endif
fsa_actions &= ~A_CCM_DISCONNECT;
} else if (fsa_actions & A_HA_DISCONNECT) {
do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control);
} else if (fsa_actions & A_CIB_STOP) {
do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control);
} else if (fsa_actions & A_STOP) {
do_fsa_action(fsa_data, A_STOP, do_stop);
/* exit gracefully */
} else if (fsa_actions & A_EXIT_0) {
do_fsa_action(fsa_data, A_EXIT_0, do_exit);
/* Error checking and reporting */
} else {
crm_err("Action %s (0x%llx) not supported ",
fsa_action2string(fsa_actions), fsa_actions);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__);
}
}
}
/*
 * Trace-log a short description of a queued FSA input.
 *
 * The message depends on the input's cause: CCM callbacks and LRM operation
 * callbacks are identified as such; inputs without attached data are logged
 * by origin only; anything else is treated as an XML message and its XML
 * payload is trace-logged as well.
 *
 * \param stored_msg  the queued input; a NULL input is ignored
 */
void
log_fsa_input(fsa_data_t * stored_msg)
{
    if (stored_msg == NULL) {
        return;
    }

    crm_trace("Processing queued input %d", stored_msg->id);

    if (stored_msg->fsa_cause == C_CCM_CALLBACK) {
        crm_trace("FSA processing CCM callback from %s", stored_msg->origin);

    } else if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) {
        crm_trace("FSA processing LRM callback from %s", stored_msg->origin);

    } else if (stored_msg->data == NULL) {
        crm_trace("FSA processing input from %s", stored_msg->origin);

    } else {
        ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __FUNCTION__);

        crm_trace("FSA processing XML message from %s", stored_msg->origin);

        /* Other callers of the typed-data accessor check its result for NULL;
         * do the same here rather than dereferencing it blindly. */
        if (ha_input != NULL) {
            crm_log_xml_trace(ha_input->xml, "FSA message data");
        }
    }
}
/*
 * Hook run by s_crmd_fsa() whenever the FSA moves to a different state.
 *
 * Logs the transition (severity depends on the states involved), stops or
 * requests timers appropriate to the new state, performs per-state work in
 * the switch below, and returns the action mask to use from now on.
 *
 * \param actions     currently pending actions
 * \param cur_state   state being left
 * \param next_state  state being entered (asserted to differ from cur_state)
 * \param msg_data    the input that caused the transition; dereferenced
 *                    unconditionally
 * \return actions with timer, shutdown-request and A_RECOVER bits adjusted
 *
 * NOTE(review): several g_hash_table_size()/crm_active_peers() results are
 * printed with %d while others use %u; confirm the intended specifier.
 */
long long
do_state_transition(long long actions,
enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state, fsa_data_t * msg_data)
{
int level = LOG_INFO;
/* working copy of the action mask; copied back to actions at the end */
long long tmp = actions;
/* set to FALSE only when entering S_RECOVERY */
gboolean clear_recovery_bit = TRUE;
enum crmd_fsa_cause cause = msg_data->fsa_cause;
enum crmd_fsa_input current_input = msg_data->fsa_input;
const char *state_from = fsa_state2string(cur_state);
const char *state_to = fsa_state2string(next_state);
const char *input = fsa_input2string(current_input);
CRM_LOG_ASSERT(cur_state != next_state);
do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]",
state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
/* pick the log severity for the transition message */
if (cur_state == S_IDLE || next_state == S_IDLE) {
level = LOG_NOTICE;
} else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) {
level = LOG_NOTICE;
} else if (cur_state == S_ELECTION) {
level = LOG_NOTICE;
} else if (next_state == S_RECOVERY) {
level = LOG_WARNING;
}
do_crm_log(level, "State transition %s -> %s [ input=%s cause=%s origin=%s ]",
state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
/* the last two clauses might cause trouble later */
if (election_timeout != NULL && next_state != S_ELECTION && cur_state != S_RELEASE_DC) {
crm_timer_stop(election_timeout);
/* } else { */
/* crm_timer_start(election_timeout); */
}
#if 0
if ((fsa_input_register & R_SHUTDOWN)) {
set_bit(tmp, A_DC_TIMER_STOP);
}
#endif
/* the integration timer is requested only for S_INTEGRATION, stopped otherwise */
if (next_state == S_INTEGRATION) {
set_bit(tmp, A_INTEGRATE_TIMER_START);
} else {
set_bit(tmp, A_INTEGRATE_TIMER_STOP);
}
/* likewise the finalization timer for S_FINALIZE_JOIN */
if (next_state == S_FINALIZE_JOIN) {
set_bit(tmp, A_FINALIZE_TIMER_START);
} else {
set_bit(tmp, A_FINALIZE_TIMER_STOP);
}
if (next_state != S_PENDING) {
set_bit(tmp, A_DC_TIMER_STOP);
}
if (next_state != S_ELECTION) {
highest_born_on = 0;
}
/* NOTE(review): unlike election_timeout above, recheck_timer is not NULL-checked here */
if (next_state != S_IDLE) {
crm_timer_stop(recheck_timer);
}
if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) {
populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
node_update_join | node_update_expected, __FUNCTION__);
}
switch (next_state) {
case S_PENDING:
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
/* fall through */
case S_ELECTION:
crm_trace("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state));
update_dc(NULL);
break;
case S_NOT_DC:
election_trigger->counter = 0;
/* free every entry of the stonith cleanup list, then the list itself */
if (stonith_cleanup_list) {
GListPtr gIter = NULL;
for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) {
char *target = gIter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we have a new DC");
set_bit(tmp, A_SHUTDOWN_REQ);
}
CRM_LOG_ASSERT(fsa_our_dc != NULL);
if (fsa_our_dc == NULL) {
crm_err("Reached S_NOT_DC without a DC" " being recorded");
}
break;
case S_RECOVERY:
clear_recovery_bit = FALSE;
break;
case S_FINALIZE_JOIN:
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_warn("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
/* anything still in welcomed_nodes was offered membership but has not replied */
if (g_hash_table_size(welcomed_nodes) > 0) {
char *msg = strdup(" Welcome reply not received from");
crm_warn("%u cluster nodes failed to respond"
" to the join offer.", g_hash_table_size(welcomed_nodes));
g_hash_table_foreach(welcomed_nodes, ghash_print_node, msg);
free(msg);
} else {
crm_debug("All %d cluster nodes "
"responded to the join offer.", g_hash_table_size(integrated_nodes));
}
break;
case S_POLICY_ENGINE:
election_trigger->counter = 0;
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_info("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
/* compare the join tables against the active membership and report/react */
if (g_hash_table_size(finalized_nodes) > 0) {
char *msg = strdup(" Confirm not received from");
crm_err("%u cluster nodes failed to confirm"
" their join.", g_hash_table_size(finalized_nodes));
g_hash_table_foreach(finalized_nodes, ghash_print_node, msg);
free(msg);
} else if (g_hash_table_size(confirmed_nodes)
== crm_active_peers()) {
crm_debug("All %u cluster nodes are"
" eligible to run resources.", crm_active_peers());
} else if (g_hash_table_size(confirmed_nodes) > crm_active_peers()) {
crm_err("We have more confirmed nodes than our membership does: %d vs. %d",
g_hash_table_size(confirmed_nodes), crm_active_peers());
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
} else if (saved_ccm_membership_id != crm_peer_seq) {
crm_info("Membership changed: %llu -> %llu - join restart",
saved_ccm_membership_id, crm_peer_seq);
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
} else {
crm_warn("Only %u of %u cluster "
"nodes are eligible to run resources - continue %d",
g_hash_table_size(confirmed_nodes),
crm_active_peers(), g_hash_table_size(welcomed_nodes));
}
/* initialize_join(FALSE); */
break;
case S_STOPPING:
case S_TERMINATE:
/* possibly redundant */
set_bit(fsa_input_register, R_SHUTDOWN);
break;
case S_IDLE:
CRM_LOG_ASSERT(AM_I_DC);
dump_rsc_info();
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we are the DC");
set_bit(tmp, A_SHUTDOWN_REQ);
}
if (recheck_timer->period_ms > 0) {
crm_debug("Starting %s", get_timer_desc(recheck_timer));
crm_timer_start(recheck_timer);
}
break;
default:
break;
}
/*
* A_RECOVER is forced on when entering S_RECOVERY, left untouched when
* entering S_PENDING, and cleared for every other new state.
*/
if (clear_recovery_bit && next_state != S_PENDING) {
tmp &= ~A_RECOVER;
} else if (clear_recovery_bit == FALSE) {
tmp |= A_RECOVER;
}
if (tmp != actions) {
/* fsa_dump_actions(actions ^ tmp, "New actions"); */
actions = tmp;
}
return actions;
}
/*
 * Placeholder with an empty body; do_state_transition() calls it on entry
 * to S_IDLE.
 */
void
dump_rsc_info(void)
{
}
/*
 * Hash-table iteration callback: log one entry at info level.
 *
 * key, value and user_data are all treated as C strings; the line is
 * printed as "<user_data>: <key> <value>".
 */
void
ghash_print_node(gpointer key, gpointer value, gpointer user_data)
{
    crm_info("%s: %s %s", (const char *)user_data, (const char *)key, (const char *)value);
}
diff --git a/crmd/join_dc.c b/crmd/join_dc.c
index d8a7bc0683..ea4424fde7 100644
--- a/crmd/join_dc.c
+++ b/crmd/join_dc.c
@@ -1,669 +1,686 @@
-/*
+/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
/*
 * Per-join bookkeeping tables, keyed by node name. All four start out NULL
 * and are (re)created by initialize_join().
 */
/* nodes that have been sent a join offer; value is the join id as a string */
GHashTable *welcomed_nodes = NULL;
/* nodes whose join request was processed; value is the ack/nack join state */
GHashTable *integrated_nodes = NULL;
GHashTable *finalized_nodes = NULL;
GHashTable *confirmed_nodes = NULL;
char *max_epoch = NULL;
/* name of the node that supplied max_generation_xml */
char *max_generation_from = NULL;
/* copy of the best generation seen so far in the current join round */
xmlNode *max_generation_xml = NULL;
void initialize_join(gboolean before);
gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* incremented by do_dc_join_offer_all() at the start of every join round */
static int current_join_id = 0;
/* value of crm_peer_seq at the time the most recent join offers were made */
unsigned long long saved_ccm_membership_id = 0;
/*
 * Reset the per-join bookkeeping.
 *
 * Destroys and recreates the welcomed/integrated/finalized/confirmed node
 * tables. When called with before == TRUE it additionally forgets the best
 * CIB generation seen so far and clears the R_HAVE_CIB and R_CIB_ASKED
 * bits in the FSA input register.
 *
 * \param before  TRUE to also reset the generation tracking and CIB flags
 */
void
initialize_join(gboolean before)
{
    /* clear out/reset a bunch of stuff */
    crm_debug("join-%d: Initializing join data (flag=%s)",
              current_join_id, before ? "true" : "false");

    /* The tables are initialised to NULL at file scope, so guard the destroy
     * calls just as erase_node_from_join() guards its lookups. */
    if (welcomed_nodes != NULL) {
        g_hash_table_destroy(welcomed_nodes);
    }
    if (integrated_nodes != NULL) {
        g_hash_table_destroy(integrated_nodes);
    }
    if (finalized_nodes != NULL) {
        g_hash_table_destroy(finalized_nodes);
    }
    if (confirmed_nodes != NULL) {
        g_hash_table_destroy(confirmed_nodes);
    }

    if (before) {
        /* free(NULL) is a no-op, so no guard is needed */
        free(max_generation_from);
        max_generation_from = NULL;

        if (max_generation_xml != NULL) {
            free_xml(max_generation_xml);
            max_generation_xml = NULL;
        }
        clear_bit(fsa_input_register, R_HAVE_CIB);
        clear_bit(fsa_input_register, R_CIB_ASKED);
    }

    /* keys and values are both owned (and freed) by the tables */
    welcomed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                           g_hash_destroy_str, g_hash_destroy_str);
    integrated_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                             g_hash_destroy_str, g_hash_destroy_str);
    finalized_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
    confirmed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
}
/*
 * Remove a node from every join bookkeeping table that exists.
 *
 * A NULL uname is ignored. If the node was present in at least one table,
 * a debug message records which tables it was removed from.
 */
void
erase_node_from_join(const char *uname)
{
    gboolean was_welcomed = FALSE;
    gboolean was_integrated = FALSE;
    gboolean was_finalized = FALSE;
    gboolean was_confirmed = FALSE;

    if (uname == NULL) {
        return;
    }

    /* tables that have not been created yet are simply skipped */
    was_welcomed = welcomed_nodes ? g_hash_table_remove(welcomed_nodes, uname) : FALSE;
    was_integrated = integrated_nodes ? g_hash_table_remove(integrated_nodes, uname) : FALSE;
    was_finalized = finalized_nodes ? g_hash_table_remove(finalized_nodes, uname) : FALSE;
    was_confirmed = confirmed_nodes ? g_hash_table_remove(confirmed_nodes, uname) : FALSE;

    if (!(was_welcomed || was_integrated || was_finalized || was_confirmed)) {
        return;
    }

    crm_debug("Removed node %s from join calculations:"
              " welcomed=%d itegrated=%d finalized=%d confirmed=%d",
              uname, was_welcomed, was_integrated, was_finalized, was_confirmed);
}
/*
 * Send a join offer to one peer; shaped so it can be used as a
 * g_hash_table_foreach() callback over crm_peer_cache.
 *
 * key is unused. value is the crm_node_t to make the offer to; inactive
 * peers and peers without a uname are skipped.
 *
 * As patched (the "+" lines): when user_data is non-NULL, a peer already
 * present in any of the four join tables is skipped; otherwise the peer is
 * erased from all join tables before the offer is sent. After sending, the
 * peer is recorded in welcomed_nodes with the current join id as value.
 *
 * NOTE(review): the lookups and the insert use the join tables without a
 * NULL check. The else branch at the end looks unreachable, since inactive
 * peers already returned at the top - confirm.
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
const char *join_to = NULL;
const crm_node_t *member = value;
CRM_ASSERT(member != NULL);
if (crm_is_peer_active(member) == FALSE) {
crm_trace("Not making an offer to %s: not active", member->uname);
return;
}
join_to = member->uname;
if (join_to == NULL) {
crm_err("No recipient for welcome message");
return;
}
- erase_node_from_join(join_to);
-
if (saved_ccm_membership_id != crm_peer_seq) {
saved_ccm_membership_id = crm_peer_seq;
crm_info("Making join offers based on membership %llu", crm_peer_seq);
}
+ if(user_data) {
+ const char *reason = NULL;
+ if(reason == NULL && g_hash_table_lookup(welcomed_nodes, join_to)) {
+ reason = "welcomed";
+ }
+ if(reason == NULL && g_hash_table_lookup(integrated_nodes, join_to)) {
+ reason = "integrated";
+ }
+ if(reason == NULL && g_hash_table_lookup(finalized_nodes, join_to)) {
+ reason = "finalized";
+ }
+ if(reason == NULL && g_hash_table_lookup(confirmed_nodes, join_to)) {
+ reason = "confirmed";
+ }
+ if(reason) {
+ crm_info("Skipping %s: already %s", member->uname, reason);
+ return;
+ }
+ }
+
+ erase_node_from_join(join_to);
+
if (crm_is_peer_active(member)) {
xmlNode *offer = create_request(CRM_OP_JOIN_OFFER, NULL, join_to,
CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
char *join_offered = crm_itoa(current_join_id);
crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id);
/* send the welcome */
- crm_debug("join-%d: Sending offer to %s", current_join_id, join_to);
+ crm_info("join-%d: Sending offer to %s", current_join_id, join_to);
send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, offer, TRUE);
free_xml(offer);
g_hash_table_insert(welcomed_nodes, strdup(join_to), join_offered);
+ /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */
+
} else {
crm_info("Peer process on %s is not active (yet?): %.8lx %d",
join_to, (long)member->processes, g_hash_table_size(crm_peer_cache));
}
}
/* A_DC_JOIN_OFFER_ALL */
/*
 * FSA action handler: start a new join round and offer membership to
 * every peer in crm_peer_cache.
 *
 * Bumps current_join_id, resets all join bookkeeping via
 * initialize_join(TRUE), clears the recorded DC, and then calls
 * join_make_offer() for each cached peer. The handler does not wait for
 * replies; it only logs how many offers are now outstanding.
 *
 * \param action         the action being performed (unused here)
 * \param cause          what triggered the action
 * \param cur_state      FSA state at dispatch time (unused here)
 * \param current_input  FSA input at dispatch time
 * \param msg_data       the input being processed (unused here)
 */
void
do_dc_join_offer_all(long long action,
                     enum crmd_fsa_cause cause,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* reset everyone's status back to down or in_ccm in the CIB
     *
     * any nodes that are active in the CIB but not in the CCM list
     * will be seen as offline by the PE anyway
     */
    current_join_id++;
    initialize_join(TRUE);

    update_dc(NULL);
    if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
        crm_info("A new node joined the cluster");
    }

    /* NULL user_data: offer to every active peer unconditionally */
    g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);

    /* don't waste time by invoking the PE yet; the table size is unsigned,
     * hence %u as at the other g_hash_table_size() call sites */
    crm_info("join-%d: Waiting on %u outstanding join acks",
             current_join_id, g_hash_table_size(welcomed_nodes));
}
/* A_DC_JOIN_OFFER_ONE */
/*
 * FSA action handler: (re-)offer membership in response to a single node.
 *
 * Without attached message data, the patched code ("+" lines) offers to
 * every cached peer that is not already in a join table and returns.
 * With message data, the sender named in F_CRM_HOST_FROM is erased from the
 * join tables and sent a fresh offer, an offer is also made to ourselves,
 * and start_transition() is called.
 *
 * NOTE(review): in the no-data path &member is passed as user_data purely
 * as a non-NULL flag; member itself is still uninitialised at that point
 * and join_make_offer() only tests the pointer for NULL.
 * NOTE(review): op comes from crm_element_value() and is printed with %s
 * without a NULL check - confirm it cannot be NULL here.
 */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_node_t *member;
ha_msg_input_t *welcome = NULL;
const char *op = NULL;
const char *join_to = NULL;
if (msg_data->data) {
welcome = fsa_typed_data(fsa_dt_ha_msg);
} else {
- crm_info("A new node joined - wait until it contacts us");
+ crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes");
+ g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
return;
}
if (welcome == NULL) {
crm_err("Attempt to send welcome message without a message to reply to!");
return;
}
join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
if (join_to == NULL) {
crm_err("Attempt to send welcome message without a host to reply to!");
return;
}
member = crm_get_peer(0, join_to);
- crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING);
-
- if (crm_is_peer_active(member) == FALSE) {
- crm_err("%s is not a fully active member of our partition", join_to);
- return;
- }
-
op = crm_element_value(welcome->msg, F_CRM_TASK);
if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) {
/* note: it _is_ possible that a node will have been
* sick or starting up when the original offer was made.
* however, it will either re-announce itself in due course
* _or_ we can re-store the original offer on the client.
*/
crm_trace("(Re-)offering membership to %s...", join_to);
}
crm_info("join-%d: Processing %s request from %s in state %s",
current_join_id, op, join_to, fsa_state2string(cur_state));
+ erase_node_from_join(join_to);
join_make_offer(NULL, member, NULL);
/* always offer to the DC (ourselves)
* this ensures the correct value for max_generation_from
*/
member = crm_get_peer(0, fsa_our_uname);
join_make_offer(NULL, member, NULL);
/* this was a genuine join request, cancel any existing
* transition and invoke the PE
*/
start_transition(fsa_state);
/* dont waste time by invoking the pe yet; */
crm_debug("Waiting on %d outstanding join acks for join-%d",
g_hash_table_size(welcomed_nodes), current_join_id);
}
/*
 * Compare one attribute of two XML nodes as integers.
 *
 * The attribute named by field is read from each node with
 * crm_element_value() and converted with crm_int_helper().
 *
 * \return -1 if left's value is smaller, 1 if it is larger, 0 if equal
 */
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
    const char *lhs_text = crm_element_value(left, field);
    const char *rhs_text = crm_element_value(right, field);

    int lhs = crm_int_helper(lhs_text, NULL);
    int rhs = crm_int_helper(rhs_text, NULL);

    /* yields -1, 0 or 1 without branching */
    return (lhs > rhs) - (lhs < rhs);
}
/* A_DC_JOIN_PROCESS_REQ */
/*
 * Process a node's reply to a join offer.
 *
 * The reply carries the sender's name (F_CRM_HOST_FROM), the join round it
 * answers (F_CRM_JOIN_ID) and, as XML payload, the sender's CIB generation.
 *
 *  - Replies without a sender name are rejected up front: the name is used
 *    below as a peer lookup key, as a hash-table key and as an argument to
 *    strdup(), none of which are safe with NULL.
 *  - Replies for a different join round are ignored (after re-checking the
 *    join state).
 *  - Otherwise the sender is recorded in integrated_nodes with either
 *    CRMD_JOINSTATE_MEMBER or CRMD_JOINSTATE_NACK, removed from
 *    welcomed_nodes, and check_join_state() is consulted.
 *
 * While doing so, max_generation_xml / max_generation_from are updated to
 * track the best generation seen so far and who reported it.
 */
void
do_dc_join_filter_offer(long long action,
                        enum crmd_fsa_cause cause,
                        enum crmd_fsa_state cur_state,
                        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *generation = NULL;

    int cmp = 0;
    int join_id = -1;
    gboolean ack_nack_bool = TRUE;
    const char *ack_nack = CRMD_JOINSTATE_MEMBER;
    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
    const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);

    crm_node_t *join_node = NULL;

    if (join_from == NULL) {
        /* Nothing sensible can be recorded for an anonymous sender */
        crm_err("Ignoring invalid join request without node name");
        return;
    }

    join_node = crm_get_peer(0, join_from);

    crm_debug("Processing req from %s", join_from);

    generation = join_ack->xml;
    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);

    if (max_generation_xml != NULL && generation != NULL) {
        int lpc = 0;

        /* Compared in this order; the first differing field decides */
        const char *attributes[] = {
            XML_ATTR_GENERATION_ADMIN,
            XML_ATTR_GENERATION,
            XML_ATTR_NUMUPDATES,
        };

        for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) {
            /* cmp < 0: current max is older than the sender's generation */
            cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
        }
    }

    if (join_id != current_join_id) {
        crm_debug("Invalid response from %s: join-%d vs. join-%d",
                  join_from, join_id, current_join_id);
        check_join_state(cur_state, __FUNCTION__);
        return;

    } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
        crm_err("Node %s is not a member", join_from);
        ack_nack_bool = FALSE;

    } else if (generation == NULL) {
        crm_err("Generation was NULL");
        ack_nack_bool = FALSE;

    } else if (max_generation_xml == NULL) {
        /* First usable generation of this round becomes the reference */
        max_generation_xml = copy_xml(generation);
        max_generation_from = strdup(join_from);

    } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
        /* On a tie, our own copy is preferred */
        crm_debug("%s has a better generation number than"
                  " the current max %s", join_from, max_generation_from);

        /* max_generation_xml cannot be NULL here: the previous branch
         * handles that case
         */
        crm_log_xml_debug(max_generation_xml, "Max generation");
        crm_log_xml_debug(generation, "Their generation");

        free(max_generation_from);
        free_xml(max_generation_xml);

        max_generation_from = strdup(join_from);
        max_generation_xml = copy_xml(join_ack->xml);
    }

    if (ack_nack_bool == FALSE) {
        /* NACK this client */
        ack_nack = CRMD_JOINSTATE_NACK;
        crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref);
    } else {
        crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
    }

    /* record the verdict (member or nack) for this node */
    g_hash_table_insert(integrated_nodes, strdup(join_from), strdup(ack_nack));
    crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);

    crm_debug("%u nodes have been integrated into join-%d",
              g_hash_table_size(integrated_nodes), join_id);

    /* the offer to this node is no longer outstanding */
    g_hash_table_remove(welcomed_nodes, join_from);

    if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
        /* dont waste time by invoking the PE yet; */
        crm_debug("join-%d: Still waiting on %d outstanding offers",
                  join_id, g_hash_table_size(welcomed_nodes));
    }
}
/* A_DC_JOIN_FINALIZE */
/*
 * Begin finalizing the current join round by synchronising the CIB.
 *
 * With no integrated nodes at all, I_ELECTION_DC is raised and nothing else
 * happens.  Otherwise R_HAVE_CIB is recomputed (set when no best-generation
 * holder is recorded or when that holder is us), the action stalls while
 * R_IN_TRANSITION is set, and finally a CIB sync is requested from either
 * ourselves or max_generation_from.  finalize_sync_callback() is registered
 * for the result and receives the heap-allocated source name as user data.
 */
void
do_dc_join_finalize(long long action,
                    enum crmd_fsa_cause cause,
                    enum crmd_fsa_state cur_state,
                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    char *sync_from = NULL;
    int rc = pcmk_ok;

    /* This we can do straight away and avoid clients timing us out
     *  while we compute the latest CIB
     */
    crm_debug("Finializing join-%d for %d clients",
              current_join_id, g_hash_table_size(integrated_nodes));

    if (g_hash_table_size(integrated_nodes) == 0) {
        /* Not even our own entry is present: restart from the election */
        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
        return;
    }

    /* Start from "no CIB" and only claim it when ours is the agreed best */
    clear_bit(fsa_input_register, R_HAVE_CIB);
    if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) {
        set_bit(fsa_input_register, R_HAVE_CIB);
    }

    if (is_set(fsa_input_register, R_IN_TRANSITION)) {
        crm_warn("join-%d: We are still in a transition."
                 "  Delaying until the TE completes.", current_join_id);
        crmd_fsa_stall(FALSE);
        return;
    }

    if (is_set(fsa_input_register, R_HAVE_CIB)) {
        /* Send _our_ CIB out to everyone */
        sync_from = strdup(fsa_our_uname);

    } else {
        /* ask for the agreed best CIB */
        sync_from = strdup(max_generation_from);
        crm_log_xml_debug(max_generation_xml, "Requesting version");
        set_bit(fsa_input_register, R_CIB_ASKED);
    }

    crm_info("join-%d: Syncing the CIB from %s to the rest of the cluster",
             current_join_id, sync_from);

    rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);

    /* sync_from is handed to the callback, which frees it */
    fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
}
/*
 * Completion callback for the CIB sync requested by do_dc_join_finalize().
 *
 * user_data is the heap-allocated name of the node the sync was taken from;
 * it is released here on every path.
 *
 *  - rc != pcmk_ok: the failure is logged (as a warning for
 *    -pcmk_err_old_data, as an error otherwise) and I_ELECTION_DC is raised.
 *  - success, but we are not the DC or not in S_FINALIZE_JOIN: only logged.
 *  - success while DC in S_FINALIZE_JOIN: R_HAVE_CIB is set and, unless
 *    check_join_state() returns TRUE, every entry of integrated_nodes is
 *    passed to finalize_join_for() and removed.
 */
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    CRM_LOG_ASSERT(-EPERM != rc);
    clear_bit(fsa_input_register, R_CIB_ASKED);

    if (rc != pcmk_ok) {
        do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR),
                   "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));

        /* restart the whole join process */
        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);

    } else if (!(AM_I_DC) || fsa_state != S_FINALIZE_JOIN) {
        /* The result arrived too late to be of use */
        crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s",
                  AM_I_DC ? "DC" : "CRMd", fsa_state2string(fsa_state));

    } else {
        set_bit(fsa_input_register, R_HAVE_CIB);
        clear_bit(fsa_input_register, R_CIB_ASKED);

        /* make sure dc_uuid is re-set to us */
        if (!check_join_state(fsa_state, __FUNCTION__)) {
            crm_debug("Notifying %d clients of join-%d results",
                      g_hash_table_size(integrated_nodes), current_join_id);
            g_hash_table_foreach_remove(integrated_nodes, finalize_join_for, NULL);
        }
    }

    free(user_data);
}
/*
 * Completion callback for the CIB update issued by do_dc_join_ack().
 *
 * A failed update is logged together with the offending message and raises
 * I_ERROR; a successful one triggers another look at the join state.
 */
static void
join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    /* Not referenced directly below; presumably consumed by the
     * register_fsa_error() macro - confirm before removing
     */
    fsa_data_t *msg_data = NULL;

    if (rc != pcmk_ok) {
        crm_err("Join update %d failed", call_id);
        crm_log_xml_debug(msg, "failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    crm_debug("Join update %d complete", call_id);
    check_join_state(fsa_state, __FUNCTION__);
}
/* A_DC_JOIN_PROCESS_ACK */
/*
 * Process a node's confirmation of a join acknowledgement.
 *
 * Only CRM_OP_JOIN_CONFIRM messages are considered.  The sender must be
 * named (F_CRM_HOST_FROM), must have an entry in finalized_nodes whose state
 * is CRMD_JOINSTATE_MEMBER, and must quote the current join id; otherwise
 * the message is dropped (and any finalized_nodes entry for the sender is
 * removed).
 *
 * For an accepted confirmation the sender moves from finalized_nodes to
 * confirmed_nodes, its LRM status section is erased and the XML payload of
 * the message is written to the status section of the CIB.
 * join_update_complete_callback() is registered for that update.
 */
void
do_dc_join_ack(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int join_id = -1;
    int call_id = 0;
    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

    const char *join_id_s = NULL;
    const char *join_state = NULL;
    const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);

    if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) {
        crm_debug("Ignoring op=%s message from %s", op, join_from);
        return;
    }

    if (join_from == NULL) {
        /* The sender name is used below as a hash-table key and is passed
         * to strdup(); neither is safe with NULL
         */
        crm_err("Ignoring join confirmation without node name");
        return;
    }

    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
    join_id_s = crm_element_value(join_ack->msg, F_CRM_JOIN_ID);

    /* now update them to "member" */

    crm_trace("Processing ack from %s", join_from);

    join_state = (const char *)
        g_hash_table_lookup(finalized_nodes, join_from);

    if (join_state == NULL) {
        /* No ack/nack was sent to this node in the current round */
        crm_err("Join not in progress: ignoring join-%d from %s", join_id, join_from);
        return;
    }

    if (safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) {
        crm_err("Node %s wasnt invited to join the cluster", join_from);
        g_hash_table_remove(finalized_nodes, join_from);
        return;
    }

    if (join_id != current_join_id) {
        crm_err("Invalid response from %s: join-%d vs. join-%d",
                join_from, join_id, current_join_id);
        g_hash_table_remove(finalized_nodes, join_from);
        return;
    }

    g_hash_table_remove(finalized_nodes, join_from);

    if (g_hash_table_lookup(confirmed_nodes, join_from) != NULL) {
        /* Logged only; the insert below proceeds regardless */
        crm_err("join-%d: hash already contains confirmation from %s", join_id, join_from);
    }

    g_hash_table_insert(confirmed_nodes, strdup(join_from), strdup(join_id_s));

    crm_info("join-%d: Updating node state to %s for %s",
             join_id, CRMD_JOINSTATE_MEMBER, join_from);

    /* update CIB with the current LRM status from the node
     * We dont need to notify the TE of these updates, a transition will
     *   be started in due time
     */
    erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
    fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
                   cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
    fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
    crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id);
}
/*
 * Per-node step of join finalization; used as the callback for
 * g_hash_table_foreach_remove() over integrated_nodes.
 *
 * key   - node name
 * value - join state recorded for the node
 *
 * A node entry is first written to the nodes section of the CIB.  If the
 * peer is not currently active its expected state is set to
 * CRMD_JOINSTATE_PENDING and nothing is sent.  Otherwise a
 * CRM_OP_JOIN_ACKNAK message carrying the current join id is sent: an ACK
 * (and an entry in finalized_nodes) when the recorded state is
 * CRMD_JOINSTATE_MEMBER, a NACK for any other state.
 *
 * Always returns TRUE.
 */
gboolean
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
    const char *join_to = (const char *)key;
    const char *join_state = (const char *)value;
    xmlNode *acknak = NULL;
    xmlNode *tmp1 = NULL;
    crm_node_t *join_node = NULL;
    gboolean accepted = FALSE;

    if (join_to == NULL || join_state == NULL) {
        return TRUE;
    }

    /* make sure a node entry exists for the new node */
    crm_trace("Creating node entry for %s", join_to);

    tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
    set_uuid(tmp1, XML_ATTR_UUID, join_to);
    crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);

    fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1,
                        cib_scope_local | cib_quorum_override | cib_can_create);
    free_xml(tmp1);

    join_node = crm_get_peer(0, join_to);
    if (!crm_is_peer_active(join_node)) {
        /*
         * NACK'ing nodes that the membership layer doesn't know about yet
         * simply creates more churn
         *
         * Better to leave them waiting and let the join restart when
         * the new membership event comes in
         *
         * All other NACKs (due to versions etc) should still be processed
         */
        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING);
        return TRUE;
    }

    /* send the ack/nack to the node */
    acknak = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to,
                            CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id);

    /* set the ack/nack */
    accepted = safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER);
    if (!accepted) {
        crm_warn("join-%d: NACK'ing join request from %s, state %s",
                 current_join_id, join_to, join_state);
        crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE);
        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK);

    } else {
        crm_debug("join-%d: ACK'ing join request from %s, state %s",
                  current_join_id, join_to, join_state);
        crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);

        /* a confirmation is now expected from this node */
        g_hash_table_insert(finalized_nodes, strdup(join_to), strdup(CRMD_JOINSTATE_MEMBER));
        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
    }

    send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE);
    free_xml(acknak);

    return TRUE;
}
void ghash_print_node(gpointer key, gpointer value, gpointer user_data);
/*
 * Decide whether the current join round can advance and register the
 * matching FSA input.
 *
 * cur_state - FSA state to evaluate
 * source    - name of the caller, used in log messages only
 *
 * Returns TRUE when I_INTEGRATED has been registered, or when finalization
 * is being delayed because R_HAVE_CIB is not set; FALSE in every other case
 * (including when I_NODE_JOIN or I_FINALIZED has been registered).
 */
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));
/* A membership change since the recorded sequence number takes priority */
if (saved_ccm_membership_id != crm_peer_seq) {
crm_debug("%s: Membership changed since join started: %llu -> %llu",
source, saved_ccm_membership_id, crm_peer_seq);
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
} else if (cur_state == S_INTEGRATION) {
/* Integration is complete once no welcomed node is left outstanding */
if (g_hash_table_size(welcomed_nodes) == 0) {
crm_debug("join-%d: Integration of %d peers complete: %s",
current_join_id, g_hash_table_size(integrated_nodes), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
return TRUE;
} else if (g_hash_table_size(integrated_nodes) == 0
&& g_hash_table_size(finalized_nodes) == 0) {
/* Both tables are empty: nothing left to finalize or confirm */
crm_debug("join-%d complete: %s", current_join_id, source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
} else if (g_hash_table_size(integrated_nodes) != 0
&& g_hash_table_size(finalized_nodes) != 0) {
char *msg = NULL;
/* NOTE(review): the next two lines are the removed (-) and added (+)
 * sides of a patch hunk that changes crm_err to crm_info
 */
- crm_err("join-%d: Waiting on %d integrated nodes"
+ crm_info("join-%d: Waiting on %d integrated nodes"
" AND %d finalized nodes",
current_join_id,
g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes));
/* msg is handed to ghash_print_node() as user data for each entry */
msg = strdup("Integrated node");
g_hash_table_foreach(integrated_nodes, ghash_print_node, msg);
free(msg);
msg = strdup("Finalized node");
g_hash_table_foreach(finalized_nodes, ghash_print_node, msg);
free(msg);
} else if (g_hash_table_size(integrated_nodes) != 0) {
crm_debug("join-%d: Still waiting on %d integrated nodes",
current_join_id, g_hash_table_size(integrated_nodes));
} else if (g_hash_table_size(finalized_nodes) != 0) {
crm_debug("join-%d: Still waiting on %d finalized nodes",
current_join_id, g_hash_table_size(finalized_nodes));
}
}
return FALSE;
}
/*
 * FSA action that refreshes attrd and the quorum state.
 *
 * None of the arguments are inspected.  update_attrd() is called with all
 * arguments NULL, then crm_update_quorum() is called with the current
 * crm_have_quorum value and TRUE as its second argument.
 */
void
do_dc_join_final(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input,
                 fsa_data_t * msg_data)
{
    crm_debug("Ensuring DC, quorum and node attributes are up-to-date");

    update_attrd(NULL, NULL, NULL, NULL);

    crm_update_quorum(crm_have_quorum, TRUE);
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 8:19 AM (1 d, 4 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1009862
Default Alt Text
(52 KB)

Event Timeline