diff --git a/crm/crmd/callbacks.c b/crm/crmd/callbacks.c index bcc08cdbef..e583f0fb90 100644 --- a/crm/crmd/callbacks.c +++ b/crm/crmd/callbacks.c @@ -1,662 +1,662 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *crmd_peer_state = NULL; crm_data_t *find_xml_in_hamessage(const HA_Message * msg); void crmd_ha_connection_destroy(gpointer user_data); /* From join_dc... */ extern gboolean check_join_state( enum crmd_fsa_state cur_state, const char *source); /* #define MAX_EMPTY_CALLBACKS 20 */ /* int empty_callbacks = 0; */ #define trigger_fsa(source) crm_debug_3("Triggering FSA: %s", __FUNCTION__); \ G_main_set_trigger(source); gboolean crmd_ha_msg_dispatch(IPC_Channel *channel, gpointer user_data) { gboolean stay_connected = TRUE; crm_debug_3("Invoked"); if(fsa_cluster_conn != NULL && IPC_ISRCONN(channel)) { if(fsa_cluster_conn->llc_ops->msgready(fsa_cluster_conn) == 0) { crm_debug_2("no message ready yet"); } /* invoke the callbacks but dont block */ fsa_cluster_conn->llc_ops->rcvmsg(fsa_cluster_conn, 0); } if (fsa_cluster_conn == NULL || channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_HA_DISCONNECTED) == FALSE) { crm_crit("Lost connection to heartbeat service."); } else { crm_info("Lost connection to heartbeat service."); } trigger_fsa(fsa_source); stay_connected = FALSE; } return stay_connected; } void crmd_ha_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); trigger_fsa(fsa_source); } void crmd_ha_msg_callback(HA_Message * msg, void* private_data) { int level = LOG_DEBUG; ha_msg_input_t *new_input = NULL; oc_node_t *from_node = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *seq = ha_msg_value(msg, F_SEQ); const char *op = ha_msg_value(msg, F_CRM_TASK); const char *sys_to = ha_msg_value(msg, F_CRM_SYS_TO); const char *sys_from = ha_msg_value(msg, F_CRM_SYS_FROM); CRM_DEV_ASSERT(from != NULL); crm_debug_2("HA[inbound]: %s from %s", op, from); if(fsa_membership_copy == NULL) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_message_adv( LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg); return; } from_node = g_hash_table_lookup(fsa_membership_copy->members, from); if(from_node == NULL) { if(safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, __FILE__, __FUNCTION__, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, g_hash_table_size(fsa_membership_copy->members)); crm_log_message_adv(LOG_MSG, "HA[inbound]: CCM Discard", msg); } else if(safe_str_eq(sys_to, CRM_SYSTEM_DC) && AM_I_DC == FALSE) { crm_debug_2("Ignoring message for the DC [F_SEQ=%s]", seq); return; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { if(AM_I_DC && safe_str_neq(from, fsa_our_uname)) { crm_err("Another DC detected: %s (op=%s)", from, op); /* make sure the election happens NOW */ level = LOG_WARNING; if(fsa_state != S_ELECTION) { new_input = new_ha_msg_input(msg); register_fsa_error_adv( C_FSA_INTERNAL, I_ELECTION, NULL, new_input, __FUNCTION__); } #if 0 /* still thinking about this one... * could create a timing issue if we dont notice the * election before a new DC is elected. */ } else if(fsa_our_dc != NULL && safe_str_neq(from,fsa_our_dc)){ crm_warn("Ignoring message from wrong DC: %s vs. %s ", from, fsa_our_dc); return; #endif } else { crm_debug_2("Processing DC message from %s [F_SEQ=%s]", from, seq); } } if(new_input == NULL) { int msg_id = -1; crm_log_message_adv(LOG_MSG, "HA[inbound]", msg); new_input = new_ha_msg_input(msg); msg_id = register_fsa_input(C_HA_MESSAGE, I_ROUTER, new_input); crm_debug_2("Submitted %s from %s for processing (job=%d)", op, from, msg_id); } #if 0 if(ha_msg_value(msg, XML_ATTR_REFERENCE) == NULL) { ha_msg_add(new_input->msg, XML_ATTR_REFERENCE, seq); } #endif delete_ha_msg_input(new_input); trigger_fsa(fsa_source); return; } /* * Apparently returning TRUE means "stay connected, keep doing stuff". * Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; HA_Message *msg = NULL; ha_msg_input_t *new_input = NULL; crmd_client_t *curr_client = (crmd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Invoked: %s", curr_client->table_key); while(IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = msgfromIPC_noauth(client); if (msg == NULL) { crm_err("%s: no message this time", curr_client->table_key); continue; } lpc++; new_input = new_ha_msg_input(msg); crm_msg_del(msg); crm_debug_2("Processing msg from %s", curr_client->table_key); crm_log_message_adv(LOG_DEBUG_2, "CRMd[inbound]", new_input->msg); if(crmd_authorize_message(new_input, curr_client)) { register_fsa_input(C_IPC_MESSAGE, I_ROUTER, new_input); } delete_ha_msg_input(new_input); msg = NULL; new_input = NULL; if(client->ch_status != IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; process_client_disconnect(curr_client); } trigger_fsa(fsa_source); return stay_connected; } extern GCHSource *lrm_source; gboolean lrm_dispatch(IPC_Channel *src_not_used, gpointer user_data) { /* ?? src == lrm_channel ?? */ ll_lrm_t *lrm = (ll_lrm_t*)user_data; IPC_Channel *lrm_channel = lrm->lrm_ops->ipcchan(lrm); crm_debug_3("Invoked"); lrm->lrm_ops->rcvmsg(lrm, FALSE); if(lrm_channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } lrm_source = NULL; return FALSE; } return TRUE; } void lrm_op_callback(lrm_op_t* op) { CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } crm_debug_3("Invoked: %s/%s (%s)", op->op_type, op->rsc_id, op_status2text(op->op_status)); /* Make sure the LRM events are received in order */ register_fsa_input_later(C_LRM_OP_CALLBACK, I_LRM_EVENT, op); } void crmd_ha_status_callback( const char *node, const char * status, void* private_data) { crm_data_t *update = NULL; crm_notice("Status update: Node %s now has status [%s]",node,status); if(safe_str_neq(status, DEADSTATUS)) { crm_debug_3("nstatus callback was not for a dead node"); return; } /* this node is taost */ update = create_node_state( node, status, XML_BOOLEAN_NO, OFFLINESTATUS, CRMD_STATE_INACTIVE, NULL, TRUE, __FUNCTION__); crm_xml_add(update, XML_CIB_ATTR_REPLACE, XML_TAG_TRANSIENT_NODEATTRS); /* this change should not be broadcast */ update_local_cib(create_cib_fragment(update, XML_CIB_TAG_STATUS)); trigger_fsa(fsa_source); free_xml(update); } void crmd_client_status_callback(const char * node, const char * client, const char * status, void * private) { const char *join = NULL; crm_data_t *update = NULL; gboolean clear_shutdown = FALSE; crm_debug_3("Invoked"); if(safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; clear_shutdown = TRUE; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; join = CRMD_STATE_INACTIVE; /* clear_shutdown = TRUE; */ } set_bit_inplace(fsa_input_register, R_PEER_DATA); g_hash_table_replace( crmd_peer_state, crm_strdup(node), crm_strdup(status)); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { return; } else if(fsa_state == S_STOPPING) { return; } crm_notice("Status update: Client %s/%s now has status [%s]", node, client, status); if(safe_str_eq(status, ONLINESTATUS)) { /* remove the cached value in case it changed */ crm_info("Uncaching UUID for %s", node); unget_uuid(node); } if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)) { /* did our DC leave us */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else { crm_debug_3("Got client status callback"); update = create_node_state( node, NULL, NULL, status, join, NULL, clear_shutdown, __FUNCTION__); if(clear_shutdown) { crm_xml_add(update, XML_CIB_ATTR_REPLACE, XML_TAG_TRANSIENT_NODEATTRS); } /* it is safe to keep these updates on the local node * each node updates their own CIB */ - fsa_cib_conn->cmds->update( - fsa_cib_conn, XML_CIB_TAG_STATUS, update, NULL, + fsa_cib_anon_update( + XML_CIB_TAG_STATUS, update, cib_inhibit_bcast|cib_scope_local|cib_quorum_override); free_xml(update); if(AM_I_DC && safe_str_eq(status, OFFLINESTATUS)) { g_hash_table_remove(confirmed_nodes, node); g_hash_table_remove(finalized_nodes, node); g_hash_table_remove(integrated_nodes, node); g_hash_table_remove(welcomed_nodes, node); check_join_state(fsa_state, __FUNCTION__); } } trigger_fsa(fsa_source); } static void crmd_ipc_connection_destroy(gpointer user_data) { crmd_client_t *client = user_data; if(client == NULL) { crm_debug_4("No client to delete"); return; } if(client->client_source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", client->uuid, client->client_source); G_main_del_IPC_Channel(client->client_source); client->client_source = NULL; } crm_debug_3("Freeing %s client", client->uuid); crm_free(client->table_key); crm_free(client->sub_sys); crm_free(client->uuid); crm_free(client); return; } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { crm_debug_3("Invoked"); if (client_channel == NULL) { crm_err("Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { crmd_client_t *blank_client = NULL; crm_debug_3("Channel connected"); crm_malloc0(blank_client, sizeof(crmd_client_t)); if (blank_client == NULL) { return FALSE; } client_channel->ops->set_recv_qlen(client_channel, 100); client_channel->ops->set_send_qlen(client_channel, 100); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; blank_client->client_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_msg_callback, blank_client, crmd_ipc_connection_destroy); } return TRUE; } gboolean ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; gboolean was_error = FALSE; crm_debug_3("Invoked"); rc = oc_ev_handle_event(ccm_token); if(rc != 0) { if(is_set(fsa_input_register, R_CCM_DISCONNECTED) == FALSE) { /* we signed out, so this is expected */ register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); crm_err("CCM connection appears to have failed: rc=%d.", rc); } was_error = TRUE; } trigger_fsa(fsa_source); return !was_error; } static gboolean fsa_have_quorum = FALSE; void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { int instance = -1; gboolean update_cache = FALSE; struct crmd_ccm_data_s *event_data = NULL; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; gboolean trigger_transition = FALSE; crm_debug_3("Invoked"); if(data != NULL) { instance = membership->m_instance; } crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event)?"(re)attained":"lost", ccm_event_name(event), instance); switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID:/* fall through */ update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: #if UNTESTED if(AM_I_DC == FALSE) { break; } /* tell the TE to pretend it had completed and stop */ /* side effect: we'll end up in S_IDLE */ register_fsa_action(A_TE_HALT, TRUE); #endif break; case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; if(AM_I_DC && need_transition(fsa_state)) { trigger_transition = TRUE; } break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); crm_err("Shutting down after CCM event: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if(update_quorum && ccm_have_quorum(event) == FALSE) { /* did we just loose quorum? */ if(fsa_have_quorum && need_transition(fsa_state)) { crm_info("Quorum lost: triggering transition (%s)", ccm_event_name(event)); trigger_transition = TRUE; } fsa_have_quorum = FALSE; } else if(update_quorum) { crm_debug_2("Updating quorum after event %s", ccm_event_name(event)); fsa_have_quorum = TRUE; } if(trigger_transition) { crm_debug_2("Scheduling transition after event %s", ccm_event_name(event)); /* make sure that when we query the CIB that it has * the changes that triggered the transition */ switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; break; default: break; } if(update_cache == FALSE) { /* a stand-alone transition */ register_fsa_action(A_TE_CANCEL); } } if(update_cache) { crm_debug_2("Updating cache after event %s", ccm_event_name(event)); crm_malloc0(event_data, sizeof(struct crmd_ccm_data_s)); if(event_data == NULL) { return; } event_data->event = event; if(data != NULL) { event_data->oc = copy_ccm_oc_data(data); } register_fsa_input_adv( C_CCM_CALLBACK, I_CCM_EVENT, event_data, trigger_transition?A_TE_CANCEL:A_NOTHING, FALSE, __FUNCTION__); if (event_data->oc) { crm_free(event_data->oc); event_data->oc = NULL; } crm_free(event_data); } oc_ev_callback_done(cookie); return; } void crmd_cib_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); trigger_fsa(fsa_source); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); return; } longclock_t fsa_start = 0; longclock_t fsa_stop = 0; longclock_t fsa_diff = 0; gboolean crm_fsa_trigger(gpointer user_data) { unsigned int fsa_diff_ms = 0; if(fsa_diff_max_ms > 0) { fsa_start = time_longclock(); } crm_debug_2("Invoked (queue len: %d)", g_list_length(fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_debug_2("Exited (queue len: %d)", g_list_length(fsa_message_queue)); if(fsa_diff_max_ms > 0) { fsa_stop = time_longclock(); fsa_diff = sub_longclock(fsa_stop, fsa_start); fsa_diff_ms = longclockto_ms(fsa_diff); if(fsa_diff_ms > fsa_diff_max_ms) { crm_err("FSA took %dms to complete", fsa_diff_ms); } else if(fsa_diff_ms > fsa_diff_warn_ms) { crm_warn("FSA took %dms to complete", fsa_diff_ms); } } return TRUE; } diff --git a/crm/crmd/ccm.c b/crm/crmd/ccm.c index 4e05f3d596..5353e3367b 100644 --- a/crm/crmd/ccm.c +++ b/crm/crmd/ccm.c @@ -1,706 +1,705 @@ -/* $Id: ccm.c,v 1.102 2006/04/06 11:05:53 andrew Exp $ */ +/* $Id: ccm.c,v 1.103 2006/04/18 10:59:46 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); int register_with_ccm(ll_cluster_t *hb_cluster); void msg_ccm_join(const HA_Message *msg, void *foo); void crmd_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data); gboolean ghash_node_clfree(gpointer key, gpointer value, gpointer user_data); void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data); #define CCM_EVENT_DETAIL 0 #define CCM_EVENT_DETAIL_PARTIAL 0 oc_ev_t *fsa_ev_token; int num_ccm_register_fails = 0; int max_ccm_register_fails = 30; /* A_CCM_CONNECT */ enum crmd_fsa_input do_ccm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret; int fsa_ev_fd; gboolean did_fail = FALSE; if(action & A_CCM_DISCONNECT){ set_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); oc_ev_unregister(fsa_ev_token); } if(action & A_CCM_CONNECT) { crm_debug_3("Registering with CCM"); clear_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); ret = oc_ev_register(&fsa_ev_token); if (ret != 0) { crm_warn("CCM registration failed"); did_fail = TRUE; } if(did_fail == FALSE) { crm_debug_3("Setting up CCM callbacks"); ret = oc_ev_set_callback(fsa_ev_token, OC_EV_MEMB_CLASS, crmd_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if(did_fail == FALSE) { oc_ev_special(fsa_ev_token, OC_EV_MEMB_CLASS, 0/*don't care*/); crm_debug_3("Activating CCM token"); ret = oc_ev_activate(fsa_ev_token, &fsa_ev_fd); if (ret != 0){ crm_warn("CCM Activation failed"); did_fail = TRUE; } } if(did_fail) { num_ccm_register_fails++; oc_ev_unregister(fsa_ev_token); if(num_ccm_register_fails < max_ccm_register_fails) { crm_warn("CCM Connection failed" " %d times (%d max)", num_ccm_register_fails, max_ccm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } else { crm_err("CCM Activation failed %d (max) times", num_ccm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } } crm_info("CCM connection established..." " waiting for first callback"); G_main_add_fd(G_PRIORITY_HIGH, fsa_ev_fd, FALSE, ccm_dispatch, fsa_ev_token, default_ipc_connection_destroy); } if(action & ~(A_CCM_CONNECT|A_CCM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } extern GHashTable *voted; /* A_CCM_EVENT */ enum crmd_fsa_input do_ccm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input return_input = I_NULL; oc_ed_t event; const oc_ev_membership_t *oc = NULL; struct crmd_ccm_data_s *ccm_data = fsa_typed_data(fsa_dt_ccm); if(ccm_data == NULL) { crm_err("No data provided to FSA function"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } else if(msg_data->fsa_cause != C_CCM_CALLBACK) { crm_err("FSA function called in response to incorect input"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } event = ccm_data->event; oc = ccm_data->oc; ccm_event_detail(oc, event); if (OC_EV_MS_EVICTED == event) { /* todo: drop back to S_PENDING instead */ /* get out... NOW! * * go via the error recovery process so that HA will * restart us if required */ register_fsa_error(cause, I_ERROR, msg_data->data); return I_NULL; } if(AM_I_DC == FALSE && oc->m_n_out != 0) { /* Possibly move this logic to ghash_update_cib_node() */ unsigned lpc = 0; int offset = oc->m_out_idx; for(lpc=0; lpc < oc->m_n_out; lpc++) { const char *uname = oc->m_array[offset+lpc].node_uname; if(uname == NULL) { crm_err("CCM node had no name"); continue; } else { /* remove any no-votes they had cast */ if(voted != NULL) { g_hash_table_remove(voted, uname); } if(safe_str_eq(uname, fsa_our_dc)) { crm_warn("Our DC node (%s) left the cluster", uname); register_fsa_input(cause, I_ELECTION, NULL); } } } } return return_input; } /* A_CCM_UPDATE_CACHE */ /* * Take the opportunity to update the node status in the CIB as well */ enum crmd_fsa_input do_ccm_update_cache(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input next_input = I_NULL; unsigned int lpc; int offset; GHashTable *members = NULL; oc_ed_t event; const oc_ev_membership_t *oc = NULL; oc_node_list_t *tmp = NULL, *membership_copy = NULL; struct crmd_ccm_data_s *ccm_data = fsa_typed_data(fsa_dt_ccm); HA_Message *no_op = create_request( CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, AM_I_DC?CRM_SYSTEM_DC:CRM_SYSTEM_CRMD, NULL); if(ccm_data == NULL) { crm_err("No data provided to FSA function"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); send_msg_via_ha(fsa_cluster_conn, no_op); return I_NULL; } event = ccm_data->event; oc = ccm_data->oc; crm_debug_2("Updating CCM cache after a \"%s\" event.", ccm_event_name(event)); crm_debug_2("instance=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); #define ALAN_DEBUG 1 #ifdef ALAN_DEBUG { /* * Size (Size + 2) / 2 * * 3 (3+2)/2 = 5 / 2 = 2 * 4 (4+2)/2 = 6 / 2 = 3 * 5 (5+2)/2 = 7 / 2 = 3 * 6 (6+2)/2 = 8 / 2 = 4 * 7 (7+2)/2 = 9 / 2 = 4 */ unsigned int clsize = (oc->m_out_idx - oc->m_n_member); unsigned int plsize = (clsize + 2)/2; gboolean plurality = (oc->m_n_member >= plsize); gboolean Q = ccm_have_quorum(event); if(clsize == 2) { if (!Q) { crm_err("2 nodes w/o quorum"); } } else if(Q && !plurality) { crm_err("Quorum w/o plurality (%d/%d nodes)", oc->m_n_member, clsize); } else if(plurality && !Q) { crm_err("Plurality w/o Quorum (%d/%d nodes)", oc->m_n_member, clsize); } else { crm_debug_2("Quorum(%s) and plurality (%d/%d) agree.", Q?"true":"false", oc->m_n_member, clsize); } } #endif crm_malloc0(membership_copy, sizeof(oc_node_list_t)); if(membership_copy == NULL) { crm_crit("Couldnt create membership copy - out of memory"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } membership_copy->id = oc->m_instance; membership_copy->last_event = event; crm_debug_3("Copying members"); /*--*-- All Member Nodes --*--*/ offset = oc->m_memb_idx; membership_copy->members_size = oc->m_n_member; if(membership_copy->members_size > 0) { membership_copy->members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->members; for(lpc=0; lpc < membership_copy->members_size; lpc++) { oc_node_t *member = NULL; crm_debug_3("Copying member %d", lpc); crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; member->node_uname = NULL; if(oc->m_array[offset+lpc].node_uname != NULL) { member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); } else { crm_err("Node %d had a NULL uname", member->node_id); } g_hash_table_insert( members, member->node_uname, member); } } else { membership_copy->members = NULL; } crm_debug_3("Copying new members"); /*--*-- New Member Nodes --*--*/ offset = oc->m_in_idx; membership_copy->new_members_size = oc->m_n_in; if(membership_copy->new_members_size > 0) { membership_copy->new_members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->new_members; for(lpc=0; lpc < membership_copy->new_members_size; lpc++) { oc_node_t *member = NULL; crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_uname = NULL; member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; if(oc->m_array[offset+lpc].node_uname != NULL) { member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); } else { crm_err("Node %d had a NULL uname", member->node_id); } g_hash_table_insert( members, member->node_uname, member); g_hash_table_insert(members, member->node_uname, member); } } else { membership_copy->new_members = NULL; } crm_debug_3("Copying dead members"); /*--*-- Recently Dead Member Nodes --*--*/ offset = oc->m_out_idx; membership_copy->dead_members_size = oc->m_n_out; if(membership_copy->dead_members_size > 0) { membership_copy->dead_members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->dead_members; for(lpc=0; lpc < membership_copy->dead_members_size; lpc++) { oc_node_t *member = NULL; crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; member->node_uname = NULL; CRM_DEV_ASSERT(oc->m_array[offset+lpc].node_uname != NULL); if(oc->m_array[offset+lpc].node_uname == NULL) { continue; } member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); g_hash_table_insert( members, member->node_uname, member); g_hash_table_insert(members, member->node_uname, member); if(confirmed_nodes != NULL) { g_hash_table_remove( confirmed_nodes, member->node_uname); } if(finalized_nodes != NULL) { g_hash_table_remove( finalized_nodes, member->node_uname); } if(integrated_nodes != NULL) { g_hash_table_remove( integrated_nodes, member->node_uname); } } } else { membership_copy->dead_members = NULL; } tmp = fsa_membership_copy; fsa_membership_copy = membership_copy; crm_debug_2("Updated membership cache with %d (%d new, %d lost) members", g_hash_table_size(fsa_membership_copy->members), g_hash_table_size(fsa_membership_copy->new_members), g_hash_table_size(fsa_membership_copy->dead_members)); /* Free the old copy */ if(tmp != NULL) { if(tmp->members != NULL) g_hash_table_foreach_remove( tmp->members, ghash_node_clfree, NULL); if(tmp->new_members != NULL) g_hash_table_foreach_remove( tmp->new_members, ghash_node_clfree, NULL); if(tmp->dead_members != NULL) g_hash_table_foreach_remove( tmp->dead_members, ghash_node_clfree, NULL); crm_free(tmp); } crm_debug_3("Free'd old copies"); set_bit_inplace(fsa_input_register, R_CCM_DATA); if(cur_state != S_STOPPING) { crm_debug_3("Updating the CIB from CCM cache"); do_update_cib_nodes(FALSE, __FUNCTION__); } /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ send_msg_via_ha(fsa_cluster_conn, no_op); return next_input; } void ccm_event_detail(const oc_ev_membership_t *oc, oc_ed_t event) { int lpc; gboolean member = FALSE; member = FALSE; crm_debug_2("-----------------------"); crm_info("%s: trans=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", ccm_event_name(event), oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); #if !CCM_EVENT_DETAIL_PARTIAL for(lpc=0; lpc < oc->m_n_member; lpc++) { crm_info("\tCURRENT: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_memb_idx+lpc].node_uname, oc->m_array[oc->m_memb_idx+lpc].node_id, oc->m_array[oc->m_memb_idx+lpc].node_born_on); if(safe_str_eq(fsa_our_uname, oc->m_array[oc->m_memb_idx+lpc].node_uname)) { member = TRUE; } } if (member == FALSE) { crm_warn("MY NODE IS NOT IN CCM THE MEMBERSHIP LIST"); } #endif for(lpc=0; lpc<(int)oc->m_n_in; lpc++) { crm_info("\tNEW: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_in_idx+lpc].node_uname, oc->m_array[oc->m_in_idx+lpc].node_id, oc->m_array[oc->m_in_idx+lpc].node_born_on); } for(lpc=0; lpc<(int)oc->m_n_out; lpc++) { crm_info("\tLOST: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_out_idx+lpc].node_uname, oc->m_array[oc->m_out_idx+lpc].node_id, oc->m_array[oc->m_out_idx+lpc].node_born_on); if(fsa_our_uname != NULL && 0 == strcmp(fsa_our_uname, oc->m_array[oc->m_out_idx+lpc].node_uname)) { crm_err("We're not part of the cluster anymore"); } } crm_debug_2("-----------------------"); } int register_with_ccm(ll_cluster_t *hb_cluster) { return 0; } void msg_ccm_join(const HA_Message *msg, void *foo) { crm_debug_2("###### Received ccm_join message..."); if (msg != NULL) { crm_debug_2("[type=%s]", ha_msg_value(msg, F_TYPE)); crm_debug_2("[orig=%s]", ha_msg_value(msg, F_ORIG)); crm_debug_2("[to=%s]", ha_msg_value(msg, F_TO)); crm_debug_2("[status=%s]", ha_msg_value(msg, F_STATUS)); crm_debug_2("[info=%s]", ha_msg_value(msg, F_COMMENT)); crm_debug_2("[rsc_hold=%s]", ha_msg_value(msg, F_RESOURCES)); crm_debug_2("[stable=%s]", ha_msg_value(msg, F_ISSTABLE)); crm_debug_2("[rtype=%s]", ha_msg_value(msg, F_RTYPE)); crm_debug_2("[ts=%s]", ha_msg_value(msg, F_TIME)); crm_debug_2("[seq=%s]", ha_msg_value(msg, F_SEQ)); crm_debug_2("[generation=%s]", ha_msg_value(msg, F_HBGENERATION)); /* crm_debug_2("[=%s]", ha_msg_value(msg, F_)); */ } return; } struct update_data_s { const char *state; const char *caller; crm_data_t *updates; gboolean overwrite_join; }; static void ccm_node_update_complete(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { fsa_data_t *msg_data = NULL; if(rc == cib_ok) { crm_debug("Node update %d complete", call_id); } else { crm_err("Node update %d failed", call_id); crm_log_message(LOG_DEBUG, msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void do_update_cib_nodes(gboolean overwrite, const char *caller) { int call_id = 0; int call_options = cib_scope_local|cib_quorum_override; struct update_data_s update_data; crm_data_t *fragment = NULL; if(fsa_membership_copy == NULL) { /* We got a replace notification before being connected to * the CCM. * So there is no need to update the local CIB with our values * - since we have none. */ return; } fragment = create_xml_node(NULL, XML_CIB_TAG_STATUS); update_data.caller = caller; update_data.updates = fragment; update_data.state = XML_BOOLEAN_YES; update_data.overwrite_join = overwrite; if(overwrite == FALSE) { call_options = call_options|cib_inhibit_bcast; crm_debug_2("Inhibiting bcast for membership updates"); } /* live nodes */ if(fsa_membership_copy->members != NULL) { g_hash_table_foreach(fsa_membership_copy->members, ghash_update_cib_node, &update_data); } /* dead nodes */ update_data.state = XML_BOOLEAN_NO; if(fsa_membership_copy->dead_members != NULL) { g_hash_table_foreach(fsa_membership_copy->dead_members, ghash_update_cib_node, &update_data); } - call_id = fsa_cib_conn->cmds->update( - fsa_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL,call_options); + fsa_cib_update(XML_CIB_TAG_STATUS, fragment, call_options, call_id); add_cib_op_callback(call_id, FALSE, NULL, ccm_node_update_complete); free_xml(fragment); } void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) { crm_data_t *tmp1 = NULL; const char *join = NULL; const char *peer_online = NULL; const char *node_uname = (const char*)key; struct update_data_s* data = (struct update_data_s*)user_data; crm_debug_2("Updating %s: %s (overwrite=%s)", node_uname, data->state, data->overwrite_join?"true":"false"); peer_online = g_hash_table_lookup(crmd_peer_state, node_uname); if(data->overwrite_join) { if(safe_str_neq(peer_online, ONLINESTATUS)) { join = CRMD_STATE_INACTIVE; } else { const char *peer_member = g_hash_table_lookup( confirmed_nodes, node_uname); if(peer_member != NULL) { join = CRMD_JOINSTATE_MEMBER; } else { join = CRMD_JOINSTATE_PENDING; } } } tmp1 = create_node_state(node_uname, NULL, data->state, peer_online, join, NULL, FALSE, data->caller); add_node_copy(data->updates, tmp1); free_xml(tmp1); } gboolean ghash_node_clfree(gpointer key, gpointer value, gpointer user_data) { /* value->node_uname is free'd as "key" */ if(key != NULL) { crm_free(key); } if(value != NULL) { crm_free(value); } return TRUE; } diff --git a/crm/crmd/crmd_utils.h b/crm/crmd/crmd_utils.h index 977ffaec69..751485ac10 100644 --- a/crm/crmd/crmd_utils.h +++ b/crm/crmd/crmd_utils.h @@ -1,74 +1,93 @@ -/* $Id: crmd_utils.h,v 1.23 2006/04/09 14:38:04 andrew Exp $ */ +/* $Id: crmd_utils.h,v 1.24 2006/04/18 10:59:46 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRMD_UTILS__H #define CRMD_UTILS__H #include #include #define CLIENT_EXIT_WAIT 30 extern void process_client_disconnect(crmd_client_t *curr_client); extern void update_local_cib_adv( crm_data_t *msg_data, gboolean do_now, const char *raised_from); #define update_local_cib(data) update_local_cib_adv(data, TRUE, __FUNCTION__) +#define fsa_cib_update(section, data, options, call_id) \ + if(fsa_cib_conn != NULL) { \ + call_id = fsa_cib_conn->cmds->update( \ + fsa_cib_conn, section, data, NULL, options); \ + \ + } else { \ + crm_err("No CIB connection available"); \ + } + +#define fsa_cib_anon_update(section, data, options) \ + if(fsa_cib_conn != NULL) { \ + fsa_cib_conn->cmds->update( \ + fsa_cib_conn, section, data, NULL, options); \ + \ + } else { \ + crm_err("No CIB connection available"); \ + } + + extern long long toggle_bit (long long action_list, long long action); extern long long clear_bit (long long action_list, long long action); extern long long set_bit (long long action_list, long long action); #define set_bit_inplace(word, bit) word = set_bit(word, bit) #define clear_bit_inplace(word, bit) word = clear_bit(word, bit) #define toggle_bit_inplace(word, bit) word = toggle_bit(word, bit) extern gboolean is_set(long long action_list, long long action); extern gboolean is_set_any(long long action_list, long long action); extern gboolean crm_timer_stop (fsa_timer_t *timer); extern gboolean crm_timer_start(fsa_timer_t *timer); extern gboolean crm_timer_popped(gpointer data); extern crm_data_t *create_node_state( const char *uname, const char *ha_state, const char *ccm_state, const char *crmd_state, const char *join_state, const char *exp_state, gboolean clear_shutdown, const char *src); extern void create_node_entry( const char *uuid, const char *uname, const char *type); extern gboolean stop_subsystem ( struct crm_subsystem_s *centry, gboolean force_quit); extern gboolean start_subsystem(struct crm_subsystem_s *centry); extern lrm_op_t *copy_lrm_op(const lrm_op_t *op); extern lrm_rsc_t *copy_lrm_rsc(const lrm_rsc_t *rsc); extern struct crmd_ccm_data_s *copy_ccm_data( const struct crmd_ccm_data_s *ccm_input); extern oc_ev_membership_t *copy_ccm_oc_data(const oc_ev_membership_t *oc_in) ; extern void fsa_dump_actions(long long action, const char *text); extern void fsa_dump_inputs( int log_level, const char *text, long long input_register); extern gboolean need_transition(enum crmd_fsa_state state); extern void update_dc(HA_Message *msg, gboolean assert_same); #endif diff --git a/crm/crmd/election.c b/crm/crmd/election.c index 0c9812d0e8..25173c75dd 100644 --- a/crm/crmd/election.c +++ b/crm/crmd/election.c @@ -1,435 +1,434 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *voted = NULL; uint highest_born_on = -1; static int current_election_id = 1; /* A_ELECTION_VOTE */ enum crmd_fsa_input do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean not_voting = FALSE; HA_Message *vote = NULL; /* dont vote if we're in one of these states or wanting to shut down */ switch(cur_state) { case S_RECOVERY: case S_STOPPING: case S_TERMINATE: crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state)); not_voting = TRUE; break; default: break; } if(not_voting == FALSE) { if(is_set(fsa_input_register, R_STARTING)) { not_voting = TRUE; } } if(not_voting) { if(AM_I_DC) { register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } return I_NULL; } vote = create_request( CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); current_election_id++; ha_msg_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid); ha_msg_add_int(vote, F_CRM_ELECTION_ID, current_election_id); send_request(vote, NULL); if(cur_state == S_ELECTION || cur_state == S_RELEASE_DC) { crm_timer_start(election_timeout); } else if(cur_state != S_INTEGRATION) { crm_err("Broken? Voting in state %s", fsa_state2string(cur_state)); } return I_NULL; } char *dc_hb_msg = NULL; int beat_num = 0; gboolean do_dc_heartbeat(gpointer data) { #if 0 fsa_timer_t *timer = (fsa_timer_t *)data; crm_debug_3("Sending DC Heartbeat %d", beat_num); HA_Message *msg = ha_msg_new(5); ha_msg_add(msg, F_TYPE, T_CRM); ha_msg_add(msg, F_SUBTYPE, XML_ATTR_REQUEST); ha_msg_add(msg, F_CRM_SYS_TO, CRM_SYSTEM_CRMD); ha_msg_add(msg, F_CRM_SYS_FROM, CRM_SYSTEM_DC); ha_msg_add(msg, F_CRM_TASK, CRM_OP_HBEAT); ha_msg_add_int(msg, "dc_beat_seq", beat_num); beat_num++; if(send_msg_via_ha(fsa_cluster_conn, msg) == FALSE) { /* this is bad */ crm_timer_stop(timer); /* make it not go off again */ register_fsa_input(C_HEARTBEAT_FAILED, I_SHUTDOWN, NULL); return FALSE; } #endif return TRUE; } struct election_data_s { const char *winning_uname; unsigned int winning_bornon; }; enum crmd_fsa_input do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int voted_size = g_hash_table_size(voted); int num_members = g_hash_table_size(fsa_membership_copy->members); /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if(fsa_state != S_ELECTION) { crm_debug("Ignore election check: we not in an election"); } else if(voted_size == num_members) { /* we won and everyone has voted */ crm_timer_stop(election_timeout); register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); crm_debug("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } else { crm_info("Still waiting on %d non-votes (%d total)", num_members - voted_size, num_members); } return I_NULL; } /* A_ELECTION_COUNT */ enum crmd_fsa_input do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int election_id = -1; gboolean we_loose = FALSE; enum crmd_fsa_input election_result = I_NULL; oc_node_t *our_node = NULL, *your_node = NULL; ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg); const char *op = cl_get_string(vote->msg, F_CRM_TASK); const char *vote_from = cl_get_string(vote->msg, F_CRM_HOST_FROM); const char *your_version = cl_get_string(vote->msg, F_CRM_VERSION); const char *election_owner= cl_get_string(vote->msg, F_CRM_ELECTION_OWNER); /* if the membership copy is NULL we REALLY shouldnt be voting * the question is how we managed to get here. */ CRM_CHECK(fsa_membership_copy != NULL, return I_NULL); CRM_CHECK(fsa_membership_copy->members != NULL, return I_NULL); CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname); our_node = (oc_node_t*)g_hash_table_lookup( fsa_membership_copy->members, fsa_our_uname); your_node = (oc_node_t*)g_hash_table_lookup( fsa_membership_copy->members, vote_from); if(voted == NULL) { crm_debug("Created voted hash"); voted = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } ha_msg_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id); /* update the list of nodes that have voted */ if(your_node != NULL) { char *op_copy = NULL; char *uname_copy = NULL; if(safe_str_eq(op, CRM_OP_NOVOTE)) { if(safe_str_neq(fsa_our_uuid, election_owner)) { crm_err("Recieved %s for %s (we are %s)", op, election_owner, fsa_our_uuid); } } crm_debug("Election owner: %s", election_owner); if(safe_str_eq(fsa_our_uuid, election_owner)) { if(election_id != current_election_id) { crm_debug("Ignore old novote from %s: %d vs. %d", your_node->node_uname, election_id, current_election_id); } uname_copy = crm_strdup(your_node->node_uname); op_copy = crm_strdup(op); g_hash_table_replace(voted, uname_copy, op_copy); crm_info("Updated voted hash for %s to %s", uname_copy, op_copy); } } else { crm_debug("Election ignore: The other side doesnt exist in CCM."); return I_NULL; } if(vote_from == NULL || safe_str_eq(vote_from, fsa_our_uname)) { /* dont count our own vote */ crm_info("Election ignore: our %s (%s)", op, crm_str(vote_from)); return I_NULL; } else if(safe_str_eq(op, CRM_OP_NOVOTE)) { crm_info("Election ignore: no-vote from %s", vote_from); return I_NULL; } crm_info("Election check: %s from %s", op, vote_from); if(our_node == NULL || fsa_membership_copy->last_event == OC_EV_MS_EVICTED) { crm_info("Election fail: we dont exist in CCM"); we_loose = TRUE; } else if(compare_version(your_version, CRM_FEATURE_SET) < 0) { crm_info("Election fail: version"); we_loose = TRUE; } else if(compare_version(your_version, CRM_FEATURE_SET) > 0) { crm_info("Election pass: version"); } else if(your_node->node_born_on < our_node->node_born_on) { crm_debug("Election fail: born_on"); we_loose = TRUE; } else if(your_node->node_born_on > our_node->node_born_on) { crm_debug("Election pass: born_on"); } else if(strcmp(fsa_our_uname, vote_from) > 0) { crm_debug("Election fail: uname"); we_loose = TRUE; } else { CRM_CHECK(strcmp(fsa_our_uname, vote_from) != 0, ;); crm_debug("Them: %s (born=%d) Us: %s (born=%d)", vote_from, your_node->node_born_on, fsa_our_uname, our_node->node_born_on); /* cant happen... * } else if(strcmp(fsa_our_uname, vote_from) == 0) { * * default... * } else { // strcmp(fsa_our_uname, vote_from) < 0 * we win */ } if(we_loose) { cl_uuid_t vote_uuid_s; gboolean vote_sent = FALSE; char vote_uuid[UU_UNPARSE_SIZEOF]; HA_Message *novote = create_request( CRM_OP_NOVOTE, NULL, vote_from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); update_dc(NULL, FALSE); if(cl_get_uuid(vote->msg, F_ORIGUUID, &vote_uuid_s) == HA_OK) { cl_uuid_unparse(&vote_uuid_s, vote_uuid); } else { cl_log_message(LOG_ERR, vote->msg); } crm_timer_stop(election_timeout); crm_debug("Election lost to %s (%s/%d)", vote_from, vote_uuid, election_id); if(fsa_input_register & R_THE_DC) { crm_debug_3("Give up the DC to %s", vote_from); election_result = I_RELEASE_DC; } else { crm_debug_3("We werent the DC anyway"); election_result = I_PENDING; } ha_msg_add(novote, F_CRM_ELECTION_OWNER, vote_uuid); ha_msg_add_int(novote, F_CRM_ELECTION_ID, election_id); vote_sent = send_request(novote, NULL); CRM_DEV_ASSERT(vote_sent); fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); } else { if(cur_state == S_PENDING) { crm_debug("Election ignore: We already lost the election"); return I_NULL; } else { crm_info("Election won over %s", vote_from); election_result = I_ELECTION; } crm_debug("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } register_fsa_input(C_FSA_INTERNAL, election_result, NULL); return I_NULL; } /* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */ /* we won */ enum crmd_fsa_input do_election_timer_ctrl(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { return I_NULL; } static void feature_update_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_TAKEOVER */ enum crmd_fsa_input do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int rc = cib_ok; crm_data_t *cib = NULL; crm_info("Taking over DC status for this partition"); set_bit_inplace(fsa_input_register, R_THE_DC); if(voted != NULL) { crm_debug_2("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } set_bit_inplace(fsa_input_register, R_JOIN_OK); set_bit_inplace(fsa_input_register, R_INVOKE_PE); fsa_cib_conn->cmds->set_slave_all(fsa_cib_conn, cib_none); fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_none); cib = createEmptyCib(); crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(cib, XML_ATTR_CIB_REVISION, CIB_FEATURE_SET); - fsa_cib_conn->cmds->update( - fsa_cib_conn, XML_TAG_CIB, cib, NULL, cib_quorum_override); + fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc); add_cib_op_callback(rc, FALSE, NULL, feature_update_callback); free_xml(cib); return I_NULL; } /* A_DC_RELEASE */ enum crmd_fsa_input do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; if(action & A_DC_RELEASE) { crm_debug("Releasing the role of DC"); clear_bit_inplace(fsa_input_register, R_THE_DC); } else if (action & A_DC_RELEASED) { crm_info("DC role released"); #if 0 if( are there errors ) { /* we cant stay up if not healthy */ /* or perhaps I_ERROR and go to S_RECOVER? */ result = I_SHUTDOWN; } #endif register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); } else { crm_err("Unknown action %s", fsa_action2string(action)); } crm_debug_2("Am I still the DC? %s", AM_I_DC?XML_BOOLEAN_YES:XML_BOOLEAN_NO); return result; } diff --git a/crm/crmd/join_dc.c b/crm/crmd/join_dc.c index b5c1bdefc3..5f69c573d1 100644 --- a/crm/crmd/join_dc.c +++ b/crm/crmd/join_dc.c @@ -1,632 +1,628 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include GHashTable *welcomed_nodes = NULL; GHashTable *integrated_nodes = NULL; GHashTable *finalized_nodes = NULL; GHashTable *confirmed_nodes = NULL; char *max_epoch = NULL; char *max_generation_from = NULL; crm_data_t *max_generation_xml = NULL; void initialize_join(gboolean before); gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void finalize_join(const char *caller); static int current_join_id = 0; static void update_attrd(void) { static IPC_Channel *attrd = NULL; if(attrd == NULL) { crm_info("Connecting to attrd..."); attrd = init_client_ipc_comms_nodispatch(T_ATTRD); } if(attrd != NULL) { HA_Message *update = ha_msg_new(3); ha_msg_add(update, F_TYPE, T_ATTRD); ha_msg_add(update, F_ORIG, "crmd"); ha_msg_add(update, "task", "refresh"); if(send_ipc_message(attrd, update) == FALSE) { crm_err("attrd refresh failed"); attrd = NULL; } else { crm_debug("sent attrd refresh"); } } else { crm_err("Couldn't connect to attrd"); } } void initialize_join(gboolean before) { /* clear out/reset a bunch of stuff */ crm_debug("join-%d: Initializing join data (flag=%s)", current_join_id, before?"true":"false"); g_hash_table_destroy(welcomed_nodes); g_hash_table_destroy(integrated_nodes); g_hash_table_destroy(finalized_nodes); g_hash_table_destroy(confirmed_nodes); if(before) { if(max_generation_from != NULL) { crm_free(max_generation_from); max_generation_from = NULL; } if(max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } clear_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *crm_online = NULL; const oc_node_t *member = (const oc_node_t*)value; if(member != NULL) { join_to = member->node_uname; } if(join_to == NULL) { crm_err("No recipient for welcome message"); return; } g_hash_table_remove(confirmed_nodes, join_to); g_hash_table_remove(finalized_nodes, join_to); g_hash_table_remove(integrated_nodes, join_to); g_hash_table_remove(welcomed_nodes, join_to); crm_online = g_hash_table_lookup(crmd_peer_state, join_to); if(safe_str_eq(crm_online, ONLINESTATUS)) { HA_Message *offer = create_request( CRM_OP_JOIN_OFFER, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); char *join_offered = crm_itoa(current_join_id); ha_msg_add_int(offer, F_CRM_JOIN_ID, current_join_id); /* send the welcome */ crm_debug("join-%d: Sending offer to %s", current_join_id, join_to); send_msg_via_ha(fsa_cluster_conn, offer); g_hash_table_insert( welcomed_nodes, crm_strdup(join_to), join_offered); } else { crm_warn("Peer process on %s is not active", join_to); } } /* A_DC_JOIN_OFFER_ALL */ enum crmd_fsa_input do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { /* reset everyones status back to down or in_ccm in the CIB * * any nodes that are active in the CIB but not in the CCM list * will be seen as offline by the PE anyway */ current_join_id++; initialize_join(TRUE); do_update_cib_nodes(TRUE, __FUNCTION__); update_dc(NULL, FALSE); g_hash_table_foreach( fsa_membership_copy->members, join_make_offer, NULL); /* dont waste time by invoking the PE yet; */ crm_info("join-%d: Waiting on %d outstanding join acks", current_join_id, g_hash_table_size(welcomed_nodes)); return I_NULL; } /* A_DC_JOIN_OFFER_ONE */ enum crmd_fsa_input do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { oc_node_t member; ha_msg_input_t *welcome = fsa_typed_data(fsa_dt_ha_msg); const char *join_to = NULL; if(welcome == NULL) { crm_err("Attempt to send welcome message " "without a message to reply to!"); return I_NULL; } join_to = cl_get_string(welcome->msg, F_CRM_HOST_FROM); if(join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_debug("Re-offering membership to %s...", join_to); } crm_info("join-%d: Processing annouce request from %s in state %s", current_join_id, join_to, fsa_state2string(cur_state)); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ member.node_uname = crm_strdup(fsa_our_uname); join_make_offer(NULL, &member, NULL); crm_free(member.node_uname); member.node_uname = crm_strdup(join_to); join_make_offer(NULL, &member, NULL); crm_free(member.node_uname); /* this was a genuine join request, cancel any existing * transition and invoke the PE */ if(need_transition(fsa_state)) { register_fsa_action(A_TE_CANCEL); } /* dont waste time by invoking the pe yet; */ crm_debug("Waiting on %d outstanding join acks for join-%d", g_hash_table_size(welcomed_nodes), current_join_id); return I_NULL; } /* A_DC_JOIN_PROCESS_REQ */ enum crmd_fsa_input do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { crm_data_t *generation = NULL; int join_id = -1; gboolean ack_nack_bool = TRUE; const char *ack_nack = CRMD_JOINSTATE_MEMBER; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = cl_get_string(join_ack->msg,F_CRM_HOST_FROM); const char *ref = cl_get_string(join_ack->msg,XML_ATTR_REFERENCE); gpointer join_node = g_hash_table_lookup(fsa_membership_copy->members, join_from); crm_debug("Processing req from %s", join_from); generation = join_ack->xml; ha_msg_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); crm_log_xml_debug_2(max_generation_xml, "Max generation"); crm_log_xml_debug_2(generation, "Their generation"); if(join_node == NULL) { crm_err("Node %s is not a member", join_from); ack_nack_bool = FALSE; } else if(generation == NULL) { crm_err("Generation was NULL"); ack_nack_bool = FALSE; } else if(join_id != current_join_id) { crm_debug("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); check_join_state(cur_state, __FUNCTION__); return I_NULL; } else if(max_generation_xml == NULL) { max_generation_xml = copy_xml(generation); max_generation_from = crm_strdup(join_from); } else if(cib_compare_generation(max_generation_xml, generation) < 0) { crm_debug("%s has a better generation number than" " the current max %s", join_from, max_generation_from); crm_free(max_generation_from); free_xml(max_generation_xml); max_generation_from = crm_strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } if(ack_nack_bool == FALSE) { /* NACK this client */ ack_nack = CRMD_STATE_INACTIVE; crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref); } else { crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); } /* add them to our list of CRMD_STATE_ACTIVE nodes */ g_hash_table_insert( integrated_nodes, crm_strdup(join_from), crm_strdup(ack_nack)); crm_debug("%u nodes have been integrated into join-%d", g_hash_table_size(integrated_nodes), join_id); g_hash_table_remove(welcomed_nodes, join_from); if(check_join_state(cur_state, __FUNCTION__) == FALSE) { /* dont waste time by invoking the PE yet; */ crm_debug("join-%d: Still waiting on %d outstanding offers", join_id, g_hash_table_size(welcomed_nodes)); } return I_NULL; } /* A_DC_JOIN_FINALIZE */ enum crmd_fsa_input do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum cib_errors rc = cib_ok; /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ crm_debug("Finializing join-%d for %d clients", current_join_id, g_hash_table_size(integrated_nodes)); clear_bit_inplace(fsa_input_register, R_HAVE_CIB); if(max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)){ set_bit_inplace(fsa_input_register, R_HAVE_CIB); } if(is_set(fsa_input_register, R_IN_TRANSITION)) { crm_warn("join-%d: We are still in a transition." " Delaying until the TE completes.", current_join_id); crmd_fsa_stall(NULL); return I_NULL; } if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ crm_info("join-%d: Asking %s for its copy of the CIB", current_join_id, crm_str(max_generation_from)); crm_log_xml_debug(max_generation_xml, "Requesting version"); set_bit_inplace(fsa_input_register, R_CIB_ASKED); fsa_cib_conn->call_timeout = 10; rc = fsa_cib_conn->cmds->sync_from( fsa_cib_conn, max_generation_from, NULL, cib_quorum_override); fsa_cib_conn->call_timeout = 0; /* back to the default */ add_cib_op_callback(rc, FALSE, crm_strdup(max_generation_from), finalize_sync_callback); return I_NULL; } else { /* Send _our_ CIB out to everyone */ fsa_cib_conn->cmds->sync_from( fsa_cib_conn, fsa_our_uname, NULL,cib_quorum_override); update_attrd(); } finalize_join(__FUNCTION__); return I_NULL; } void finalize_sync_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { CRM_DEV_ASSERT(cib_not_master != rc); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); if(rc != cib_ok) { crm_log_maybe((rc==cib_old_data?LOG_WARNING:LOG_ERR), "Sync from %s resulted in an error: %s", (char*)user_data, cib_error2string(rc)); /* restart the whole join process */ register_fsa_error_adv( C_FSA_INTERNAL, I_ELECTION_DC,NULL,NULL,__FUNCTION__); } else if(AM_I_DC && fsa_state == S_FINALIZE_JOIN) { finalize_join(__FUNCTION__); update_attrd(); } else { crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s", AM_I_DC?"DC":"CRMd", fsa_state2string(fsa_state)); } crm_free(user_data); } void finalize_join(const char *caller) { crm_data_t *cib = createEmptyCib(); set_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); set_uuid(fsa_cluster_conn, cib, XML_ATTR_DC_UUID, fsa_our_uname); crm_debug_3("Update %s in the CIB to our uuid: %s", XML_ATTR_DC_UUID, crm_element_value(cib, XML_ATTR_DC_UUID)); - fsa_cib_conn->cmds->update( - fsa_cib_conn, NULL, cib, NULL, cib_quorum_override); - + fsa_cib_anon_update(NULL, cib, cib_quorum_override); free_xml(cib); crm_debug_3("Bumping the epoch and syncing to %d clients", g_hash_table_size(finalized_nodes)); fsa_cib_conn->cmds->bump_epoch( fsa_cib_conn, cib_scope_local|cib_quorum_override); /* make sure dc_uuid is re-set to us */ if(check_join_state(fsa_state, caller) == FALSE) { crm_debug("Notifying %d clients of join-%d results", g_hash_table_size(integrated_nodes), current_join_id); g_hash_table_foreach_remove( integrated_nodes, finalize_join_for, NULL); } } static void join_update_complete_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { fsa_data_t *msg_data = NULL; if(rc == cib_ok) { crm_debug("Join update %d complete", call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update %d failed", call_id); crm_log_message(LOG_DEBUG, msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ enum crmd_fsa_input do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_id_s = NULL; const char *join_state = NULL; const char *op = cl_get_string(join_ack->msg, F_CRM_TASK); const char *join_from = cl_get_string(join_ack->msg, F_CRM_HOST_FROM); if(safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring op=%s message", op); return I_NULL; } ha_msg_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); join_id_s = ha_msg_value(join_ack->msg, F_CRM_JOIN_ID); /* now update them to "member" */ crm_debug_2("Processing ack from %s", join_from); join_state = (const char *) g_hash_table_lookup(finalized_nodes, join_from); if(join_state == NULL) { crm_err("Join not in progress: ignoring join-%d from %s", join_id, join_from); return I_NULL; } else if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_err("Node %s wasnt invited to join the cluster",join_from); g_hash_table_remove(finalized_nodes, join_from); return I_NULL; } else if(join_id != current_join_id) { crm_err("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); g_hash_table_remove(finalized_nodes, join_from); return I_NULL; } g_hash_table_remove(finalized_nodes, join_from); if(g_hash_table_lookup(confirmed_nodes, join_from) != NULL) { crm_err("join-%d: hash already contains confirmation from %s", join_id, join_from); } g_hash_table_insert( confirmed_nodes, crm_strdup(join_from), crm_strdup(join_id_s)); crm_info("join-%d: Updating node state to %s for %s)", join_id, CRMD_JOINSTATE_MEMBER, join_from); /* update CIB with the current LRM status from the node * We dont need to notify the TE of these updates, a transition will * be started in due time */ - call_id = fsa_cib_conn->cmds->update( - fsa_cib_conn, XML_CIB_TAG_STATUS, join_ack->xml, NULL, - cib_scope_local|cib_quorum_override); - + fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, + cib_scope_local|cib_quorum_override, call_id); add_cib_op_callback( call_id, FALSE, NULL, join_update_complete_callback); crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id); return I_NULL; } gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *join_state = NULL; HA_Message *acknak = NULL; if(key == NULL || value == NULL) { return TRUE; } join_to = (const char *)key; join_state = (const char *)value; /* make sure the node exists in the config section */ create_node_entry(join_to, join_to, NORMALNODE); /* send the ack/nack to the node */ acknak = create_request( CRM_OP_JOIN_ACKNAK, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); ha_msg_add_int(acknak, F_CRM_JOIN_ID, current_join_id); /* set the ack/nack */ if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_debug("join-%d: ACK'ing join request from %s, state %s", current_join_id, join_to, join_state); ha_msg_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); g_hash_table_insert( finalized_nodes, crm_strdup(join_to), crm_strdup(CRMD_JOINSTATE_MEMBER)); } else { crm_warn("join-%d: NACK'ing join request from %s, state %s", current_join_id, join_to, join_state); ha_msg_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE); } send_msg_via_ha(fsa_cluster_conn, acknak); return TRUE; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if(cur_state == S_INTEGRATION) { if(g_hash_table_size(welcomed_nodes) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, g_hash_table_size(integrated_nodes), source); register_fsa_input_before( C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if(cur_state == S_FINALIZE_JOIN) { if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); return TRUE; } else if(g_hash_table_size(integrated_nodes) == 0 && g_hash_table_size(finalized_nodes) == 0) { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later( C_FSA_INTERNAL, I_FINALIZED, NULL); } else if(g_hash_table_size(integrated_nodes) != 0 && g_hash_table_size(finalized_nodes) != 0) { crm_err("join-%d: Waiting on %d integrated nodes" " AND %d confirmations", current_join_id, g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes)); } else if(g_hash_table_size(integrated_nodes) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, g_hash_table_size(integrated_nodes)); } else if(g_hash_table_size(finalized_nodes) != 0) { crm_debug("join-%d: Still waiting on %d confirmations", current_join_id, g_hash_table_size(finalized_nodes)); } } return FALSE; }