diff --git a/crm/crmd/callbacks.c b/crm/crmd/callbacks.c index 88e25ebb7f..374662d650 100644 --- a/crm/crmd/callbacks.c +++ b/crm/crmd/callbacks.c @@ -1,599 +1,599 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *crmd_peer_state = NULL; crm_data_t *find_xml_in_hamessage(const HA_Message * msg); void crmd_ha_connection_destroy(gpointer user_data); /* From join_dc... */ extern gboolean check_join_state( enum crmd_fsa_state cur_state, const char *source); /* #define MAX_EMPTY_CALLBACKS 20 */ /* int empty_callbacks = 0; */ gboolean crmd_ha_msg_dispatch(IPC_Channel *channel, gpointer user_data) { int lpc = 0; ll_cluster_t *hb_cluster = (ll_cluster_t*)user_data; while(lpc < 2 && hb_cluster->llc_ops->msgready(hb_cluster)) { if(channel->ch_status != IPC_CONNECT) { /* there really is no point continuing */ break; } lpc++; /* invoke the callbacks but dont block */ hb_cluster->llc_ops->rcvmsg(hb_cluster, 0); } crm_debug_3("%d HA messages dispatched", lpc); G_main_set_trigger(fsa_source); if (channel && (channel->ch_status != IPC_CONNECT)) { crm_crit("Lost connection to heartbeat service."); return FALSE; } return TRUE; } void crmd_ha_msg_callback(const HA_Message * msg, void* private_data) { ha_msg_input_t *new_input = NULL; oc_node_t *from_node = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *seq = ha_msg_value(msg, F_SEQ); const char *op = ha_msg_value(msg, F_CRM_TASK); const char *sys_to = ha_msg_value(msg, F_CRM_SYS_TO); const char *sys_from = ha_msg_value(msg, F_CRM_SYS_FROM); CRM_DEV_ASSERT(from != NULL); if(fsa_membership_copy == NULL) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_message_adv( LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg); return; } from_node = g_hash_table_lookup(fsa_membership_copy->members, from); if(from_node == NULL) { int level = LOG_DEBUG; if(safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, __FILE__, __FUNCTION__, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, g_hash_table_size(fsa_membership_copy->members)); crm_log_message_adv(LOG_MSG, "HA[inbound]: CCM Discard", msg); } else if(AM_I_DC && safe_str_eq(sys_from, CRM_SYSTEM_DC) && safe_str_neq(from, fsa_our_uname)) { crm_err("Another DC detected: %s (op=%s)", from, op); crm_log_message_adv( LOG_WARNING, "HA[inbound]: Duplicate DC", msg); new_input = new_ha_msg_input(msg); /* make sure the election happens NOW */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, new_input, __FUNCTION__); #if 0 /* still thinking about this one... * could create a timing issue if we dont notice the * election before a new DC is elected. */ } else if(fsa_our_dc != NULL && safe_str_eq(sys_from, CRM_SYSTEM_DC) && safe_str_neq(from, fsa_our_dc)) { crm_warn("Ignoring message from wrong DC: %s vs. %s ", from, fsa_our_dc); crm_log_message_adv(LOG_WARNING, "HA[inbound]: wrong DC", msg); #endif } else if(safe_str_eq(sys_to, CRM_SYSTEM_DC) && AM_I_DC == FALSE) { crm_debug_2("Ignoring message for the DC [F_SEQ=%s]", seq); crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]: ignore", msg); return; } else if(safe_str_eq(from, fsa_our_uname) && safe_str_eq(op, CRM_OP_VOTE)) { crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]", msg); crm_debug_2("Ignoring our own vote [F_SEQ=%s]: own vote", seq); return; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_HBEAT)) { crm_debug_2("Ignoring our own heartbeat [F_SEQ=%s]", seq); crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]: own heartbeat", msg); return; } else { crm_debug_3("Processing message"); crm_log_message_adv(LOG_MSG, "HA[inbound]", msg); new_input = new_ha_msg_input(msg); register_fsa_input(C_HA_MESSAGE, I_ROUTER, new_input); } #if 0 if(ha_msg_value(msg, XML_ATTR_REFERENCE) == NULL) { ha_msg_add(new_input->msg, XML_ATTR_REFERENCE, seq); } #endif delete_ha_msg_input(new_input); return; } /* * Apparently returning TRUE means "stay connected, keep doing stuff". * Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; IPC_Message *msg = NULL; ha_msg_input_t *new_input = NULL; crmd_client_t *curr_client = (crmd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Processing IPC message from %s", curr_client->table_key); while(lpc == 0 && client->ops->is_message_pending(client)) { if (client->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if (client->ops->recv(client, &msg) != IPC_OK) { perror("Receive failure:"); crm_err("[%s] [receive failure]", curr_client->table_key); stay_connected = FALSE; break; } else if (msg == NULL) { crm_err("[%s] [no message this time]", curr_client->table_key); continue; } lpc++; new_input = new_ipc_msg_input(msg); msg->msg_done(msg); crm_debug_2("Processing msg from %s", curr_client->table_key); crm_log_message_adv(LOG_MSG, "CRMd[inbound]", new_input->msg); if(crmd_authorize_message(new_input, curr_client)) { register_fsa_input(C_IPC_MESSAGE, I_ROUTER, new_input); } delete_ha_msg_input(new_input); msg = NULL; new_input = NULL; } crm_debug_2("Processed %d messages", lpc); if (client->ch_status == IPC_DISCONNECT) { stay_connected = FALSE; process_client_disconnect(curr_client); } G_main_set_trigger(fsa_source); return stay_connected; } gboolean lrm_dispatch(IPC_Channel*src_not_used, gpointer user_data) { int num_msgs = 0; ll_lrm_t *lrm = (ll_lrm_t*)user_data; crm_debug_3("received callback"); num_msgs = lrm->lrm_ops->rcvmsg(lrm, FALSE); if(num_msgs < 1) { crm_err("lrm->lrm_ops->rcvmsg() failed, connection lost?"); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); return FALSE; } return TRUE; } void lrm_op_callback(lrm_op_t* op) { CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } crm_debug("received callback: %s/%s (%s)", op->op_type, op->rsc_id, op_status2text(op->op_status)); /* Make sure the LRM events are received in order */ register_fsa_input_later(C_LRM_OP_CALLBACK, I_LRM_EVENT, op); } void crmd_ha_status_callback( const char *node, const char * status, void* private_data) { crm_data_t *update = NULL; crm_debug_3("received callback"); crm_notice("Status update: Node %s now has status [%s]",node,status); if(safe_str_neq(status, DEADSTATUS)) { crm_debug_3("nstatus callback was not for a dead node"); return; } /* this node is taost */ update = create_node_state( node, node, status, NULL, NULL, NULL, NULL, __FUNCTION__); crm_xml_add(update, XML_CIB_ATTR_CLEAR_SHUTDOWN, XML_BOOLEAN_TRUE); /* this change should not be broadcast */ update_local_cib(create_cib_fragment(update, NULL)); G_main_set_trigger(fsa_source); free_xml(update); } void crmd_client_status_callback(const char * node, const char * client, const char * status, void * private) { const char *join = NULL; const char *extra = NULL; crm_data_t * update = NULL; crm_debug_3("received callback"); if(safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; extra = XML_CIB_ATTR_CLEAR_SHUTDOWN; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; join = CRMD_JOINSTATE_DOWN; extra = XML_CIB_ATTR_CLEAR_SHUTDOWN; } set_bit_inplace(fsa_input_register, R_PEER_DATA); g_hash_table_replace( crmd_peer_state, crm_strdup(node), crm_strdup(status)); if(fsa_state == S_STARTING || fsa_state == S_STOPPING) { return; } crm_notice("Status update: Client %s/%s now has status [%s]", node, client, status); if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)) { /* did our DC leave us */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else { crm_data_t *fragment = NULL; crm_debug_3("Got client status callback"); update = create_node_state( node, node, NULL, NULL, status, join, NULL, __FUNCTION__); crm_xml_add(update, extra, XML_BOOLEAN_TRUE); fragment = create_cib_fragment(update, NULL); /* it is safe to keep these updates on the local node * each node updates their own CIB */ fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, cib_inhibit_bcast|cib_scope_local|cib_quorum_override); free_xml(fragment); free_xml(update); if(AM_I_DC && safe_str_eq(status, OFFLINESTATUS)) { g_hash_table_remove(confirmed_nodes, node); g_hash_table_remove(finalized_nodes, node); g_hash_table_remove(integrated_nodes, node); g_hash_table_remove(welcomed_nodes, node); check_join_state(fsa_state, __FUNCTION__); } } G_main_set_trigger(fsa_source); } void crmd_ha_connection_destroy(gpointer user_data) { crm_crit("Heartbeat has left us"); /* this is always an error */ /* feed this back into the FSA */ register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { if (client_channel == NULL) { crm_err("Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { crmd_client_t *blank_client = NULL; crm_debug_3("Channel connected"); crm_malloc0(blank_client, sizeof(crmd_client_t)); if (blank_client == NULL) { return FALSE; } client_channel->ops->set_recv_qlen(client_channel, 100); client_channel->ops->set_send_qlen(client_channel, 100); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; blank_client->client_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_msg_callback, blank_client, default_ipc_connection_destroy); } return TRUE; } gboolean ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; gboolean was_error = FALSE; crm_debug_3("received callback"); rc = oc_ev_handle_event(ccm_token); if(rc != 0) { crm_err("CCM connection appears to have failed: rc=%d.", rc); register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); was_error = TRUE; } G_main_set_trigger(fsa_source); return !was_error; } static gboolean fsa_have_quorum = FALSE; void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { int instance = -1; gboolean update_cache = FALSE; struct crmd_ccm_data_s *event_data = NULL; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; gboolean trigger_transition = FALSE; crm_debug_3("received callback"); if(data != NULL) { instance = membership->m_instance; } crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event)?"(re)attained":"lost", ccm_event_name(event), instance); switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID:/* fall through */ update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: #if UNTESTED if(AM_I_DC == FALSE) { break; } /* tell the TE to pretend it had completed and stop */ /* side effect: we'll end up in S_IDLE */ register_fsa_action(A_TE_HALT, TRUE); #endif break; case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; if(AM_I_DC && need_transition(fsa_state)) { trigger_transition = TRUE; } break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); break; default: crm_err("Unknown CCM event: %d", event); } if(update_quorum && ccm_have_quorum(event) == FALSE) { /* did we just loose quorum? */ if(fsa_have_quorum && need_transition(fsa_state)) { crm_info("Quorum lost: triggering transition (%s)", ccm_event_name(event)); trigger_transition = TRUE; } fsa_have_quorum = FALSE; } else if(update_quorum) { - crm_debug("Updating quorum after event %s", - ccm_event_name(event)); + crm_debug_2("Updating quorum after event %s", + ccm_event_name(event)); fsa_have_quorum = TRUE; } if(trigger_transition) { - crm_debug("Scheduling transition after event %s", - ccm_event_name(event)); + crm_debug_2("Scheduling transition after event %s", + ccm_event_name(event)); /* make sure that when we query the CIB that it has * the changes that triggered the transition */ switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; break; default: break; } if(update_cache == FALSE) { /* a stand-alone transition */ register_fsa_action(A_TE_CANCEL); } } if(update_cache) { - crm_debug("Updating cache after event %s", - ccm_event_name(event)); + crm_debug_2("Updating cache after event %s", + ccm_event_name(event)); crm_malloc0(event_data, sizeof(struct crmd_ccm_data_s)); if(event_data == NULL) { return; } event_data->event = event; if(data != NULL) { event_data->oc = copy_ccm_oc_data(data); } register_fsa_input_adv( C_CCM_CALLBACK, I_CCM_EVENT, event_data, trigger_transition?A_TE_CANCEL:A_NOTHING, FALSE, __FUNCTION__); if (event_data->oc) { crm_free(event_data->oc); event_data->oc = NULL; } crm_free(event_data); } oc_ev_callback_done(cookie); return; } void crmd_cib_connection_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); return; } longclock_t fsa_start = 0; longclock_t fsa_stop = 0; longclock_t fsa_diff = 0; gboolean crm_fsa_trigger(gpointer user_data) { unsigned int fsa_diff_ms = 0; if(fsa_diff_max_ms > 0) { fsa_start = time_longclock(); } s_crmd_fsa(C_FSA_INTERNAL); if(fsa_diff_max_ms > 0) { fsa_stop = time_longclock(); fsa_diff = sub_longclock(fsa_stop, fsa_start); fsa_diff_ms = longclockto_ms(fsa_diff); if(fsa_diff_ms > fsa_diff_max_ms) { crm_err("FSA took %dms to complete", fsa_diff_ms); } else if(fsa_diff_ms > fsa_diff_warn_ms) { crm_warn("FSA took %dms to complete", fsa_diff_ms); } } return TRUE; } diff --git a/crm/crmd/ccm.c b/crm/crmd/ccm.c index c03540edd0..a9f468d462 100644 --- a/crm/crmd/ccm.c +++ b/crm/crmd/ccm.c @@ -1,660 +1,655 @@ -/* $Id: ccm.c,v 1.84 2005/06/15 13:39:40 andrew Exp $ */ +/* $Id: ccm.c,v 1.85 2005/06/17 18:44:57 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); int register_with_ccm(ll_cluster_t *hb_cluster); void msg_ccm_join(const HA_Message *msg, void *foo); void crmd_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data); gboolean ghash_node_clfree(gpointer key, gpointer value, gpointer user_data); void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data); #define CCM_EVENT_DETAIL 0 #define CCM_EVENT_DETAIL_PARTIAL 1 oc_ev_t *fsa_ev_token; int num_ccm_register_fails = 0; int max_ccm_register_fails = 30; /* A_CCM_CONNECT */ enum crmd_fsa_input do_ccm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret; int fsa_ev_fd; gboolean did_fail = FALSE; if(action & A_CCM_DISCONNECT){ oc_ev_unregister(fsa_ev_token); } if(action & A_CCM_CONNECT) { crm_debug_3("Registering with CCM"); ret = oc_ev_register(&fsa_ev_token); if (ret != 0) { crm_warn("CCM registration failed"); did_fail = TRUE; } if(did_fail == FALSE) { crm_debug_3("Setting up CCM callbacks"); ret = oc_ev_set_callback(fsa_ev_token, OC_EV_MEMB_CLASS, crmd_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if(did_fail == FALSE) { oc_ev_special(fsa_ev_token, OC_EV_MEMB_CLASS, 0/*don't care*/); crm_debug_3("Activating CCM token"); ret = oc_ev_activate(fsa_ev_token, &fsa_ev_fd); if (ret != 0){ crm_warn("CCM Activation failed"); did_fail = TRUE; } } if(did_fail) { num_ccm_register_fails++; oc_ev_unregister(fsa_ev_token); if(num_ccm_register_fails < max_ccm_register_fails) { crm_warn("CCM Connection failed" " %d times (%d max)", num_ccm_register_fails, max_ccm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } else { crm_err("CCM Activation failed %d (max) times", num_ccm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } } crm_info("CCM Activation passed... all set to go!"); G_main_add_fd(G_PRIORITY_HIGH, fsa_ev_fd, FALSE, ccm_dispatch, fsa_ev_token, default_ipc_connection_destroy); } if(action & ~(A_CCM_CONNECT|A_CCM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* A_CCM_EVENT */ enum crmd_fsa_input do_ccm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input return_input = I_NULL; oc_ed_t event; const oc_ev_membership_t *oc = NULL; struct crmd_ccm_data_s *ccm_data = fsa_typed_data(fsa_dt_ccm); if(ccm_data == NULL) { crm_err("No data provided to FSA function"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } else if(msg_data->fsa_cause != C_CCM_CALLBACK) { crm_err("FSA function called in response to incorect input"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } event = ccm_data->event; oc = ccm_data->oc; - crm_info("event=%s", ccm_event_name(event)); - - if(CCM_EVENT_DETAIL /*constant condition*/ || CCM_EVENT_DETAIL_PARTIAL) { - ccm_event_detail(oc, event); - } + ccm_event_detail(oc, event); if (OC_EV_MS_EVICTED == event) { /* todo: drop back to S_PENDING instead */ /* get out... NOW! * * go via the error recovery process so that HA will * restart us if required */ register_fsa_error(cause, I_ERROR, msg_data->data); return I_NULL; } CRM_DEV_ASSERT(oc->m_n_in != 0 || oc->m_n_out != 0); if(AM_I_DC) { /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ HA_Message *no_op = create_request( CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); send_msg_via_ha(fsa_cluster_conn, no_op); } else if(oc->m_n_out != 0) { /* Possibly move this logic to ghash_update_cib_node() */ unsigned lpc = 0; int offset = oc->m_out_idx; for(lpc=0; lpc < oc->m_n_out; lpc++) { const char *uname = oc->m_array[offset+lpc].node_uname; if(uname == NULL) { crm_err("CCM node had no name"); continue; } else if(safe_str_eq(uname, fsa_our_dc)) { - crm_info("Our DC node (%s) left the cluster", - uname); + crm_warn("Our DC node (%s) left the cluster", + uname); register_fsa_input(cause, I_ELECTION, NULL); } } } return return_input; } /* A_CCM_UPDATE_CACHE */ /* * Take the opportunity to update the node status in the CIB as well */ enum crmd_fsa_input do_ccm_update_cache(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input next_input = I_NULL; int lpc, offset; GHashTable *members = NULL; oc_ed_t event; const oc_ev_membership_t *oc = NULL; oc_node_list_t *tmp = NULL, *membership_copy = NULL; struct crmd_ccm_data_s *ccm_data = fsa_typed_data(fsa_dt_ccm); if(ccm_data == NULL) { crm_err("No data provided to FSA function"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } event = ccm_data->event; oc = ccm_data->oc; - crm_info("Updating CCM cache after a \"%s\" event.", + crm_debug_2("Updating CCM cache after a \"%s\" event.", ccm_event_name(event)); - crm_debug("instance=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " + crm_debug_2("instance=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); #define ALAN_DEBUG 1 #ifdef ALAN_DEBUG { /* * Size (Size + 2) / 2 * * 3 (3+2)/2 = 5 / 2 = 2 * 4 (4+2)/2 = 6 / 2 = 3 * 5 (5+2)/2 = 7 / 2 = 3 * 6 (6+2)/2 = 8 / 2 = 4 * 7 (7+2)/2 = 9 / 2 = 4 */ int clsize = (oc->m_out_idx - oc->m_n_member); int plsize = (clsize + 2)/2; gboolean plurality = (oc->m_n_member >= plsize); gboolean Q = ccm_have_quorum(event); if(clsize == 2) { if (!Q) { crm_err("2 nodes w/o quorum"); } } else if(Q && !plurality) { crm_err("Quorum w/o plurality (%d/%d nodes)", oc->m_n_member, clsize); } else if(plurality && !Q) { crm_err("Plurality w/o Quorum (%d/%d nodes)", oc->m_n_member, clsize); } else { - crm_debug("Quorum(%s) and plurality (%d/%d) agree.", - Q?"true":"false", oc->m_n_member, clsize); + crm_debug_2("Quorum(%s) and plurality (%d/%d) agree.", + Q?"true":"false", oc->m_n_member, clsize); } } #endif crm_malloc0(membership_copy, sizeof(oc_node_list_t)); if(membership_copy == NULL) { crm_crit("Couldnt create membership copy - out of memory"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } membership_copy->id = oc->m_instance; membership_copy->last_event = event; crm_debug_3("Copying members"); /*--*-- All Member Nodes --*--*/ offset = oc->m_memb_idx; membership_copy->members_size = oc->m_n_member; if(membership_copy->members_size > 0) { membership_copy->members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->members; for(lpc=0; lpc < membership_copy->members_size; lpc++) { oc_node_t *member = NULL; crm_debug_3("Copying member %d", lpc); crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; member->node_uname = NULL; if(oc->m_array[offset+lpc].node_uname != NULL) { member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); } else { crm_err("Node %d had a NULL uname", member->node_id); } g_hash_table_insert( members, member->node_uname, member); } } else { membership_copy->members = NULL; } crm_debug_3("Copying new members"); /*--*-- New Member Nodes --*--*/ offset = oc->m_in_idx; membership_copy->new_members_size = oc->m_n_in; if(membership_copy->new_members_size > 0) { membership_copy->new_members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->new_members; for(lpc=0; lpc < membership_copy->new_members_size; lpc++) { oc_node_t *member = NULL; crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_uname = NULL; member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; if(oc->m_array[offset+lpc].node_uname != NULL) { member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); } else { crm_err("Node %d had a NULL uname", member->node_id); } g_hash_table_insert( members, member->node_uname, member); g_hash_table_insert(members, member->node_uname, member); } } else { membership_copy->new_members = NULL; } crm_debug_3("Copying dead members"); /*--*-- Recently Dead Member Nodes --*--*/ offset = oc->m_out_idx; membership_copy->dead_members_size = oc->m_n_out; if(membership_copy->dead_members_size > 0) { membership_copy->dead_members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->dead_members; for(lpc=0; lpc < membership_copy->dead_members_size; lpc++) { oc_node_t *member = NULL; crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; member->node_uname = NULL; - if(oc->m_array[offset+lpc].node_uname != NULL) { - member->node_uname = - crm_strdup(oc->m_array[offset+lpc].node_uname); - } else { - crm_err("Node %d had a NULL uname", - member->node_id); + CRM_DEV_ASSERT(oc->m_array[offset+lpc].node_uname != NULL); + + if(oc->m_array[offset+lpc].node_uname == NULL) { + continue; } + + member->node_uname = + crm_strdup(oc->m_array[offset+lpc].node_uname); + g_hash_table_insert( members, member->node_uname, member); g_hash_table_insert(members, member->node_uname, member); } } else { membership_copy->dead_members = NULL; } tmp = fsa_membership_copy; fsa_membership_copy = membership_copy; - crm_info("Updated membership cache with %d (%d new, %d lost) members", - g_hash_table_size(fsa_membership_copy->members), - g_hash_table_size(fsa_membership_copy->new_members), - g_hash_table_size(fsa_membership_copy->dead_members)); - + crm_debug_2("Updated membership cache with %d (%d new, %d lost) members", + g_hash_table_size(fsa_membership_copy->members), + g_hash_table_size(fsa_membership_copy->new_members), + g_hash_table_size(fsa_membership_copy->dead_members)); + /* Free the old copy */ if(tmp != NULL) { if(tmp->members != NULL) g_hash_table_foreach_remove( tmp->members, ghash_node_clfree, NULL); if(tmp->new_members != NULL) g_hash_table_foreach_remove( tmp->new_members, ghash_node_clfree, NULL); if(tmp->dead_members != NULL) g_hash_table_foreach_remove( tmp->dead_members, ghash_node_clfree, NULL); crm_free(tmp); } crm_debug_3("Free'd old copies"); set_bit_inplace(fsa_input_register, R_CCM_DATA); if(cur_state != S_STARTING && cur_state != S_STOPPING) { crm_debug_3("Updating the CIB from CCM cache"); do_update_cib_nodes(NULL, FALSE); } return next_input; } void ccm_event_detail(const oc_ev_membership_t *oc, oc_ed_t event) { int lpc; gboolean member = FALSE; member = FALSE; crm_debug_2("-----------------------"); - crm_info("trans=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " - "new_idx=%d, old_idx=%d", - oc->m_instance, - oc->m_n_member, - oc->m_n_in, - oc->m_n_out, - oc->m_memb_idx, - oc->m_in_idx, - oc->m_out_idx); - + crm_info("%s: trans=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " + "new_idx=%d, old_idx=%d", + ccm_event_name(event), + oc->m_instance, + oc->m_n_member, + oc->m_n_in, + oc->m_n_out, + oc->m_memb_idx, + oc->m_in_idx, + oc->m_out_idx); + #if !CCM_EVENT_DETAIL_PARTIAL for(lpc=0; lpc < oc->m_n_member; lpc++) { crm_info("\tCURRENT: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_memb_idx+lpc].node_uname, oc->m_array[oc->m_memb_idx+lpc].node_id, oc->m_array[oc->m_memb_idx+lpc].node_born_on); if(safe_str_eq(fsa_our_uname, oc->m_array[oc->m_memb_idx+lpc].node_uname)) { member = TRUE; } } if (member == FALSE) { crm_warn("MY NODE IS NOT IN CCM THE MEMBERSHIP LIST"); } #endif for(lpc=0; lpc<(int)oc->m_n_in; lpc++) { crm_info("\tNEW: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_in_idx+lpc].node_uname, oc->m_array[oc->m_in_idx+lpc].node_id, oc->m_array[oc->m_in_idx+lpc].node_born_on); } for(lpc=0; lpc<(int)oc->m_n_out; lpc++) { crm_info("\tLOST: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_out_idx+lpc].node_uname, oc->m_array[oc->m_out_idx+lpc].node_id, oc->m_array[oc->m_out_idx+lpc].node_born_on); if(fsa_our_uname != NULL && 0 == strcmp(fsa_our_uname, oc->m_array[oc->m_out_idx+lpc].node_uname)) { crm_err("We're not part of the cluster anymore"); } } crm_debug_2("-----------------------"); } int register_with_ccm(ll_cluster_t *hb_cluster) { return 0; } void msg_ccm_join(const HA_Message *msg, void *foo) { crm_debug_2("###### Received ccm_join message..."); if (msg != NULL) { crm_debug_2("[type=%s]", ha_msg_value(msg, F_TYPE)); crm_debug_2("[orig=%s]", ha_msg_value(msg, F_ORIG)); crm_debug_2("[to=%s]", ha_msg_value(msg, F_TO)); crm_debug_2("[status=%s]", ha_msg_value(msg, F_STATUS)); crm_debug_2("[info=%s]", ha_msg_value(msg, F_COMMENT)); crm_debug_2("[rsc_hold=%s]", ha_msg_value(msg, F_RESOURCES)); crm_debug_2("[stable=%s]", ha_msg_value(msg, F_ISSTABLE)); crm_debug_2("[rtype=%s]", ha_msg_value(msg, F_RTYPE)); crm_debug_2("[ts=%s]", ha_msg_value(msg, F_TIME)); crm_debug_2("[seq=%s]", ha_msg_value(msg, F_SEQ)); crm_debug_2("[generation=%s]", ha_msg_value(msg, F_HBGENERATION)); /* crm_debug_2("[=%s]", ha_msg_value(msg, F_)); */ } return; } struct update_data_s { crm_data_t *updates; const char *state; const char *join; }; crm_data_t* do_update_cib_nodes(crm_data_t *updates, gboolean overwrite) { int call_options = cib_scope_local|cib_quorum_override; struct update_data_s update_data; crm_data_t *fragment = updates; crm_data_t *tmp = NULL; if(updates == NULL) { fragment = create_cib_fragment(NULL, NULL); crm_xml_add(fragment, XML_ATTR_SECTION, XML_CIB_TAG_STATUS); } tmp = find_xml_node(fragment, XML_TAG_CIB, TRUE); tmp = get_object_root(XML_CIB_TAG_STATUS, tmp); CRM_DEV_ASSERT(tmp != NULL); update_data.updates = tmp; update_data.state = XML_BOOLEAN_YES; update_data.join = NULL; if(overwrite) { crm_debug_2("Performing a join update based on CCM data"); update_data.join = CRMD_JOINSTATE_PENDING; if(fsa_membership_copy->members != NULL) { g_hash_table_foreach(fsa_membership_copy->members, ghash_update_cib_node, &update_data); } } else { call_options = call_options|cib_inhibit_bcast; - crm_debug("Inhibiting bcast for CCM updates"); + crm_debug_2("Inhibiting bcast for CCM updates"); if(fsa_membership_copy->members != NULL) { g_hash_table_foreach(fsa_membership_copy->new_members, ghash_update_cib_node, &update_data); } update_data.state = XML_BOOLEAN_NO; update_data.join = CRMD_JOINSTATE_DOWN; if(fsa_membership_copy->dead_members != NULL) { g_hash_table_foreach(fsa_membership_copy->dead_members, ghash_update_cib_node, &update_data); } } if(update_data.updates != NULL) { fsa_cib_conn->cmds->modify(fsa_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, call_options); free_xml(fragment); } return NULL; } void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) { crm_data_t *tmp1 = NULL; const char *node_uname = (const char*)key; struct update_data_s* data = (struct update_data_s*)user_data; - if(safe_str_eq(data->state, XML_BOOLEAN_NO)) { - crm_info("Node %s has left the cluster", node_uname); - } - crm_debug_2("%s processing %s (%s)", __FUNCTION__, node_uname, data->state); tmp1 = create_node_state(node_uname, node_uname, NULL, data->state, NULL, data->join, NULL, __FUNCTION__); add_node_copy(data->updates, tmp1); free_xml(tmp1); } gboolean ghash_node_clfree(gpointer key, gpointer value, gpointer user_data) { /* value->node_uname is free'd as "key" */ if(key != NULL) { crm_free(key); } if(value != NULL) { crm_free(value); } return TRUE; } diff --git a/crm/crmd/fsa.c b/crm/crmd/fsa.c index de13998429..b90283e0ee 100644 --- a/crm/crmd/fsa.c +++ b/crm/crmd/fsa.c @@ -1,684 +1,684 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern uint highest_born_on; extern int num_join_invites; extern GHashTable *welcomed_nodes; extern GHashTable *integrated_nodes; extern GHashTable *finalized_nodes; extern GHashTable *confirmed_nodes; extern void initialize_join(gboolean before); long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data); long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input); void dump_rsc_info(void); void dump_rsc_info_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); void ghash_print_node(gpointer key, gpointer value, gpointer user_data); #define DOT_PREFIX "live.dot: " #define DOT_LOG LOG_DEBUG_2 #define do_dot_log(fmt...) do_crm_log(DOT_LOG, NULL, NULL, fmt) #define do_dot_action(fmt...) do_crm_log(DOT_LOG, NULL, NULL, fmt) longclock_t action_start = 0; longclock_t action_stop = 0; longclock_t action_diff = 0; unsigned int action_diff_ms = 0; #define IF_FSA_ACTION(x,y) \ if(is_set(fsa_actions,x)) { \ enum crmd_fsa_input result = I_NULL; \ last_action = x; \ fsa_actions = clear_bit(fsa_actions, x); \ crm_debug_3("Invoking action %s (%.16llx)", \ fsa_action2string(x), x); \ if(action_diff_max_ms > 0) { \ action_start = time_longclock(); \ } \ result = y(x, fsa_data->fsa_cause, fsa_state, \ fsa_data->fsa_input, fsa_data); \ if(action_diff_max_ms > 0) { \ action_stop = time_longclock(); \ action_diff = sub_longclock(action_stop, action_start); \ action_diff_ms = longclockto_ms(action_diff); \ if(action_diff_ms > action_diff_max_ms) { \ crm_err("Action %s took %dms to complete", \ fsa_action2string(x), \ action_diff_ms); \ } else if(action_diff_ms > action_diff_warn_ms) { \ crm_warn("Action %s took %dms to complete", \ fsa_action2string(x), \ action_diff_ms); \ } \ } \ crm_debug_3("Action complete: %s (%.16llx)", \ fsa_action2string(x), x); \ CRM_DEV_ASSERT(result == I_NULL); \ do_dot_action(DOT_PREFIX"\t// %s", fsa_action2string(x)); \ } /* #define ELSEIF_FSA_ACTION(x,y) else IF_FSA_ACTION(x,y) */ void init_dotfile(void); void s_crmd_fsa_actions(fsa_data_t *fsa_data); void log_fsa_input(fsa_data_t *stored_msg); void init_dotfile(void) { do_dot_log(DOT_PREFIX"digraph \"g\" {"); do_dot_log(DOT_PREFIX" size = \"30,30\""); do_dot_log(DOT_PREFIX" graph ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" node ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" shape = \"ellipse\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" edge ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// special nodes"); do_dot_log(DOT_PREFIX" \"S_PENDING\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"blue\""); do_dot_log(DOT_PREFIX" fontcolor = \"blue\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"red\""); do_dot_log(DOT_PREFIX" fontcolor = \"red\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// DC only nodes"); do_dot_log(DOT_PREFIX" \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_IDLE\" [ fontcolor = \"green\" ]"); } volatile enum crmd_fsa_state fsa_state = S_STARTING; oc_node_list_t *fsa_membership_copy; ll_cluster_t *fsa_cluster_conn; ll_lrm_t *fsa_lrm_conn; volatile long long fsa_input_register; volatile long long fsa_actions = A_NOTHING; const char *fsa_our_uname; char *fsa_our_dc; cib_t *fsa_cib_conn = NULL; fsa_timer_t *election_trigger = NULL; /* */ fsa_timer_t *election_timeout = NULL; /* */ fsa_timer_t *shutdown_escalation_timer = NULL; /* */ fsa_timer_t *shutdown_timer = NULL; /* */ fsa_timer_t *integration_timer = NULL; fsa_timer_t *finalization_timer = NULL; fsa_timer_t *dc_heartbeat = NULL; fsa_timer_t *wait_timer = NULL; volatile gboolean do_fsa_stall = FALSE; enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; long long last_action = A_NOTHING; enum crmd_fsa_state last_state = fsa_state; crm_debug_2("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); do_fsa_stall = FALSE; if(is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while(is_message() && do_fsa_stall == FALSE) { crm_debug_2("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; #ifdef FSA_TRACE if(new_actions != A_NOTHING) { crm_debug_2("Adding FSA actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(new_actions, "\tFSA scheduled"); } else if(fsa_data->fsa_input != I_NULL && new_actions == A_NOTHING) { crm_debug("No action specified for input,state (%s,%s)", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); } if(fsa_data->actions != A_NOTHING) { crm_debug_2("Adding input actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_data->actions,"\tInput scheduled"); } #endif /* logging : *before* the state is changed */ IF_FSA_ACTION(A_ERROR, do_log) IF_FSA_ACTION(A_WARN, do_log) IF_FSA_ACTION(A_LOG, do_log) /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Hook to allow actions to removed due to certain inputs */ fsa_actions = clear_flags( fsa_actions, cause, fsa_state, fsa_data->fsa_input); /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if(last_state != fsa_state){ fsa_actions = do_state_transition( fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX"\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if(g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall?"true":"false"); } else { crm_debug_2("Exiting the FSA"); } /* cleanup inputs? */ if(register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added input:", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed input:", register_copy ^ same); } #if 0 if(fsa_state != S_STARTING && g_list_length(fsa_message_queue) && is_ipc_empty(te_subsystem->ipc) && is_ipc_empty(pe_subsystem->ipc) && is_ipc_empty(fsa_cib_conn->cmds->channel(fsa_cib_conn) && is_ipc_empty(fsa_cluster_conn->llc_ops->ipcchan(fsa_cluster_conn)) ){ static gboolean mem_needs_init = TRUE; if(mem_needs_init) { crm_debug("Reached a stable point:" " reseting memory usage stats to zero"); crm_zero_mem_stats(NULL); mem_needs_init = FALSE; } else { crm_debug("Reached a stable point:" " checking memory usage"); crm_mem_stats(NULL); } } #endif fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t *fsa_data) { long long last_action = A_NOTHING; /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ while(fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { msg_queue_helper(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { return; } /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ /* get out of here NOW! before anything worse happens */ IF_FSA_ACTION(A_EXIT_1, do_exit) else IF_FSA_ACTION(A_ERROR, do_log) else IF_FSA_ACTION(A_WARN, do_log) else IF_FSA_ACTION(A_LOG, do_log) /* essential start tasks */ else IF_FSA_ACTION(A_HA_CONNECT, do_ha_control) else IF_FSA_ACTION(A_STARTUP, do_startup) else IF_FSA_ACTION(A_CIB_START, do_cib_control) else IF_FSA_ACTION(A_READCONFIG, do_read_config) /* sub-system start/connect */ else IF_FSA_ACTION(A_LRM_CONNECT, do_lrm_control) else IF_FSA_ACTION(A_CCM_CONNECT, do_ccm_control) else IF_FSA_ACTION(A_TE_START, do_te_control) else IF_FSA_ACTION(A_PE_START, do_pe_control) /* sub-system restart */ else IF_FSA_ACTION(O_CIB_RESTART, do_cib_control) else IF_FSA_ACTION(O_PE_RESTART, do_pe_control) else IF_FSA_ACTION(O_TE_RESTART, do_te_control) /* Timers */ /* else IF_FSA_ACTION(O_DC_TIMER_RESTART, do_timer_control) */ else IF_FSA_ACTION(A_DC_TIMER_STOP, do_timer_control) else IF_FSA_ACTION(A_INTEGRATE_TIMER_STOP, do_timer_control) else IF_FSA_ACTION(A_INTEGRATE_TIMER_START, do_timer_control) else IF_FSA_ACTION(A_FINALIZE_TIMER_STOP, do_timer_control) else IF_FSA_ACTION(A_FINALIZE_TIMER_START, do_timer_control) /* * Highest priority actions */ else IF_FSA_ACTION(A_CIB_BUMPGEN, do_cib_invoke) else IF_FSA_ACTION(A_MSG_ROUTE, do_msg_route) else IF_FSA_ACTION(A_RECOVER, do_recover) else IF_FSA_ACTION(A_CL_JOIN_REQUEST, do_cl_join_request) else IF_FSA_ACTION(A_CL_JOIN_RESULT, do_cl_join_result) else IF_FSA_ACTION(A_SHUTDOWN_REQ, do_shutdown_req) else IF_FSA_ACTION(A_ELECTION_VOTE, do_election_vote) else IF_FSA_ACTION(A_ELECTION_COUNT, do_election_count_vote) /* * High priority actions * Update the cache first */ else IF_FSA_ACTION(A_CCM_UPDATE_CACHE, do_ccm_update_cache) else IF_FSA_ACTION(A_CCM_EVENT, do_ccm_event) else IF_FSA_ACTION(A_STARTED, do_started) else IF_FSA_ACTION(A_CL_JOIN_QUERY, do_cl_join_query) else IF_FSA_ACTION(A_DC_TIMER_START, do_timer_control) /* * Medium priority actions */ else IF_FSA_ACTION(A_DC_TAKEOVER, do_dc_takeover) else IF_FSA_ACTION(A_DC_RELEASE, do_dc_release) else IF_FSA_ACTION(A_ELECTION_START, do_election_vote) else IF_FSA_ACTION(A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all) else IF_FSA_ACTION(A_DC_JOIN_OFFER_ONE, do_dc_join_offer_all) else IF_FSA_ACTION(A_DC_JOIN_PROCESS_REQ,do_dc_join_req) else IF_FSA_ACTION(A_DC_JOIN_PROCESS_ACK,do_dc_join_ack) /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * PE, and the PE before the TE */ else IF_FSA_ACTION(A_CIB_INVOKE_LOCAL, do_cib_invoke) else IF_FSA_ACTION(A_CIB_INVOKE, do_cib_invoke) else IF_FSA_ACTION(A_DC_JOIN_FINALIZE, do_dc_join_finalize) else IF_FSA_ACTION(A_LRM_INVOKE, do_lrm_invoke) else IF_FSA_ACTION(A_LRM_EVENT, do_lrm_event) else IF_FSA_ACTION(A_TE_HALT, do_te_invoke) else IF_FSA_ACTION(A_TE_CANCEL, do_te_invoke) else IF_FSA_ACTION(A_PE_INVOKE, do_pe_invoke) else IF_FSA_ACTION(A_TE_INVOKE, do_te_invoke) else IF_FSA_ACTION(A_CL_JOIN_ANNOUNCE, do_cl_join_announce) /* sub-system stop */ else IF_FSA_ACTION(A_DC_RELEASED, do_dc_release) else IF_FSA_ACTION(A_PE_STOP, do_pe_control) else IF_FSA_ACTION(A_TE_STOP, do_te_control) else IF_FSA_ACTION(A_SHUTDOWN, do_shutdown) else IF_FSA_ACTION(A_STOP, do_stop) else IF_FSA_ACTION(A_CCM_DISCONNECT, do_ccm_control) else IF_FSA_ACTION(A_LRM_DISCONNECT, do_lrm_control) else IF_FSA_ACTION(A_HA_DISCONNECT, do_ha_control) else IF_FSA_ACTION(A_CIB_STOP, do_cib_control) /* exit gracefully */ else IF_FSA_ACTION(A_EXIT_0, do_exit) /* else IF_FSA_ACTION(A_, do_) */ /* Error checking and reporting */ else { crm_err("Action %s (0x%llx) not supported ", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t *stored_msg) { - crm_debug("Processing queued input %d", stored_msg->id); + crm_debug_2("Processing queued input %d", stored_msg->id); if(stored_msg->fsa_cause == C_CCM_CALLBACK) { crm_debug_3("FSA processing CCM callback from %s", stored_msg->origin); } else if(stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_debug_3("FSA processing LRM callback from %s", stored_msg->origin); } else if(stored_msg->data == NULL) { crm_debug_3("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv( stored_msg, fsa_dt_ha_msg, __FUNCTION__); crm_debug_3("FSA processing XML message from %s", stored_msg->origin); crm_log_message(LOG_MSG, ha_input->msg); crm_log_xml_debug_3(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data) { long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_DEV_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX"\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); crm_info("State transition %s -> %s [ input=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); /* the last two clauses might cause trouble later */ if(election_timeout != NULL && next_state != S_ELECTION && cur_state != S_RELEASE_DC) { crm_timer_stop(election_timeout); /* } else { */ /* crm_timer_start(election_timeout); */ } #if 0 if(is_set(fsa_input_register, R_SHUTDOWN)){ set_bit_inplace(tmp, A_DC_TIMER_STOP); } #endif if(next_state == S_INTEGRATION) { set_bit_inplace(tmp, A_INTEGRATE_TIMER_START); } else { set_bit_inplace(tmp, A_INTEGRATE_TIMER_STOP); } if(next_state == S_FINALIZE_JOIN) { set_bit_inplace(tmp, A_FINALIZE_TIMER_START); } else { set_bit_inplace(tmp, A_FINALIZE_TIMER_STOP); } if(next_state != S_PENDING) { set_bit_inplace(tmp, A_DC_TIMER_STOP); } if(next_state != S_ELECTION) { highest_born_on = 0; } switch(next_state) { case S_PENDING: case S_ELECTION: crm_debug_2("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); crm_free(fsa_our_dc); fsa_our_dc = NULL; break; case S_NOT_DC: if(is_set(fsa_input_register, R_SHUTDOWN)){ crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit_inplace(tmp, A_SHUTDOWN_REQ); } CRM_DEV_ASSERT(fsa_our_dc != NULL); if(fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(welcomed_nodes) > 0) { char *msg = crm_strdup( " Welcome reply not received from"); crm_warn("%u cluster nodes failed to respond" " to the join offer.", g_hash_table_size(welcomed_nodes)); g_hash_table_foreach( welcomed_nodes, ghash_print_node, msg); crm_free(msg); } else { crm_info("All %d cluster nodes " "responded to the join offer.", g_hash_table_size(integrated_nodes)); } break; case S_POLICY_ENGINE: if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(finalized_nodes) > 0) { char *msg = crm_strdup( " Confirm not received from"); crm_err("%u cluster nodes failed to confirm" " their join.", g_hash_table_size(finalized_nodes)); g_hash_table_foreach( finalized_nodes, ghash_print_node, msg); crm_free(msg); } else if(g_hash_table_size(confirmed_nodes) == fsa_membership_copy->members_size) { crm_info("All %u cluster nodes are" " eligable to run resources.", fsa_membership_copy->members_size); } else { crm_warn("Only %u of %u cluster " "nodes are eligable to run resources", g_hash_table_size(confirmed_nodes), fsa_membership_copy->members_size); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ crm_timer_stop(shutdown_timer); set_bit_inplace(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: dump_rsc_info(); break; default: break; } if(clear_recovery_bit && next_state != S_PENDING) { tmp = clear_bit(tmp, A_RECOVER); } else if(clear_recovery_bit == FALSE) { tmp = set_bit(tmp, A_RECOVER); } if(tmp != actions) { fsa_dump_actions(actions ^ tmp, "New actions"); actions = tmp; } return actions; } long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input) { long long saved_actions = actions; long long startup_actions = A_STARTUP|A_CIB_START|A_LRM_CONNECT|A_CCM_CONNECT|A_HA_CONNECT|A_READCONFIG|A_STARTED|A_CL_JOIN_QUERY; if(cur_state == S_STOPPING || is_set(fsa_input_register, R_SHUTDOWN)) { clear_bit_inplace(actions, startup_actions); } fsa_dump_actions(actions ^ saved_actions, "Cleared Actions"); return actions; } void dump_rsc_info(void) { } void ghash_print_node(gpointer key, gpointer value, gpointer user_data) { const char *text = user_data; const char *uname = key; crm_info("%s: %s", text, uname); } diff --git a/crm/crmd/join_client.c b/crm/crmd/join_client.c index 390e470e32..e48e03a907 100644 --- a/crm/crmd/join_client.c +++ b/crm/crmd/join_client.c @@ -1,296 +1,296 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include int reannounce_count = 0; void join_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t *orig); extern gboolean process_join_ack_msg( const char *join_from, crm_data_t *lrm_update); /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ enum crmd_fsa_input do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_debug("c0) query"); send_msg_via_ha(fsa_cluster_conn, req); return I_NULL; } /* A_CL_JOIN_ANNOUNCE */ /* this is kind of a workaround for the the fact that we may not be around * or are otherwise unable to reply when the DC sends out A_WELCOME_ALL */ enum crmd_fsa_input do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); /* Once we hear from the DC, we can stop the timer * * This timer was started either on startup or when a node * left the CCM list */ /* dont announce if we're in one of these states */ if(cur_state != S_PENDING) { crm_warn("Do not announce ourselves in state %s", fsa_state2string(cur_state)); return I_NULL; } if(AM_I_OPERATIONAL) { const char *hb_from = cl_get_string( input->msg, F_CRM_HOST_FROM); crm_debug("c0) announce"); if(hb_from == NULL) { crm_err("Failed to determin origin of hb message"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } if(fsa_our_dc == NULL) { crm_info("Set DC to %s", hb_from); fsa_our_dc = crm_strdup(hb_from); } else if(safe_str_neq(fsa_our_dc, hb_from)) { /* reset the fsa_our_dc to NULL */ crm_warn("Resetting our DC to NULL after DC_HB" " from unrecognised node."); crm_free(fsa_our_dc); fsa_our_dc = NULL; return I_NULL; /* for now, wait for the DC's * to settle down */ } /* send as a broadcast */ { HA_Message *req = create_request( CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); send_msg_via_ha(fsa_cluster_conn, req); } } else { /* Delay announce until we have finished local startup */ crm_warn("Delaying announce until local startup is complete"); return I_NULL; } return I_NULL; } static int query_call_id = 0; /* A_CL_JOIN_REQUEST */ /* aka. accept the welcome offer */ enum crmd_fsa_input do_cl_join_request(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *welcome_from = cl_get_string(input->msg, F_CRM_HOST_FROM); #if 0 if(we are sick) { log error ; /* save the request for later? */ return I_NULL; } #endif crm_debug("c1) processing join offer: %s", cl_get_string(input->msg, F_CRM_TASK)); /* we only ever want the last one */ if(query_call_id > 0) { crm_debug("Cancelling previous join query"); remove_cib_op_callback(query_call_id, FALSE); } if(fsa_our_dc == NULL) { crm_info("Set DC to %s", welcome_from); fsa_our_dc = crm_strdup(welcome_from); } else if(safe_str_neq(welcome_from, fsa_our_dc)) { /* dont do anything until DC's sort themselves out */ crm_err("Expected a welcome from %s, but %s replied", fsa_our_dc, welcome_from); return I_NULL; } CRM_DEV_ASSERT(input != NULL); query_call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, NULL, NULL, cib_scope_local); add_cib_op_callback( query_call_id, TRUE, copy_ha_msg_input(input), join_query_callback); register_fsa_action(A_DC_TIMER_STOP); return I_NULL; } void join_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { crm_data_t *local_cib = NULL; ha_msg_input_t *input = user_data; crm_data_t *generation = create_xml_node( NULL, XML_CIB_TAG_GENERATION_TUPPLE); CRM_DEV_ASSERT(input != NULL); query_call_id = 0; if(rc == cib_ok) { local_cib = find_xml_node(output, XML_TAG_CIB, TRUE); } if(local_cib != NULL) { HA_Message *reply = NULL; const char *join_id = ha_msg_value(input->msg, F_CRM_JOIN_ID); crm_debug("c2) respond to join offer"); crm_debug("Acknowledging %s as our DC", cl_get_string(input->msg, F_CRM_HOST_FROM)); copy_in_properties(generation, local_cib); reply = create_request( CRM_OP_JOIN_REQUEST, generation, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); ha_msg_add(reply, F_CRM_JOIN_ID, join_id); send_msg_via_ha(fsa_cluster_conn, reply); } else { crm_err("Could not retrieve Generation to attach to our" " join acknowledgement: %s", cib_error2string(rc)); register_fsa_error_adv( C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } delete_ha_msg_input(input); free_xml(generation); } /* A_CL_JOIN_RESULT */ /* aka. this is notification that we have (or have not) been accepted */ enum crmd_fsa_input do_cl_join_result(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { crm_data_t *tmp1 = NULL; gboolean was_nack = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *op = cl_get_string(input->msg,F_CRM_TASK); const char *ack_nack = cl_get_string(input->msg,CRM_OP_JOIN_ACKNAK); const char *welcome_from = cl_get_string(input->msg,F_CRM_HOST_FROM); if(safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) { - crm_debug("Ignoring op=%s message", op); + crm_debug_2("Ignoring op=%s message", op); return I_NULL; } /* calculate if it was an ack or a nack */ if(crm_is_true(ack_nack)) { was_nack = FALSE; } if(was_nack) { crm_err("Join with %s failed. NACK'd", welcome_from); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } if(AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return I_NULL; } else if(fsa_our_dc == NULL) { crm_info("Set DC to %s", welcome_from); fsa_our_dc = crm_strdup(welcome_from); } /* send our status section to the DC */ crm_debug("c3) confirming join: %s", cl_get_string(input->msg, F_CRM_TASK)); - crm_debug("Discovering local LRM status"); + crm_debug_2("Discovering local LRM status"); tmp1 = do_lrm_query(TRUE); if(tmp1 != NULL) { const char *join_id = ha_msg_value(input->msg, F_CRM_JOIN_ID); HA_Message *reply = create_request( CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); ha_msg_add(reply, F_CRM_JOIN_ID, join_id); - crm_debug("Sending local LRM status"); + crm_debug_2("Sending local LRM status"); send_msg_via_ha(fsa_cluster_conn, reply); if(AM_I_DC == FALSE) { register_fsa_input(cause, I_NOT_DC, NULL); } free_xml(tmp1); } else { crm_err("Could send our LRM state to the DC"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return I_NULL; } diff --git a/crm/crmd/join_dc.c b/crm/crmd/join_dc.c index dbe4a7e278..600f8903b1 100644 --- a/crm/crmd/join_dc.c +++ b/crm/crmd/join_dc.c @@ -1,628 +1,626 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include GHashTable *welcomed_nodes = NULL; GHashTable *integrated_nodes = NULL; GHashTable *finalized_nodes = NULL; GHashTable *confirmed_nodes = NULL; char *max_epoche = NULL; char *max_generation_from = NULL; crm_data_t *max_generation_xml = NULL; void initialize_join(gboolean before); gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data); void join_send_offer(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); gboolean process_join_ack_msg( const char *join_from, crm_data_t *lrm_update, int join_id); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void join_update_complete_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); void finalize_join(const char *caller); static int current_join_id = 0; /* A_DC_JOIN_OFFER_ALL */ enum crmd_fsa_input do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { /* reset everyones status back to down or in_ccm in the CIB * * any nodes that are active in the CIB but not in the CCM list * will be seen as offline by the PE anyway */ do_update_cib_nodes(NULL, TRUE); crm_info("0) Offering membership to %d clients", fsa_membership_copy->members_size); initialize_join(TRUE); current_join_id++; g_hash_table_foreach( fsa_membership_copy->members, join_send_offer, NULL); /* dont waste time by invoking the PE yet; */ crm_debug("1) Waiting on %d outstanding join acks", g_hash_table_size(welcomed_nodes)); return I_NULL; } /* A_DC_JOIN_OFFER_ONE */ enum crmd_fsa_input do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { oc_node_t member; gpointer a_node = NULL; ha_msg_input_t *welcome = fsa_typed_data(fsa_dt_ha_msg); const char *join_to = NULL; if(welcome == NULL) { crm_err("Attempt to send welcome message " "without a message to reply to!"); return I_NULL; } join_to = cl_get_string(welcome->msg, F_CRM_HOST_FROM); if(a_node != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_debug("Re-offering membership to %s...", join_to); } crm_info("Processing annouce request from %s in state %s", join_to, fsa_state2string(cur_state)); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ member.node_uname = crm_strdup(fsa_our_uname); join_send_offer(NULL, &member, NULL); crm_free(member.node_uname); member.node_uname = crm_strdup(join_to); join_send_offer(NULL, &member, NULL); crm_free(member.node_uname); /* this was a genuine join request, cancel any existing * transition and invoke the PE */ if(need_transition(fsa_state)) { register_fsa_action(A_TE_CANCEL); } /* dont waste time by invoking the pe yet; */ crm_debug("1) Waiting on %d outstanding join acks", g_hash_table_size(welcomed_nodes)); return I_NULL; } /* A_DC_JOIN_PROCESS_REQ */ enum crmd_fsa_input do_dc_join_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { crm_data_t *generation = NULL; int join_id = -1; gboolean ack_nack_bool = TRUE; const char *ack_nack = CRMD_JOINSTATE_MEMBER; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = cl_get_string(join_ack->msg,F_CRM_HOST_FROM); const char *ref = cl_get_string(join_ack->msg,XML_ATTR_REFERENCE); gpointer join_node = g_hash_table_lookup(fsa_membership_copy->members, join_from); crm_debug_3("2) Processing req from %s", join_from); generation = join_ack->xml; ha_msg_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); crm_log_xml_debug_2(max_generation_xml, "Max generation"); crm_log_xml_debug_2(generation, "Their generation"); if(join_node == NULL) { crm_err("Node %s is not a member", join_from); ack_nack_bool = FALSE; } else if(generation == NULL) { crm_err("Generation was NULL"); ack_nack_bool = FALSE; } else if(join_id != current_join_id) { crm_debug("Response from %s was for invalid join: %d vs. %d", join_from, join_id, current_join_id); check_join_state(cur_state, __FUNCTION__); return I_NULL; } else if(max_generation_xml == NULL) { max_generation_xml = copy_xml(generation); max_generation_from = crm_strdup(join_from); } else if(cib_compare_generation(max_generation_xml, generation) < 0) { crm_debug("%s has a better generation number than" " the current max %s", join_from, max_generation_from); crm_free(max_generation_from); free_xml(max_generation_xml); max_generation_from = crm_strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } - crm_log_xml_debug(max_generation_xml, "Current max generation"); - if(ack_nack_bool == FALSE) { /* NACK this client */ ack_nack = CRMD_JOINSTATE_DOWN; crm_err("2) NACK'ing node %s (ref %s)", join_from, ref); } else { crm_debug("2) Welcoming node %s after ACK (ref %s)", join_from, ref); } /* add them to our list of CRMD_STATE_ACTIVE nodes */ g_hash_table_insert( integrated_nodes, crm_strdup(join_from), crm_strdup(ack_nack)); - crm_debug("%u nodes have been integrated", - g_hash_table_size(integrated_nodes)); + crm_debug_2("%u nodes have been integrated", + g_hash_table_size(integrated_nodes)); g_hash_table_remove(welcomed_nodes, join_from); if(check_join_state(cur_state, __FUNCTION__) == FALSE) { /* dont waste time by invoking the PE yet; */ - crm_debug("Still waiting on %d outstanding join acks", - g_hash_table_size(welcomed_nodes)); + crm_debug_2("Still waiting on %d outstanding join acks", + g_hash_table_size(welcomed_nodes)); } return I_NULL; } #define JOIN_AFTER_SYNC 1 /* A_DC_JOIN_FINALIZE */ enum crmd_fsa_input do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum cib_errors rc = cib_ok; /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ #if JOIN_AFTER_SYNC crm_debug("Finializing join for %d clients", g_hash_table_size(integrated_nodes)); #else crm_debug("Notifying %d clients of join results", g_hash_table_size(integrated_nodes)); g_hash_table_foreach_remove( integrated_nodes, finalize_join_for, NULL); #endif clear_bit_inplace(fsa_input_register, R_HAVE_CIB); if(max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)){ set_bit_inplace(fsa_input_register, R_HAVE_CIB); } if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ crm_info("Asking %s for its copy of the CIB", crm_str(max_generation_from)); set_bit_inplace(fsa_input_register, R_CIB_ASKED); fsa_cib_conn->call_timeout = 10; rc = fsa_cib_conn->cmds->sync_from( fsa_cib_conn, max_generation_from, NULL, cib_quorum_override); fsa_cib_conn->call_timeout = 0; /* back to the default */ add_cib_op_callback(rc, FALSE, crm_strdup(max_generation_from), finalize_sync_callback); return I_NULL; } finalize_join(__FUNCTION__); return I_NULL; } void finalize_sync_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { CRM_DEV_ASSERT(cib_not_master != rc); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); if(rc == cib_remote_timeout) { crm_err("Sync from %s resulted in an error: %s." " Use what we have...", (char*)user_data, cib_error2string(rc)); #if 0 /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); return; #else rc = cib_ok; #endif } if(rc < cib_ok) { crm_err("Sync from %s resulted in an error: %s", (char*)user_data, cib_error2string(rc)); register_fsa_error_adv( C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } else if(AM_I_DC && fsa_state == S_FINALIZE_JOIN) { finalize_join(__FUNCTION__); } else { crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s", AM_I_DC?"DC":"CRMd", fsa_state2string(fsa_state)); } crm_free(user_data); } void finalize_join(const char *caller) { crm_data_t *cib = createEmptyCib(); crm_data_t *cib_update = NULL; set_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); set_uuid(fsa_cluster_conn, cib, XML_ATTR_DC_UUID, fsa_our_uname); crm_debug_3("Update %s in the CIB to our uuid: %s", XML_ATTR_DC_UUID, crm_element_value(cib, XML_ATTR_DC_UUID)); cib_update = create_cib_fragment(cib, NULL); fsa_cib_conn->cmds->modify( fsa_cib_conn, NULL, cib_update, NULL, cib_quorum_override); free_xml(cib_update); free_xml(cib); crm_debug_3("Bumping the epoche and syncing to %d clients", g_hash_table_size(finalized_nodes)); fsa_cib_conn->cmds->bump_epoch( fsa_cib_conn, cib_scope_local|cib_quorum_override); #if JOIN_AFTER_SYNC /* make sure dc_uuid is re-set to us */ if(check_join_state(fsa_state, caller) == FALSE) { crm_debug("Notifying %d clients of join results", g_hash_table_size(integrated_nodes)); g_hash_table_foreach_remove( integrated_nodes, finalize_join_for, NULL); } #else check_join_state(cur_state, caller); rc = fsa_cib_conn->cmds->sync(fsa_cib_conn, NULL, cib_quorum_override); #endif } /* A_DC_JOIN_PROCESS_ACK */ enum crmd_fsa_input do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = cl_get_string(join_ack->msg, F_CRM_HOST_FROM); const char *op = cl_get_string(join_ack->msg, F_CRM_TASK); if(safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring op=%s message", op); } else { int join_id = -1; ha_msg_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); process_join_ack_msg(join_from, join_ack->xml, join_id); } return I_NULL; } gboolean process_join_ack_msg(const char *join_from, crm_data_t *lrm_update, int join_id) { /* now update them to "member" */ int call_id = 0; crm_data_t *update = NULL; crm_data_t *fragment = NULL; const char *join_state = NULL; - crm_debug("Processing ack from %s", join_from); + crm_debug_2("Processing ack from %s", join_from); join_state = (const char *) g_hash_table_lookup(finalized_nodes, join_from); if(join_state == NULL) { crm_err("Join not in progress: ignoring join from %s", join_from); return FALSE; } else if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_err("Node %s wasnt invited to join the cluster",join_from); g_hash_table_remove(finalized_nodes, join_from); return FALSE; } else if(join_id != current_join_id) { crm_err("Node %s responded to an invalid join: %d vs. %d", join_from, join_id, current_join_id); g_hash_table_remove(finalized_nodes, join_from); return FALSE; } g_hash_table_remove(finalized_nodes, join_from); if(g_hash_table_lookup(confirmed_nodes, join_from) != NULL) { crm_err("hash already contains confirmation from %s",join_from); } g_hash_table_insert(confirmed_nodes, crm_strdup(join_from), crm_strdup(CRMD_JOINSTATE_MEMBER)); crm_info("4) Updating node state to %s for %s", CRMD_JOINSTATE_MEMBER, join_from); /* update CIB with the current LRM status from the node * We dont need to notify the TE of these updates, a transition will * be started in due time */ call_id = fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, lrm_update, NULL, cib_scope_local|cib_quorum_override); add_cib_op_callback(call_id, TRUE,NULL, join_update_complete_callback); free_xml(fragment); free_xml(update); return TRUE; } gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *join_state = NULL; HA_Message *acknak = NULL; if(key == NULL || value == NULL) { return TRUE; } join_to = (const char *)key; join_state = (const char *)value; /* make sure the node exists in the config section */ create_node_entry(join_to, join_to, CRMD_JOINSTATE_MEMBER); /* send the ack/nack to the node */ acknak = create_request( CRM_OP_JOIN_ACKNAK, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); ha_msg_add_int(acknak, F_CRM_JOIN_ID, current_join_id); /* set the ack/nack */ if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_debug("3) ACK'ing join request from %s, state %s", join_to, join_state); ha_msg_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); g_hash_table_insert( finalized_nodes, crm_strdup(join_to), crm_strdup(CRMD_JOINSTATE_MEMBER)); } else { crm_warn("3) NACK'ing join request from %s, state %s", join_to, join_state); ha_msg_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE); } send_msg_via_ha(fsa_cluster_conn, acknak); return TRUE; } void initialize_join(gboolean before) { /* clear out/reset a bunch of stuff */ crm_debug("Initializing join data"); g_hash_table_destroy(welcomed_nodes); g_hash_table_destroy(integrated_nodes); g_hash_table_destroy(finalized_nodes); g_hash_table_destroy(confirmed_nodes); if(before) { if(max_generation_from != NULL) { crm_free(max_generation_from); max_generation_from = NULL; } if(max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } clear_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } void join_send_offer(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *crm_online = NULL; const oc_node_t *member = (const oc_node_t*)value; if(member != NULL) { join_to = member->node_uname; } if(join_to == NULL) { crm_err("No recipient for welcome message"); return; } g_hash_table_remove(confirmed_nodes, join_to); g_hash_table_remove(finalized_nodes, join_to); g_hash_table_remove(integrated_nodes, join_to); g_hash_table_remove(welcomed_nodes, join_to); crm_online = g_hash_table_lookup(crmd_peer_state, join_to); if(safe_str_eq(crm_online, ONLINESTATUS)) { HA_Message *offer = create_request( CRM_OP_JOIN_OFFER, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); ha_msg_add_int(offer, F_CRM_JOIN_ID, current_join_id); /* send the welcome */ crm_debug("Sending %s(%d) to %s", CRM_OP_JOIN_OFFER, current_join_id, join_to); send_msg_via_ha(fsa_cluster_conn, offer); g_hash_table_insert(welcomed_nodes, crm_strdup(join_to), crm_strdup(CRMD_JOINSTATE_PENDING)); } else { crm_debug("Peer process on %s is not active", join_to); } } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { - crm_debug("Invoked by %s in state: %s", + crm_debug_2("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if(cur_state == S_INTEGRATION) { if(g_hash_table_size(welcomed_nodes) == 0) { - crm_info("Integration of %d peers complete: %s", + crm_debug("Integration of %d peers complete: %s", g_hash_table_size(integrated_nodes), source); register_fsa_input_before( C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if(cur_state == S_FINALIZE_JOIN) { if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("Delaying I_FINALIZED until we have the CIB"); return TRUE; } else if(g_hash_table_size(integrated_nodes) == 0 && g_hash_table_size(finalized_nodes) == 0) { - crm_info("Join process complete: %s", source); + crm_debug("Join process complete: %s", source); register_fsa_input_later( C_FSA_INTERNAL, I_FINALIZED, NULL); } else if(g_hash_table_size(integrated_nodes) != 0 && g_hash_table_size(finalized_nodes) != 0) { crm_err("Waiting on %d integrated nodes" " AND %d confirmations", g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes)); } else if(g_hash_table_size(integrated_nodes) != 0) { crm_debug("Still waiting on %d integrated nodes", g_hash_table_size(integrated_nodes)); } else if(g_hash_table_size(finalized_nodes) != 0) { - crm_debug("Still waiting on %d confirmations", + crm_debug_2("Still waiting on %d confirmations", g_hash_table_size(finalized_nodes)); } } return FALSE; } void join_update_complete_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { fsa_data_t *msg_data = NULL; if(rc == cib_ok) { check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update failed"); crm_log_message(LOG_DEBUG, msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } diff --git a/crm/crmd/messages.c b/crm/crmd/messages.c index b54ca19b6a..6d07045a63 100644 --- a/crm/crmd/messages.c +++ b/crm/crmd/messages.c @@ -1,1216 +1,1216 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); enum crmd_fsa_input handle_request(ha_msg_input_t *stored_msg); enum crmd_fsa_input handle_response(ha_msg_input_t *stored_msg); enum crmd_fsa_input handle_shutdown_request(HA_Message *stored_msg); ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t *orig); gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data); #ifdef MSG_LOG # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x); \ crm_log_message_adv(LOG_MSG, "router.log", relay_message); #else # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x) #endif /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t *cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if(fsa_actions != A_NOTHING) { register_fsa_input_adv( cur_data?cur_data->fsa_cause:C_FSA_INTERNAL, I_NULL, cur_data?cur_data->data:NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv( cause, input, new_data, A_NOTHING, TRUE, raised_from); } static gboolean last_was_vote = FALSE; void register_fsa_input_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; last_data_id++; - crm_debug("%s raised FSA input %d (%s) (cause=%s) %s data", - raised_from, last_data_id, fsa_input2string(input), - fsa_cause2string(cause), data?"with":"without"); + crm_debug_2("%s raised FSA input %d (%s) (cause=%s) %s data", + raised_from, last_data_id, fsa_input2string(input), + fsa_cause2string(cause), data?"with":"without"); if(input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input"); if(old_len > 0) { crm_warn("%s stalled the FSA with pending inputs", raised_from); fsa_dump_queue(LOG_DEBUG); } if(data == NULL) { set_bit_inplace(fsa_actions, with_actions); with_actions = A_NOTHING; return; } crm_err("%s stalled the FSA with data - this may be broken", raised_from); } if(old_len == 0) { last_was_vote = FALSE; } if(input == I_NULL && with_actions == A_NOTHING /* && data == NULL */){ /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return; } else if(data == NULL) { last_was_vote = FALSE; } else if(last_was_vote && cause == C_HA_MESSAGE && input == I_ROUTER) { const char *op = cl_get_string( ((ha_msg_input_t*)data)->msg, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_VOTE)) { /* It is always safe to treat N successive votes as * a single one * * If all the discarded votes are more "loosing" than * the first then the result is accurate * (win or loose). * * If any of the discarded votes are less "loosing" * than the first then we will cast our vote and the * eventual winner will vote us down again (which * even in the case that N=2, is no worse than if we * had not disarded the vote). */ crm_debug_2("Vote compression: %d", old_len); return; } } else if (cause == C_HA_MESSAGE && input == I_ROUTER) { const char *op = cl_get_string( ((ha_msg_input_t*)data)->msg, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_VOTE)) { last_was_vote = TRUE; crm_debug_3("Added vote: %d", old_len); } } else { last_was_vote = FALSE; } crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if(with_actions != A_NOTHING) { crm_debug_3("Adding actions %.16llx to input", with_actions); } if(data != NULL) { switch(cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_debug_3("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_debug_3("Copying %s data from %s as lrm_op_t", fsa_cause2string(cause), raised_from); fsa_data->data = copy_lrm_op((lrm_op_t*)data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: crm_debug_3("Copying %s data from %s as CCM data", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ccm_data(data); fsa_data->data_type = fsa_dt_ccm; break; case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); exit(1); break; } crm_debug_4("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if(prepend) { crm_debug_2("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_debug_2("Queue len: %d", g_list_length(fsa_message_queue)); fsa_dump_queue(LOG_DEBUG_2); if(old_len == g_list_length(fsa_message_queue)){ crm_err("Couldnt add message to the queue"); } if(fsa_source) { G_main_set_trigger(fsa_source); } } void fsa_dump_queue(int log_level) { if(log_level < (int)crm_log_level) { return; } slist_iter( data, fsa_data_t, fsa_message_queue, lpc, do_crm_log(log_level, __FILE__, __FUNCTION__, "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)", lpc, data->id, fsa_input2string(data->fsa_input), data->origin, fsa_cause2string(data->fsa_cause)); ); } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t *orig) { ha_msg_input_t *input_copy = NULL; crm_malloc0(input_copy, sizeof(ha_msg_input_t)); if(orig != NULL) { crm_debug_4("Copy msg"); input_copy->msg = ha_msg_copy(orig->msg); if(orig->xml != NULL) { crm_debug_4("Copy xml"); input_copy->xml = copy_xml(orig->xml); } } else { crm_debug_3("No message to copy"); } return input_copy; } void delete_fsa_input(fsa_data_t *fsa_data) { lrm_op_t *op = NULL; crm_data_t *foo = NULL; struct crmd_ccm_data_s *ccm_input = NULL; if(fsa_data == NULL) { return; } crm_debug_4("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if(fsa_data->data != NULL) { switch(fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrm_op_t*)fsa_data->data; crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->app_name); crm_free(op); break; case fsa_dt_ccm: ccm_input = (struct crmd_ccm_data_s *) fsa_data->data; crm_free(ccm_input->oc); crm_free(ccm_input); break; case fsa_dt_none: if(fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); exit(1); } break; } crm_debug_4("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } crm_free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t* message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv( fsa_data_t *fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if(fsa_data == NULL) { do_crm_log(LOG_ERR, caller, NULL, "No FSA data available"); } else if(fsa_data->data == NULL) { do_crm_log(LOG_ERR, caller, NULL, "No message data available"); } else if(fsa_data->data_type != a_type) { do_crm_log(LOG_CRIT, caller, NULL, "Message data was the wrong type! %d vs. requested=%d." " Origin: %s", fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ enum crmd_fsa_input do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); gboolean routed = FALSE; if(msg_data->fsa_cause != C_IPC_MESSAGE && msg_data->fsa_cause != C_HA_MESSAGE) { /* dont try and route these */ crm_warn("Can only process HA and IPC messages"); return I_NULL; } /* try passing the buck first */ crm_debug_4("Attempting to route message"); routed = relay_message(input->msg, cause==C_IPC_MESSAGE); if(routed == FALSE) { crm_debug_4("Message wasn't routed... try handling locally"); /* calculate defer */ result = handle_message(input); switch(result) { case I_NULL: break; case I_DC_HEARTBEAT: break; case I_CIB_OP: break; /* what else should go here? */ default: crm_debug_4("Defering local processing of message"); register_fsa_input_later( cause, result, msg_data->data); result = I_NULL; break; } if(result == I_NULL) { crm_debug_4("Message processed"); } else { register_fsa_input(cause, result, msg_data->data); } } else { crm_debug_4("Message routed..."); input->msg = NULL; } return I_NULL; } /* * This method frees msg */ gboolean send_request(HA_Message *msg, char **msg_reference) { gboolean was_sent = FALSE; /* crm_log_xml_debug_3(request, "Final request..."); */ if(msg_reference != NULL) { *msg_reference = crm_strdup( cl_get_string(msg, XML_ATTR_REFERENCE)); } was_sent = relay_message(msg, TRUE); if(was_sent == FALSE) { ha_msg_input_t *fsa_input = new_ha_msg_input(msg); register_fsa_input(C_IPC_MESSAGE, I_ROUTER, fsa_input); delete_ha_msg_input(fsa_input); crm_msg_del(msg); } return was_sent; } /* unless more processing is required, relay_message is freed */ gboolean relay_message(HA_Message *relay_message, gboolean originated_locally) { int is_for_dc = 0; int is_for_dcib = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = cl_get_string(relay_message, F_CRM_HOST_TO); const char *sys_to = cl_get_string(relay_message, F_CRM_SYS_TO); const char *sys_from= cl_get_string(relay_message, F_CRM_SYS_FROM); const char *type = cl_get_string(relay_message, F_TYPE); const char *msg_error = NULL; crm_debug_3("Routing message %s", cl_get_string(relay_message, XML_ATTR_REFERENCE)); if(relay_message == NULL) { msg_error = "Cannot route empty message"; } else if(safe_str_eq(CRM_OP_HELLO, cl_get_string(relay_message, F_CRM_TASK))){ /* quietly ignore */ processing_complete = TRUE; } else if(safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if(sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if(msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_message(LOG_WARNING, relay_message); } if(processing_complete) { crm_msg_del(relay_message); return TRUE; } processing_complete = TRUE; is_for_dc = (strcmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_cib = (strcmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if(host_to == NULL || strlen(host_to) == 0) { if(is_for_dc) { is_local = 0; } else if(is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if(strcmp(fsa_our_uname, host_to) == 0) { is_local=1; } if(is_for_dc || is_for_dcib) { if(AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ ROUTER_RESULT("Message result: External relay to DC"); send_msg_via_ha(fsa_cluster_conn, relay_message); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); crm_msg_del(relay_message); } } else if(is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(relay_message, sys_to); } else { ROUTER_RESULT("Message result: External relay"); send_msg_via_ha(fsa_cluster_conn, relay_message); } return processing_complete; } gboolean crmd_authorize_message(ha_msg_input_t *client_msg, crmd_client_t *curr_client) { /* check the best case first */ const char *sys_from = cl_get_string(client_msg->msg, F_CRM_SYS_FROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; const char *filtered_from; gpointer table_key = NULL; gboolean auth_result = FALSE; struct crm_subsystem_s *the_subsystem = NULL; gboolean can_reply = FALSE; /* no-one has registered with this id */ const char *op = cl_get_string(client_msg->msg, F_CRM_TASK); if (safe_str_neq(CRM_OP_HELLO, op)) { if(sys_from == NULL) { crm_warn("Message [%s] was had no value for %s... discarding", cl_get_string(client_msg->msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM); return FALSE; } filtered_from = sys_from; /* The CIB can have two names on the DC */ if(strcmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup (ipc_clients, filtered_from) != NULL) { can_reply = TRUE; /* reply can be routed */ } crm_debug_2("Message reply can%s be routed from %s.", can_reply?"":" not", sys_from); if(can_reply == FALSE) { crm_warn("Message [%s] not authorized", cl_get_string(client_msg->msg, XML_ATTR_REFERENCE)); } return can_reply; } crm_debug_3("received client join msg"); crm_log_message(LOG_MSG, client_msg->msg); auth_result = process_hello_message( client_msg->xml, &uuid, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if(client_name == NULL || uuid == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), crm_str(uuid)); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_debug_3("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } crm_free(major_version); crm_free(minor_version); } if (auth_result == TRUE) { /* if we already have one of those clients * only applies to te, pe etc. not admin clients */ if (strcmp(CRM_SYSTEM_PENGINE, client_name) == 0) { the_subsystem = pe_subsystem; } else if (strcmp(CRM_SYSTEM_TENGINE, client_name) == 0) { the_subsystem = te_subsystem; } if (the_subsystem != NULL) { /* do we already have one? */ crm_debug_3("Checking if %s is required/already connected", client_name); if(is_set(fsa_input_register, the_subsystem->flag_connected)) { auth_result = FALSE; crm_warn("Bit\t%.16llx set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_err("Client %s is already connected", client_name); } else if(FALSE == is_set(fsa_input_register, the_subsystem->flag_required)) { auth_result = TRUE; crm_warn("Bit\t%.16llx not set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_warn("Client %s joined but we dont need it", client_name); stop_subsystem(the_subsystem); } else { the_subsystem->ipc = curr_client->client_channel; set_bit_inplace(fsa_input_register, the_subsystem->flag_connected); } } else { table_key = (gpointer) generate_hash_key(client_name, uuid); } } if (auth_result == TRUE) { if(table_key == NULL) { table_key = (gpointer)crm_strdup(client_name); } crm_debug("Accepted client %s", crm_str(table_key)); curr_client->table_key = table_key; curr_client->sub_sys = crm_strdup(client_name); curr_client->uuid = crm_strdup(uuid); g_hash_table_insert (ipc_clients, table_key, curr_client->client_channel); send_hello_message(curr_client->client_channel, "n/a", CRM_SYSTEM_CRMD, "0", "1"); crm_debug_3("Updated client list with %s", crm_str(table_key)); G_main_set_trigger(fsa_source); } else { crm_warn("Rejected client logon request"); curr_client->client_channel->ch_status = IPC_DISC_PENDING; } if(uuid != NULL) crm_free(uuid); if(minor_version != NULL) crm_free(minor_version); if(major_version != NULL) crm_free(major_version); if(client_name != NULL) crm_free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(ha_msg_input_t *stored_msg) { enum crmd_fsa_input next_input = I_NULL; const char *type = NULL; if(stored_msg == NULL || stored_msg->msg == NULL) { crm_err("No message to handle"); return I_NULL; } type = cl_get_string(stored_msg->msg, F_CRM_MSG_TYPE); if(safe_str_eq(type, XML_ATTR_REQUEST)) { next_input = handle_request(stored_msg); } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { next_input = handle_response(stored_msg); } else { crm_err("Unknown message type: %s", type); } /* crm_debug_2("%s: Next input is %s", __FUNCTION__, */ /* fsa_input2string(next_input)); */ return next_input; } enum crmd_fsa_input handle_request(ha_msg_input_t *stored_msg) { HA_Message *msg = NULL; enum crmd_fsa_input next_input = I_NULL; const char *op = cl_get_string(stored_msg->msg, F_CRM_TASK); const char *sys_to = cl_get_string(stored_msg->msg, F_CRM_SYS_TO); const char *host_from = cl_get_string(stored_msg->msg, F_CRM_HOST_FROM); crm_debug_2("Received %s in state %s", op, fsa_state2string(fsa_state)); if(op == NULL) { crm_err("Bad message"); crm_log_message(LOG_ERR, stored_msg->msg); /*========== common actions ==========*/ } else if(strcmp(op, CRM_OP_NOOP) == 0) { crm_debug("no-op from %s", crm_str(host_from)); } else if(strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ register_fsa_input_adv(C_HA_MESSAGE, I_NULL, stored_msg, A_ELECTION_COUNT, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if(fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); next_input = I_ELECTION; #if 0 } else if(AM_I_DC) { /* This is the old way of doing things but what is gained? */ next_input = I_ELECTION; #endif } } else if(strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*next_input = I_SHUTDOWN; */ next_input = I_NULL; } else if(strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ crm_data_t *ping = createPingAnswerFragment(sys_to, "ok"); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); crm_info("Current state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg->msg, ping); if(relay_message(msg, TRUE) == FALSE) { crm_msg_del(msg); } /* probably better to do this via signals on the * local node */ } else if(strcmp(op, CRM_OP_DEBUG_UP) == 0) { int level = get_crm_log_level(); set_crm_log_level(level+1); crm_info("Debug set to %d (was %d)", get_crm_log_level(), level); } else if(strcmp(op, CRM_OP_DEBUG_DOWN) == 0) { int level = get_crm_log_level(); set_crm_log_level(level-1); crm_info("Debug set to %d (was %d)", get_crm_log_level(), level); } else if(strcmp(op, CRM_OP_JOIN_OFFER) == 0) { next_input = I_JOIN_OFFER; } else if(strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { next_input = I_JOIN_RESULT; /* this functionality should only be enabled * if this is a development build */ } else if(CRM_DEV_BUILD && strcmp(op, CRM_OP_DIE) == 0/*constant condition*/) { crm_warn("Test-only code: Killing the CRM without mercy"); crm_warn("Inhibiting respawns"); exit(100); /*========== (NOT_DC)-Only Actions ==========*/ } else if(AM_I_DC == FALSE){ gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(dc_match || fsa_our_dc == NULL) { if(strcmp(op, CRM_OP_HBEAT) == 0) { crm_debug_3("Received DC heartbeat from %s", host_from); next_input = I_DC_HEARTBEAT; } else if(fsa_our_dc == NULL) { crm_warn("CRMd discarding request: %s" " (DC: %s, from: %s)", op, crm_str(fsa_our_dc), host_from); crm_warn("Ignored Request"); crm_log_message(LOG_WARNING, stored_msg->msg); } else if(strcmp(op, CRM_OP_SHUTDOWN) == 0) { next_input = I_STOP; } else { crm_err("CRMd didnt expect request: %s", op); crm_log_message(LOG_ERR, stored_msg->msg); } } else { crm_warn("Discarding %s op from %s", op, host_from); } /*========== DC-Only Actions ==========*/ } else if(AM_I_DC) { const char *message = ha_msg_value(stored_msg->msg, "message"); if(safe_str_eq(op, CRM_OP_TEABORT)) { crm_debug("Transition cancelled: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(need_transition(fsa_state)) { next_input = I_PE_CALC; } else { crm_debug("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(safe_str_eq(op, CRM_OP_TETIMEOUT)) { crm_debug("Transition cancelled: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(fsa_state == S_IDLE) { crm_err("Transition timed out in S_IDLE"); next_input = I_PE_CALC; } else if(need_transition(fsa_state)) { next_input = I_PE_CALC; } else { crm_err("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(safe_str_eq(op, CRM_OP_TEABORTED)) { crm_debug("Transition cancelled: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(need_transition(fsa_state)) { next_input = I_PE_CALC; } else { crm_debug("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(strcmp(op, CRM_OP_TECOMPLETE) == 0) { crm_debug("Transition complete: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(fsa_state == S_TRANSITION_ENGINE) { next_input = I_TE_SUCCESS; } else { crm_debug("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { next_input = I_NODE_JOIN; } else if(strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { next_input = I_JOIN_REQUEST; } else if(strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { next_input = I_JOIN_RESULT; } else if(strcmp(op, CRM_OP_SHUTDOWN) == 0) { gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(dc_match) { crm_err("We didnt ask to be shut down yet our" " TE is telling us too." " Better get out now!"); next_input = I_TERMINATE; } else if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("We asked to be shut down, " " are still the DC, yet another node" " (DC) is askin us to shutdown!"); next_input = I_STOP; } else if(fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); next_input = I_ELECTION; } } else if(strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ next_input = handle_shutdown_request(stored_msg->msg); } else { crm_err("Unexpected request (%s) sent to the DC", op); crm_log_message(LOG_ERR, stored_msg->msg); } } return next_input; } enum crmd_fsa_input handle_response(ha_msg_input_t *stored_msg) { enum crmd_fsa_input next_input = I_NULL; const char *op = cl_get_string(stored_msg->msg, F_CRM_TASK); const char *sys_from = cl_get_string(stored_msg->msg, F_CRM_SYS_FROM); const char *msg_ref = cl_get_string(stored_msg->msg, XML_ATTR_REFERENCE); crm_debug_2("Received %s %s in state %s", op, XML_ATTR_RESPONSE, fsa_state2string(fsa_state)); if(op == NULL) { crm_err("Bad message"); crm_log_message(LOG_ERR, stored_msg->msg); } else if(AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { if(safe_str_eq(msg_ref, fsa_pe_ref)) { next_input = I_PE_SUCCESS; } else { crm_debug_2("Skipping superceeded reply from %s", sys_from); } } else if(strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_HBEAT) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { next_input = I_NULL; } else { crm_err("Unexpected response (op=%s) sent to the %s", op, AM_I_DC?"DC":"CRMd"); next_input = I_NULL; } return next_input; } enum crmd_fsa_input handle_shutdown_request(HA_Message *stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ crm_data_t *frag = NULL; time_t now = time(NULL); char *now_s = crm_itoa((int)now); crm_data_t *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); const char *host_from= cl_get_string(stored_msg, F_CRM_HOST_FROM); crm_info("Creating shutdown request for %s",host_from); crm_log_message(LOG_MSG, stored_msg); set_uuid(fsa_cluster_conn, node_state, XML_ATTR_UUID, host_from); crm_xml_add(node_state, XML_ATTR_UNAME, host_from); crm_xml_add(node_state, XML_CIB_ATTR_SHUTDOWN, now_s); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_STATE_INACTIVE); frag = create_cib_fragment(node_state, NULL); /* cleanup intermediate steps */ free_xml(node_state); crm_free(now_s); fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, frag, NULL, cib_quorum_override); free_xml(frag); /* will be picked up by the TE as long as its running */ if(need_transition(fsa_state) && is_set(fsa_input_register, R_TE_CONNECTED) == FALSE) { register_fsa_action(A_TE_CANCEL); } return I_NULL; } /* frees msg upon completion */ gboolean send_msg_via_ha(ll_cluster_t *hb_fd, HA_Message *msg) { int log_level = LOG_DEBUG_3; gboolean broadcast = FALSE; gboolean all_is_good = TRUE; const char *op = cl_get_string(msg, F_CRM_TASK); const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); const char *host_to = cl_get_string(msg, F_CRM_HOST_TO); if (msg == NULL) { crm_err("Attempt to send NULL Message via HA failed."); all_is_good = FALSE; } else { crm_debug_4("Relaying message to (%s) via HA", host_to); } if (all_is_good) { if (sys_to == NULL || strlen(sys_to) == 0) { crm_err("You did not specify a destination sub-system" " for this message."); all_is_good = FALSE; } } /* There are a number of messages may not need to be ordered. * At a later point perhaps we should detect them and send them * as unordered messages. */ if (all_is_good) { if (host_to == NULL || strlen(host_to) == 0 || safe_str_eq(sys_to, CRM_SYSTEM_DC)) { broadcast = TRUE; all_is_good = send_ha_message(hb_fd, msg, NULL); } else { all_is_good = send_ha_message(hb_fd, msg, host_to); } } if(all_is_good == FALSE) { log_level = LOG_ERR; } if(log_level == LOG_ERR || (safe_str_neq(op, CRM_OP_HBEAT))) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Sending %sHA message (ref=%s) to %s@%s %s.", broadcast?"broadcast ":"directed ", cl_get_string(msg, XML_ATTR_REFERENCE), crm_str(sys_to), host_to==NULL?"":host_to, all_is_good?"succeeded":"failed"); } crm_msg_del(msg); return all_is_good; } /* msg is deleted by the time this returns */ gboolean send_msg_via_ipc(HA_Message *msg, const char *sys) { gboolean send_ok = TRUE; IPC_Channel *client_channel; enum crmd_fsa_input next_input; crm_debug_4("relaying msg to sub_sys=%s via IPC", sys); client_channel = (IPC_Channel*)g_hash_table_lookup(ipc_clients, sys); if(cl_get_string(msg, F_CRM_HOST_FROM) == NULL) { ha_msg_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { crm_debug_3("Sending message via channel %s.", sys); send_ok = send_ipc_message(client_channel, msg); msg = NULL; /* so the crm_msg_del() below doesnt fail */ } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_CIB) == 0) { crm_err("Sub-system (%s) has been incorporated into the CRMd.", sys); crm_err("Change the way we handle this CIB message"); crm_log_message(LOG_ERR, msg); send_ok = FALSE; } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t *fsa_data = NULL; ha_msg_input_t *msg_copy = new_ha_msg_input(msg); crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->fsa_input = I_MESSAGE; fsa_data->fsa_cause = C_IPC_MESSAGE; fsa_data->data = msg_copy; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_debug_2("Invoking action %s (%.16llx)", fsa_action2string(A_LRM_INVOKE), A_LRM_INVOKE); #endif next_input = do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, fsa_data); delete_ha_msg_input(msg_copy); crm_free(fsa_data); /* todo: feed this back in for anything != I_NULL */ #ifdef FSA_TRACE crm_debug_2("Result of action %s was %s", fsa_action2string(A_LRM_INVOKE), fsa_input2string(next_input)); #endif } else { crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } crm_msg_del(msg); return send_ok; } void msg_queue_helper(void) { IPC_Channel *ipc = NULL; if(fsa_cluster_conn != NULL) { ipc = fsa_cluster_conn->llc_ops->ipcchan( fsa_cluster_conn); } if(ipc != NULL) { ipc->ops->resume_io(ipc); } /* g_hash_table_foreach_remove(ipc_clients, ipc_queue_helper, NULL); */ } gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *ipc_client = value; if(ipc_client->client_channel != NULL) { ipc_client->client_channel->ops->is_message_pending(ipc_client->client_channel); } return FALSE; }