diff --git a/crm/crmd/ccm.c b/crm/crmd/ccm.c index 6b969d7b5b..708c91a79f 100644 --- a/crm/crmd/ccm.c +++ b/crm/crmd/ccm.c @@ -1,506 +1,500 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include #include void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); #include #include #include #include #include #include int register_with_ccm(ll_cluster_t *hb_cluster); void msg_ccm_join(const struct ha_msg *msg, void *foo); void crmd_ccm_input_callback(oc_ed_t event, void *cookie, size_t size, const void *data); void ccm_event_detail(const oc_ev_membership_t *oc, oc_ed_t event); gboolean ccm_dispatch(int fd, gpointer user_data); -void new_node_state(xmlNodePtr node_updates, - const char *uname, const char *state); #define CCM_EVENT_DETAIL 1 oc_ev_t *fsa_ev_token; /* A_CCM_CONNECT */ enum crmd_fsa_input do_ccm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { int ret; /* fd_set rset; */ int fsa_ev_fd; FNIN(); if(action & A_CCM_DISCONNECT){ oc_ev_unregister(fsa_ev_token); } if(action & A_CCM_CONNECT) { cl_log(LOG_INFO, "Registering with CCM"); oc_ev_register(&fsa_ev_token); cl_log(LOG_INFO, "Setting up CCM callbacks"); oc_ev_set_callback(fsa_ev_token, OC_EV_MEMB_CLASS, crmd_ccm_input_callback, NULL); oc_ev_special(fsa_ev_token, OC_EV_MEMB_CLASS, 0/*don't care*/); cl_log(LOG_INFO, "Activating CCM token"); ret = oc_ev_activate(fsa_ev_token, &fsa_ev_fd); if (ret){ cl_log(LOG_INFO, "CCM Activation failed... unregistering"); oc_ev_unregister(fsa_ev_token); return(I_FAIL); } cl_log(LOG_INFO, "CCM Activation passed... all set to go!"); /* FD_ZERO(&rset); */ /* FD_SET(fsa_ev_fd, &rset); */ //GFDSource* G_main_add_fd(G_PRIORITY_LOW, fsa_ev_fd, FALSE, ccm_dispatch, fsa_ev_token, default_ipc_input_destroy); } if(action & ~(A_CCM_CONNECT|A_CCM_DISCONNECT)) { cl_log(LOG_ERR, "Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } FNRET(I_NULL); } /* A_CCM_EVENT */ enum crmd_fsa_input do_ccm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input return_input = I_NULL; const oc_ev_membership_t *oc = ((struct ccm_data *)data)->oc; oc_ed_t event = *((struct ccm_data *)data)->event; FNIN(); cl_log(LOG_INFO,"event=%s", event==OC_EV_MS_NEW_MEMBERSHIP?"NEW MEMBERSHIP": event==OC_EV_MS_NOT_PRIMARY?"NOT PRIMARY": event==OC_EV_MS_PRIMARY_RESTORED?"PRIMARY RESTORED": event==OC_EV_MS_EVICTED?"EVICTED": "NO QUORUM MEMBERSHIP"); if(CCM_EVENT_DETAIL) { ccm_event_detail(oc, event); } if (OC_EV_MS_EVICTED == event) { /* get out... NOW! */ return_input = I_SHUTDOWN; } if(return_input == I_SHUTDOWN) { ; /* ignore everything, the new DC will handle it */ } else { /* My understanding is that we will never get both * node leaving *and* node joining callbacks at the * same time. * * This logic would need to change if this is not * the case */ if(oc->m_n_out !=0) { return_input = I_NODE_LEFT; } else if(oc->m_n_in !=0) { /* delay the I_NODE_JOIN until they acknowledge our * DC status and send us their CIB */ return_input = I_NULL; } else { cl_log(LOG_INFO, "So why are we here? What CCM event happened?"); } } FNRET(return_input); } /* A_CCM_UPDATE_CACHE */ /* * Take the opportunity to update the node status in the CIB as well * (but only if we are the DC) */ enum crmd_fsa_input do_ccm_update_cache(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input next_input = I_NULL; int lpc, offset; oc_node_t *members = NULL; oc_ed_t event = *((struct ccm_data *)data)->event; const oc_ev_membership_t *oc = ((struct ccm_data *)data)->oc; oc_node_list_t *tmp = NULL, *membership_copy = (oc_node_list_t *) cl_malloc(sizeof(oc_node_list_t)); - xmlNodePtr update_list = create_xml_node(NULL, XML_CIB_TAG_STATE); + xmlNodePtr update_list = NULL; + xmlNodePtr tmp1 = NULL; FNIN(); set_xml_property_copy(update_list, XML_ATTR_ID, fsa_our_uname); cl_log(LOG_INFO,"Updating CCM cache after a \"%s\" event.", event==OC_EV_MS_NEW_MEMBERSHIP?"NEW MEMBERSHIP": event==OC_EV_MS_NOT_PRIMARY?"NOT PRIMARY": event==OC_EV_MS_PRIMARY_RESTORED?"PRIMARY RESTORED": event==OC_EV_MS_EVICTED?"EVICTED": "NO QUORUM MEMBERSHIP"); /*--*-- All Member Nodes --*--*/ offset = oc->m_memb_idx; membership_copy->members_size = oc->m_n_member; if(membership_copy->members_size > 0) { int size = membership_copy->members_size; size = size * sizeof(oc_node_t); membership_copy->members = (oc_node_t *)cl_malloc(size); members = membership_copy->members; for(lpc=0; lpc < membership_copy->members_size; lpc++) { members[lpc].node_id = oc->m_array[offset+lpc].node_id; members[lpc].node_born_on = oc->m_array[offset+lpc].node_born_on; members[lpc].node_uname = cl_strdup(oc->m_array[offset+lpc].node_uname); - new_node_state(update_list, - members[lpc].node_uname, "in_ccm"); + tmp1 = create_node_state(members[lpc].node_uname, "in_ccm", NULL, NULL); + if(update_list == NULL) { + update_list = tmp1; + } else { + update_list = xmlAddSibling(update_list, tmp1); + } + } } else { membership_copy->members = NULL; } /*--*-- New Member Nodes --*--*/ offset = oc->m_in_idx; membership_copy->new_members_size = oc->m_n_in; if(membership_copy->new_members_size > 0) { int size = membership_copy->new_members_size; size = size * sizeof(oc_node_t); membership_copy->new_members = (oc_node_t *)cl_malloc(size); members = membership_copy->new_members; for(lpc=0; lpc < membership_copy->new_members_size; lpc++) { members[lpc].node_id = oc->m_array[offset+lpc].node_id; members[lpc].node_born_on = oc->m_array[offset+lpc].node_born_on; members[lpc].node_uname = cl_strdup(oc->m_array[offset+lpc].node_uname); - new_node_state(update_list, - members[lpc].node_uname, "in_ccm"); + tmp1 = create_node_state(members[lpc].node_uname, "in_ccm", NULL, NULL); + if(update_list == NULL) { + update_list = tmp1; + } else { + update_list = xmlAddSibling(update_list, tmp1); + } } } else { membership_copy->new_members = NULL; } /*--*-- Recently Dead Member Nodes --*--*/ offset = oc->m_out_idx; membership_copy->dead_members_size = oc->m_n_out; if(membership_copy->dead_members_size > 0) { int size = membership_copy->dead_members_size; size = size * sizeof(oc_node_t); membership_copy->dead_members = (oc_node_t *)cl_malloc(size); members = membership_copy->dead_members; for(lpc=0; lpc < membership_copy->dead_members_size; lpc++) { members[lpc].node_id = oc->m_array[offset+lpc].node_id; members[lpc].node_born_on = oc->m_array[offset+lpc].node_born_on; members[lpc].node_uname = cl_strdup(oc->m_array[offset+lpc].node_uname); - new_node_state(update_list, - members[lpc].node_uname, "down"); + tmp1 = create_node_state(members[lpc].node_uname, "down", NULL, NULL); + if(update_list == NULL) { + update_list = tmp1; + } else { + update_list = xmlAddSibling(update_list, tmp1); + } + } } else { membership_copy->dead_members = NULL; } if(AM_I_DC) { // should be sufficient for only the DC to do this xmlNodePtr fragment = create_cib_fragment(update_list, NULL); send_request(NULL, fragment, CRM_OPERATION_UPDATE, NULL, CRM_SYSTEM_DCIB); free_xml(fragment); } tmp = fsa_membership_copy; fsa_membership_copy = membership_copy; /* Free the old copy */ if(tmp != NULL) { if(tmp->members != NULL) cl_free(tmp->members); if(tmp->new_members != NULL) cl_free(tmp->new_members); if(tmp->dead_members != NULL) cl_free(tmp->dead_members); cl_free(tmp); } FNRET(next_input); } -/* - * node_updates must be pre-constructed (non-NULL) - */ -void -new_node_state(xmlNodePtr node_updates, - const char *uname, const char *state) -{ - xmlNodePtr tmp1 = create_xml_node(NULL, XML_CIB_TAG_STATE); - - set_node_tstamp(tmp1); - set_xml_property_copy(tmp1, XML_ATTR_ID, uname); - set_xml_property_copy(tmp1, "source", fsa_our_uname); - set_xml_property_copy(tmp1, "state", state); - - xmlAddSibling(node_updates, tmp1); -} - - - void ccm_event_detail(const oc_ev_membership_t *oc, oc_ed_t event) { int member_id = -1; gboolean member = FALSE; int lpc; int node_list_size; cl_log(LOG_INFO,"trans=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); cl_log(LOG_INFO, "NODES IN THE PRIMARY MEMBERSHIP"); node_list_size = oc->m_n_member; for(lpc=0; lpcm_array[oc->m_memb_idx+lpc].node_uname, oc->m_array[oc->m_memb_idx+lpc].node_id, oc->m_array[oc->m_memb_idx+lpc].node_born_on); if(fsa_our_uname != NULL && strcmp(fsa_our_uname, oc->m_array[oc->m_memb_idx+lpc].node_uname)) { member = TRUE; member_id = oc->m_array[oc->m_memb_idx+lpc].node_id; } } if (member == FALSE) { cl_log(LOG_WARNING, "MY NODE IS NOT IN CCM THE MEMBERSHIP LIST"); } else { cl_log(LOG_INFO, "MY NODE ID IS %d", member_id); } cl_log(LOG_INFO, "NEW MEMBERS"); if (oc->m_n_in==0) cl_log(LOG_INFO, "\tNONE"); for(lpc=0; lpcm_n_in; lpc++) { cl_log(LOG_INFO,"\t%s [nodeid=%d, born=%d]", oc->m_array[oc->m_in_idx+lpc].node_uname, oc->m_array[oc->m_in_idx+lpc].node_id, oc->m_array[oc->m_in_idx+lpc].node_born_on); } cl_log(LOG_INFO, "MEMBERS LOST"); if (oc->m_n_out==0) cl_log(LOG_INFO, "\tNONE"); for(lpc=0; lpcm_n_out; lpc++) { cl_log(LOG_INFO,"\t%s [nodeid=%d, born=%d]", oc->m_array[oc->m_out_idx+lpc].node_uname, oc->m_array[oc->m_out_idx+lpc].node_id, oc->m_array[oc->m_out_idx+lpc].node_born_on); if(fsa_our_uname != NULL && strcmp(fsa_our_uname, oc->m_array[oc->m_memb_idx+lpc].node_uname)) { cl_log(LOG_ERR, "We're not part of the cluster anymore"); } } cl_log(LOG_INFO, "-----------------------"); } int register_with_ccm(ll_cluster_t *hb_cluster) { FNRET(0); } gboolean ccm_dispatch(int fd, gpointer user_data) { oc_ev_t *ccm_token = (oc_ev_t*)user_data; oc_ev_handle_event(ccm_token); return TRUE; } void crmd_ccm_input_callback(oc_ed_t event, void *cookie, size_t size, const void *data) { struct ccm_data *event_data = NULL; FNIN(); if(data != NULL) { event_data = (struct ccm_data *) cl_malloc(sizeof(struct ccm_data)); event_data->event = &event; event_data->oc = (const oc_ev_membership_t *)data; s_crmd_fsa(C_CCM_CALLBACK, I_CCM_EVENT, (void*)event_data); event_data->event = NULL; event_data->oc = NULL; cl_free(event_data); } else { cl_log(LOG_INFO, "CCM Callback with NULL data... " "I dont /think/ this is bad"); } oc_ev_callback_done(cookie); FNOUT(); } void msg_ccm_join(const struct ha_msg *msg, void *foo) { FNIN(); cl_log(LOG_INFO, "\n###### Recieved ccm_join message..."); if (msg != NULL) { cl_log(LOG_INFO, "[type=%s]", ha_msg_value(msg, F_TYPE)); cl_log(LOG_INFO, "[orig=%s]", ha_msg_value(msg, F_ORIG)); cl_log(LOG_INFO, "[to=%s]", ha_msg_value(msg, F_TO)); cl_log(LOG_INFO, "[status=%s]", ha_msg_value(msg, F_STATUS)); cl_log(LOG_INFO, "[info=%s]", ha_msg_value(msg, F_COMMENT)); cl_log(LOG_INFO, "[rsc_hold=%s]", ha_msg_value(msg, F_RESOURCES)); cl_log(LOG_INFO, "[stable=%s]", ha_msg_value(msg, F_ISSTABLE)); cl_log(LOG_INFO, "[rtype=%s]", ha_msg_value(msg, F_RTYPE)); cl_log(LOG_INFO, "[ts=%s]", ha_msg_value(msg, F_TIME)); cl_log(LOG_INFO, "[seq=%s]", ha_msg_value(msg, F_SEQ)); cl_log(LOG_INFO, "[generation=%s]", ha_msg_value(msg, F_HBGENERATION)); // cl_log(LOG_INFO, "[=%s]", ha_msg_value(msg, F_)); } FNOUT(); } diff --git a/crm/crmd/crmd_fsa.h b/crm/crmd/crmd_fsa.h index 8426785f34..18e63b5386 100644 --- a/crm/crmd/crmd_fsa.h +++ b/crm/crmd/crmd_fsa.h @@ -1,129 +1,131 @@ -/* $Id: crmd_fsa.h,v 1.15 2004/04/15 08:31:13 andrew Exp $ */ +/* $Id: crmd_fsa.h,v 1.16 2004/05/14 21:15:54 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef XML_CRM_FSA__H #define XML_CRM_FSA__H #include #include #include #include #include #include struct ccm_data { const oc_ev_membership_t *oc; oc_ed_t *event; }; struct oc_node_list_s { int members_size; oc_node_t *members; int new_members_size; oc_node_t *new_members; int dead_members_size; oc_node_t *dead_members; // struct oc_node_list_s *next; }; /* copy from struct client_child in heartbeat.h * * Plus a couple of other things */ typedef struct oc_node_list_s oc_node_list_t; struct crm_subsystem_s { pid_t pid; /* Process id of child process */ int respawn; /* Respawn it if it dies? */ int respawncount; /* Last time we respawned this command */ int shortrcount; /* How many times has it respawned too fast? */ const char* command; /* What command to run? */ const char* path; /* Path (argv[0])? */ /* extras */ const char* name; IPC_Channel *ipc; /* How can we communicate with it */ long long flag; /* */ }; typedef struct fsa_timer_s fsa_timer_t; struct fsa_timer_s { guint source_id; /* timer source id */ uint period_ms; /* timer period */ enum crmd_fsa_input fsa_input; gboolean (*callback)(gpointer data); }; extern enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause, enum crmd_fsa_input initial_input, void *data); extern long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input); /* Utilities */ extern long long toggle_bit (long long action_list, long long action); extern long long clear_bit (long long action_list, long long action); extern long long set_bit (long long action_list, long long action); extern void toggle_bit_inplace(long long *action_list, long long action); extern void clear_bit_inplace (long long *action_list, long long action); extern void set_bit_inplace (long long *action_list, long long action); extern gboolean is_set(long long action_list, long long action); extern gboolean startTimer(fsa_timer_t *timer); extern gboolean stopTimer(fsa_timer_t *timer); extern gboolean timer_popped(gpointer data); extern gboolean do_dc_heartbeat(gpointer data); /* Global FSA stuff */ extern enum crmd_fsa_state fsa_state; extern oc_node_list_t *fsa_membership_copy; extern ll_cluster_t *fsa_cluster_conn; extern ll_lrm_t *fsa_lrm_conn; extern long long fsa_input_register; extern const char *fsa_our_uname; extern fsa_timer_t *election_trigger; /* */ extern fsa_timer_t *election_timeout; /* */ extern fsa_timer_t *shutdown_escalation_timmer; /* */ extern fsa_timer_t *dc_heartbeat; extern fsa_timer_t *integration_timer; extern struct crm_subsystem_s *cib_subsystem; extern struct crm_subsystem_s *te_subsystem; extern struct crm_subsystem_s *pe_subsystem; extern void cleanup_subsystem(struct crm_subsystem_s *the_subsystem); extern enum crmd_fsa_input send_cib_status_update(xmlNodePtr update); +extern xmlNodePtr create_node_state(const char *node, const char *state, + const char *exp_state, xmlNodePtr lrm_data); #define AM_I_DC is_set(fsa_input_register, R_THE_DC) #define AM_I_OPERATIONAL (is_set(fsa_input_register, R_STARTING)==FALSE) #include #endif diff --git a/crm/crmd/election.c b/crm/crmd/election.c index ce199387ef..155f6214c7 100644 --- a/crm/crmd/election.c +++ b/crm/crmd/election.c @@ -1,586 +1,644 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *joined_nodes = NULL; /* A_ELECTION_VOTE */ enum crmd_fsa_input do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input election_result = I_NULL; FNIN(); /* dont vote if we're in one of these states or wanting to shut down */ switch(cur_state) { case S_RECOVERY: case S_RECOVERY_DC: case S_STOPPING: case S_RELEASE_DC: case S_TERMINATE: FNRET(I_NULL); // log warning break; default: if(is_set(fsa_input_register, R_SHUTDOWN)) { FNRET(I_NULL); // log warning } break; } send_request(NULL, NULL, CRM_OPERATION_VOTE, NULL, CRM_SYSTEM_CRMD); FNRET(election_result); } gboolean timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *)data; cl_log(LOG_INFO, "#!!#!!# Timer %s just popped!", fsa_input2string(timer->fsa_input)); stopTimer(timer); // dont make it go off again s_crmd_fsa(C_TIMER_POPPED, timer->fsa_input, NULL); return TRUE; } gboolean do_dc_heartbeat(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *)data; // cl_log(LOG_DEBUG, "#!!#!!# Heartbeat timer just popped!"); gboolean was_sent = send_request(NULL, NULL, CRM_OPERATION_HBEAT, NULL, CRM_SYSTEM_CRMD); if(was_sent == FALSE) { // this is bad stopTimer(timer); // dont make it go off again s_crmd_fsa(C_HEARTBEAT_FAILED, I_SHUTDOWN, NULL); } return TRUE; } /* A_ELECTION_COUNT */ enum crmd_fsa_input do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { gboolean we_loose = FALSE; xmlNodePtr vote = (xmlNodePtr)data; unsigned int my_born = -1, your_born = -1; int lpc = 0, my_index = -1, your_index = -1; enum crmd_fsa_input election_result = I_NULL; const char *vote_from = xmlGetProp(vote, XML_ATTR_HOSTFROM); const char *lowest_uname = NULL; int lowest_bornon = 0; FNIN(); if(vote_from == NULL || strcmp(vote_from, fsa_our_uname) == 0) { // dont count our own vote FNRET(election_result); } if(fsa_membership_copy->members_size < 1) { // if even we are not in the cluster then we should not vote FNRET(I_FAIL); } lowest_uname = fsa_membership_copy->members[0].node_uname; lowest_bornon = fsa_membership_copy->members[0].node_born_on; for(; lpc < fsa_membership_copy->members_size; lpc++) { const char *node_uname = fsa_membership_copy->members[lpc].node_uname; int this_born_on = fsa_membership_copy->members[lpc].node_born_on; if(node_uname == NULL) { continue; } if(strcmp(vote_from, node_uname) == 0) { your_born = this_born_on; your_index = lpc; } else if (strcmp(fsa_our_uname, node_uname) == 0) { my_born = this_born_on; my_index = lpc; } if(lowest_bornon > this_born_on) { lowest_uname = node_uname; lowest_bornon = this_born_on; } else if(lowest_bornon == this_born_on && strcmp(lowest_uname, node_uname) > 0) { lowest_uname = node_uname; lowest_bornon = this_born_on; } } #if 0 cl_log(LOG_DEBUG, "%s (bornon=%d), our bornon (%d)", vote_from, your_born, my_born); cl_log(LOG_DEBUG, "%s %s %s", fsa_our_uname, strcmp(fsa_our_uname, vote_from) < 0?"<":">=", vote_from); #endif cl_log(LOG_DEBUG, "Election winner should be %s (born_on=%d)", lowest_uname, lowest_bornon); if(lowest_uname != NULL && strcmp(lowest_uname, fsa_our_uname) == 0){ cl_log(LOG_DEBUG, "Election win: lowest born_on and uname"); election_result = I_ELECTION_DC; } else if(your_born < my_born) { cl_log(LOG_DEBUG, "Election fail: born_on"); we_loose = TRUE; } else if(your_born == my_born && strcmp(fsa_our_uname, vote_from) > 0) { cl_log(LOG_DEBUG, "Election fail: uname"); we_loose = TRUE; } else { CRM_DEBUG("We might win... we should vote (possibly again)"); election_result = I_DC_TIMEOUT; } if(we_loose) { if(fsa_input_register & R_THE_DC) { cl_log(LOG_DEBUG, "Give up the DC"); election_result = I_RELEASE_DC; } else { cl_log(LOG_DEBUG, "We werent the DC anyway"); election_result = I_NOT_DC; } } if(we_loose || election_result == I_ELECTION_DC) { // cancel timer, its been decided stopTimer(election_timeout); } FNRET(election_result); } /* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */ // we won enum crmd_fsa_input do_election_timer_ctrl(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); if(action & A_ELECT_TIMER_START) { CRM_DEBUG("Starting the election timer..."); startTimer(election_timeout); } else if(action & A_ELECT_TIMER_STOP || action & A_ELECTION_TIMEOUT) { CRM_DEBUG("Stopping the election timer..."); stopTimer(election_timeout); } else { cl_log(LOG_ERR, "unexpected action %s", fsa_action2string(action)); } if(action & A_ELECTION_TIMEOUT) { CRM_DEBUG("The election timer went off, we win!"); FNRET(I_ELECTION_DC); } FNRET(I_NULL); } /* A_DC_TIMER_STOP, A_DC_TIMER_START */ enum crmd_fsa_input do_dc_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { gboolean timer_op_ok = TRUE; FNIN(); if(action & A_DC_TIMER_STOP) { timer_op_ok = stopTimer(election_trigger); } /* dont start a timer that wasnt already running */ if(action & A_DC_TIMER_START && timer_op_ok) { startTimer(election_trigger); } FNRET(I_NULL); } /* A_DC_TAKEOVER */ enum crmd_fsa_input do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); CRM_DEBUG("################## Taking over the DC ##################"); set_bit_inplace(&fsa_input_register, R_THE_DC); CRM_DEBUG("Am I the DC? %s", AM_I_DC?"yes":"no"); set_bit_inplace(&fsa_input_register, R_JOIN_OK); set_bit_inplace(&fsa_input_register, R_INVOKE_PE); clear_bit_inplace(&fsa_input_register, R_CIB_DONE); clear_bit_inplace(&fsa_input_register, R_HAVE_CIB); startTimer(dc_heartbeat); FNRET(I_NULL); } /* A_DC_RELEASE */ enum crmd_fsa_input do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input result = I_NULL; FNIN(); CRM_DEBUG("################## Releasing the DC ##################"); stopTimer(dc_heartbeat); if(action & A_DC_RELEASE) { clear_bit_inplace(&fsa_input_register, R_THE_DC); /* get a new CIB from the new DC */ clear_bit_inplace(&fsa_input_register, R_HAVE_CIB); } else if (action & A_DC_RELEASED) { if(cur_state == S_STOPPING) { result = I_SHUTDOWN; // necessary? result = I_RELEASE_SUCCESS; } #if 0 else if( are there errors ) { // we cant stay up if not healthy // or perhaps I_ERROR and go to S_RECOVER? result = I_SHUTDOWN; } #endif else result = I_RELEASE_SUCCESS; } else { cl_log(LOG_ERR, "Warning, do_dc_release invoked for action %s", fsa_action2string(action)); } CRM_DEBUG("Am I still the DC? %s", AM_I_DC?"yes":"no"); FNRET(result); } /* A_JOIN_WELCOME, A_JOIN_WELCOME_ALL */ enum crmd_fsa_input do_send_welcome(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { int lpc = 0, size = 0, num_sent = 0; oc_node_t *members; gboolean was_sent = TRUE; FNIN(); if(action & A_JOIN_WELCOME && data == NULL) { cl_log(LOG_ERR, "Attempt to send welcome message " "without a message to reply to!"); FNRET(I_NULL); } else if(action & A_JOIN_WELCOME) { xmlNodePtr welcome = (xmlNodePtr)data; const char *join_to = xmlGetProp(welcome, XML_ATTR_HOSTFROM); if(join_to != NULL) { +/* + xmlNodePtr update = + create_node_state(new_node, "active", "active", NULL); + xmlNodePtr tmp1 = create_cib_fragment(update, NULL); + send_request(NULL, tmp1, "update", NULL, CRM_SYSTEM_DCIB); +*/ send_request(NULL, NULL, CRM_OPERATION_WELCOME, join_to, CRM_SYSTEM_CRMD); + } FNRET(I_NULL); } // welcome everyone... /* Give everyone a chance to join before invoking the PolicyEngine */ stopTimer(integration_timer); startTimer(integration_timer); members = fsa_membership_copy->members; size = fsa_membership_copy->members_size; if(joined_nodes != NULL) { g_hash_table_destroy(joined_nodes); joined_nodes = g_hash_table_new(&g_str_hash, &g_str_equal); } - + // reset everyones status back to down in the CIB + xmlNodePtr cib_copy = get_cib_copy(); + xmlNodePtr tmp1 = get_object_root(XML_CIB_TAG_STATUS, cib_copy); + xmlNodePtr node_entry = tmp1->children; + xmlNodePtr update = NULL; + while(node_entry != NULL){ + tmp1 = create_node_state(xmlGetProp(node_entry, "id"), + "down", NULL, NULL); + node_entry = node_entry->next; + + if(update == NULL) { + update = tmp1; + } else { + update = xmlAddSibling(update, tmp1); + } + } + + if(update != NULL) { + xml_message_debug(update, "creating fragment for..."); + tmp1 = create_cib_fragment(update, NULL); + send_request(NULL, tmp1, "update", NULL, CRM_SYSTEM_DCIB); + + } + + free_xml(tmp1); + free_xml(update); + free_xml(cib_copy); + for(; members != NULL && lpc < size; lpc++) { const char *new_node = members[lpc].node_uname; if(strcmp(fsa_our_uname, new_node) == 0) { // dont send one to ourselves continue; } CRM_DEBUG("Sending welcome message to %s (%d)", new_node, was_sent); num_sent++; was_sent = was_sent && send_request(NULL, NULL, CRM_OPERATION_WELCOME, new_node, CRM_SYSTEM_CRMD); + CRM_DEBUG("Sent welcome message to %s (%d)", new_node, was_sent); } if(was_sent == FALSE) FNRET(I_FAIL); /* No point hanging around in S_INTEGRATION if we're the only ones here! */ if(num_sent == 0) { // that was the last outstanding join ack) cl_log(LOG_INFO,"That was the last outstanding join ack"); FNRET(I_SUCCESS); } else { cl_log(LOG_DEBUG, "Still waiting on %d outstanding join acks", num_sent); //dont waste time by invoking the pe yet; } FNRET(I_NULL); } +xmlNodePtr +create_node_state(const char *node, const char *state, + const char *exp_state, xmlNodePtr lrm_data) +{ + xmlNodePtr node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); + + set_xml_property_copy(node_state, XML_ATTR_ID, node); + set_xml_property_copy(node_state, "state", state); + if(exp_state != NULL) { + set_xml_property_copy(node_state, "exp_state", exp_state); + } + + if(lrm_data != NULL) { +// set_xml_property_copy(data, "replace_lrm", "true"); + add_node_copy(node_state, lrm_data); + } + + xml_message_debug(node_state, "created"); + + return node_state; +} + + /* A_JOIN_ACK */ enum crmd_fsa_input do_ack_welcome(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { xmlNodePtr welcome = (xmlNodePtr)data; xmlNodePtr cib_copy; xmlNodePtr tmp1; xmlNodePtr tmp2; FNIN(); #if 0 if(we are sick) { log error ; FNRET(I_NULL); } #endif cib_copy = get_cib_copy(); tmp1 = get_object_root(XML_CIB_TAG_STATUS, cib_copy); tmp2 = create_cib_fragment(tmp1, NULL); send_ha_reply(fsa_cluster_conn, welcome, tmp2); free_xml(tmp2); free_xml(cib_copy); FNRET(I_NULL); } /* A_ANNOUNCE */ enum crmd_fsa_input do_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); /* Once we hear from the DC, we can stop the timer * * This timer was started either on startup or when a node * left the CCM list */ /* dont announce if we're in one of these states */ switch(cur_state) { case S_RECOVERY: case S_RECOVERY_DC: case S_RELEASE_DC: case S_TERMINATE: FNRET(I_NULL); // log warning break; default: break; } if(AM_I_OPERATIONAL) { send_request(NULL, NULL, CRM_OPERATION_ANNOUNCE, NULL, CRM_SYSTEM_DC); } else { /* Delay announce until we have finished local startup */ FNRET(I_NULL); } FNRET(I_NULL); } /* A_JOIN_PROCESS_ACK */ enum crmd_fsa_input do_process_welcome_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { int lpc = 0, size = 0; oc_node_t *members; gboolean is_a_member = FALSE; xmlNodePtr tmp1; xmlNodePtr join_ack = (xmlNodePtr)data; xmlNodePtr cib_fragment; const char *join_from = xmlGetProp(join_ack, XML_ATTR_HOSTFROM); FNIN(); FNIN(); members = fsa_membership_copy->members; size = fsa_membership_copy->members_size; for(; lpc < size; lpc++) { const char *new_node = members[lpc].node_uname; if(strcmp(join_from, new_node) == 0) { is_a_member = TRUE; } } cib_fragment = find_xml_node(join_ack, XML_TAG_FRAGMENT); if(is_a_member == FALSE) { cl_log(LOG_ERR, "Node %s is not known to us", join_from); /* make sure any information from this node is discarded, * it is invalid */ free_xml(cib_fragment); FNRET(I_FAIL); } cl_log(LOG_DEBUG, "Welcoming node %s after ACK", join_from); // add them to our list of "active" nodes g_hash_table_insert(joined_nodes, strdup(join_from),strdup(join_from)); if(cib_fragment == NULL) { cl_log(LOG_ERR, "No status information was part of the" " Welcome ACK from %s", join_from); FNRET(I_NULL); } /* TODO: check the fragment is only for the status section = get_xml_attr(cib_fragment, NULL, XML_ATTR_FILTER_TYPE, TRUE); */ /* Make changes so that state=active for this node when the update * is processed by A_CIB_INVOKE */ tmp1 = find_xml_node(cib_fragment, XML_TAG_CIB); tmp1 = get_object_root(XML_CIB_TAG_STATUS, tmp1); tmp1 = find_entity(tmp1, XML_CIB_TAG_STATE, join_from, FALSE); set_xml_property_copy(tmp1, "state", "active"); if(g_hash_table_size(joined_nodes) == fsa_membership_copy->members_size) { // that was the last outstanding join ack) cl_log(LOG_INFO,"That was the last outstanding join ack"); FNRET(I_SUCCESS); } else { cl_log(LOG_DEBUG, "Still waiting on %d outstanding join acks", size); //dont waste time by invoking the pe yet; } FNRET(I_CIB_OP); }