diff --git a/crm/crmd/atest.c b/crm/crmd/atest.c index 990b43abd4..8c9f4427c4 100644 --- a/crm/crmd/atest.c +++ b/crm/crmd/atest.c @@ -1,126 +1,129 @@ -/* $Id: atest.c,v 1.5 2005/05/18 20:15:57 andrew Exp $ */ +/* $Id: atest.c,v 1.6 2005/06/14 11:09:03 davidlee Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char* crm_system_name = "core"; gboolean process_atest_message( HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender); GMainLoop* mainloop = NULL; int main(int argc, char ** argv) { #if 0 IPC_Channel* channels[2]; crm_data_t *a_cib = NULL; HA_Message *cmd = NULL; crm_log_init(crm_system_name); set_crm_log_level(LOG_DEBUG_5-1); a_cib = string2xml(""); /* createEmptyCib(); */ cl_log_message(LOG_DEBUG, (HA_Message*)a_cib); if (ipc_channel_pair(channels) != IPC_OK) { cl_perror("Can't create ipc channel pair"); exit(1); } G_main_add_IPC_Channel(G_PRIORITY_LOW, channels[1], FALSE, subsystem_msg_dispatch, (void*)process_atest_message, default_ipc_connection_destroy); /* send transition graph over IPC instead */ cmd = create_request(CRM_OP_TRANSITION, a_cib, NULL, CRM_SYSTEM_TENGINE, CRM_SYSTEM_TENGINE, NULL); send_ipc_message(channels[0], cmd); mainloop = g_main_new(FALSE); crm_debug("Starting mainloop"); g_main_run(mainloop); #else crm_log_init(crm_system_name); set_crm_log_level(LOG_DEBUG_5-1); /* read local config file */ crm_debug("Enabling coredumps"); if(cl_set_corerootdir(HA_COREDIR) < 0){ cl_perror("cannot set corerootdir"); } if(cl_enable_coredumps(1) != 0) { crm_err("Cannot enable coredumps"); } if(cl_cdtocoredir() != 0) { crm_err("Cannot cd to coredump dir"); } crm_warn("Calling abort()"); abort(); crm_err("We just dumped core"); #endif return 0; } gboolean process_atest_message(HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender) { crm_debug("made it"); cl_log_message(LOG_DEBUG, msg); return TRUE; } diff --git a/crm/crmd/callbacks.c b/crm/crmd/callbacks.c index b2c522769e..69e9a95216 100644 --- a/crm/crmd/callbacks.c +++ b/crm/crmd/callbacks.c @@ -1,597 +1,600 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *crmd_peer_state = NULL; crm_data_t *find_xml_in_hamessage(const HA_Message * msg); void crmd_ha_connection_destroy(gpointer user_data); /* From join_dc... */ extern gboolean check_join_state( enum crmd_fsa_state cur_state, const char *source); /* #define MAX_EMPTY_CALLBACKS 20 */ /* int empty_callbacks = 0; */ gboolean crmd_ha_msg_dispatch(IPC_Channel *channel, gpointer user_data) { int lpc = 0; ll_cluster_t *hb_cluster = (ll_cluster_t*)user_data; while(lpc < 2 && hb_cluster->llc_ops->msgready(hb_cluster)) { if(channel->ch_status != IPC_CONNECT) { /* there really is no point continuing */ break; } lpc++; /* invoke the callbacks but dont block */ hb_cluster->llc_ops->rcvmsg(hb_cluster, 0); } crm_debug_3("%d HA messages dispatched", lpc); G_main_set_trigger(fsa_source); if (channel && (channel->ch_status != IPC_CONNECT)) { crm_crit("Lost connection to heartbeat service."); return FALSE; } return TRUE; } void crmd_ha_msg_callback(const HA_Message * msg, void* private_data) { ha_msg_input_t *new_input = NULL; oc_node_t *from_node = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *seq = ha_msg_value(msg, F_SEQ); const char *op = ha_msg_value(msg, F_CRM_TASK); const char *sys_to = ha_msg_value(msg, F_CRM_SYS_TO); const char *sys_from = ha_msg_value(msg, F_CRM_SYS_FROM); CRM_DEV_ASSERT(from != NULL); if(fsa_membership_copy == NULL) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_message_adv( LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg); return; } from_node = g_hash_table_lookup(fsa_membership_copy->members, from); if(from_node == NULL) { int level = LOG_DEBUG; if(safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, __FILE__, __FUNCTION__, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, g_hash_table_size(fsa_membership_copy->members)); crm_log_message_adv(LOG_MSG, "HA[inbound]: CCM Discard", msg); } else if(AM_I_DC && safe_str_eq(sys_from, CRM_SYSTEM_DC) && safe_str_neq(from, fsa_our_uname)) { crm_err("Another DC detected: %s (op=%s)", from, op); crm_log_message_adv( LOG_WARNING, "HA[inbound]: Duplicate DC", msg); new_input = new_ha_msg_input(msg); /* make sure the election happens NOW */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, new_input, __FUNCTION__); #if 0 /* still thinking about this one... * could create a timing issue if we dont notice the * election before a new DC is elected. */ } else if(fsa_our_dc != NULL && safe_str_eq(sys_from, CRM_SYSTEM_DC) && safe_str_neq(from, fsa_our_dc)) { crm_warn("Ignoring message from wrong DC: %s vs. %s ", from, fsa_our_dc); crm_log_message_adv(LOG_WARNING, "HA[inbound]: wrong DC", msg); #endif } else if(safe_str_eq(sys_to, CRM_SYSTEM_DC) && AM_I_DC == FALSE) { crm_debug_2("Ignoring message for the DC [F_SEQ=%s]", seq); crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]: ignore", msg); return; } else if(safe_str_eq(from, fsa_our_uname) && safe_str_eq(op, CRM_OP_VOTE)) { crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]", msg); crm_debug_2("Ignoring our own vote [F_SEQ=%s]: own vote", seq); return; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_HBEAT)) { crm_debug_2("Ignoring our own heartbeat [F_SEQ=%s]", seq); crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]: own heartbeat", msg); return; } else { crm_debug_3("Processing message"); crm_log_message_adv(LOG_MSG, "HA[inbound]", msg); new_input = new_ha_msg_input(msg); register_fsa_input(C_HA_MESSAGE, I_ROUTER, new_input); } #if 0 if(ha_msg_value(msg, XML_ATTR_REFERENCE) == NULL) { ha_msg_add(new_input->msg, XML_ATTR_REFERENCE, seq); } #endif delete_ha_msg_input(new_input); return; } /* * Apparently returning TRUE means "stay connected, keep doing stuff". * Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; IPC_Message *msg = NULL; ha_msg_input_t *new_input = NULL; crmd_client_t *curr_client = (crmd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Processing IPC message from %s", curr_client->table_key); while(lpc == 0 && client->ops->is_message_pending(client)) { if (client->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if (client->ops->recv(client, &msg) != IPC_OK) { perror("Receive failure:"); crm_err("[%s] [receive failure]", curr_client->table_key); stay_connected = FALSE; break; } else if (msg == NULL) { crm_err("[%s] [no message this time]", curr_client->table_key); continue; } lpc++; new_input = new_ipc_msg_input(msg); msg->msg_done(msg); crm_debug_2("Processing msg from %s", curr_client->table_key); crm_log_message_adv(LOG_MSG, "CRMd[inbound]", new_input->msg); if(crmd_authorize_message(new_input, curr_client)) { register_fsa_input(C_IPC_MESSAGE, I_ROUTER, new_input); } delete_ha_msg_input(new_input); msg = NULL; new_input = NULL; } crm_debug_2("Processed %d messages", lpc); if (client->ch_status == IPC_DISCONNECT) { stay_connected = FALSE; process_client_disconnect(curr_client); } G_main_set_trigger(fsa_source); return stay_connected; } gboolean lrm_dispatch(IPC_Channel*src_not_used, gpointer user_data) { int num_msgs = 0; ll_lrm_t *lrm = (ll_lrm_t*)user_data; crm_debug_3("received callback"); num_msgs = lrm->lrm_ops->rcvmsg(lrm, FALSE); if(num_msgs < 1) { crm_err("lrm->lrm_ops->rcvmsg() failed, connection lost?"); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); return FALSE; } return TRUE; } void lrm_op_callback(lrm_op_t* op) { CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } crm_debug("received callback: %s/%s (%s)", op->op_type, op->rsc_id, op_status2text(op->op_status)); /* Make sure the LRM events are received in order */ register_fsa_input_later(C_LRM_OP_CALLBACK, I_LRM_EVENT, op); } void crmd_ha_status_callback( const char *node, const char * status, void* private_data) { crm_data_t *update = NULL; crm_debug_3("received callback"); crm_notice("Status update: Node %s now has status [%s]",node,status); if(safe_str_neq(status, DEADSTATUS)) { crm_debug_3("nstatus callback was not for a dead node"); return; } /* this node is taost */ update = create_node_state( node, node, status, NULL, NULL, NULL, NULL, __FUNCTION__); set_xml_property_copy( update, XML_CIB_ATTR_CLEAR_SHUTDOWN, XML_BOOLEAN_TRUE); /* this change should not be broadcast */ update_local_cib(create_cib_fragment(update, NULL)); G_main_set_trigger(fsa_source); free_xml(update); } void crmd_client_status_callback(const char * node, const char * client, const char * status, void * private) { const char *join = NULL; const char *extra = NULL; crm_data_t * update = NULL; crm_debug_3("received callback"); if(safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; extra = XML_CIB_ATTR_CLEAR_SHUTDOWN; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; join = CRMD_JOINSTATE_DOWN; extra = XML_CIB_ATTR_CLEAR_SHUTDOWN; } set_bit_inplace(fsa_input_register, R_PEER_DATA); g_hash_table_replace( crmd_peer_state, crm_strdup(node), crm_strdup(status)); if(fsa_state == S_STARTING || fsa_state == S_STOPPING) { return; } crm_notice("Status update: Client %s/%s now has status [%s]", node, client, status); if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)) { /* did our DC leave us */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else { crm_data_t *fragment = NULL; crm_debug_3("Got client status callback"); update = create_node_state( node, node, NULL, NULL, status, join, NULL, __FUNCTION__); set_xml_property_copy(update, extra, XML_BOOLEAN_TRUE); fragment = create_cib_fragment(update, NULL); /* it is safe to keep these updates on the local node * each node updates their own CIB */ fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, cib_inhibit_bcast|cib_scope_local|cib_quorum_override); free_xml(fragment); free_xml(update); if(AM_I_DC && safe_str_eq(status, OFFLINESTATUS)) { g_hash_table_remove(confirmed_nodes, node); g_hash_table_remove(finalized_nodes, node); g_hash_table_remove(integrated_nodes, node); g_hash_table_remove(welcomed_nodes, node); check_join_state(fsa_state, __FUNCTION__); } } G_main_set_trigger(fsa_source); } void crmd_ha_connection_destroy(gpointer user_data) { crm_crit("Heartbeat has left us"); /* this is always an error */ /* feed this back into the FSA */ register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { if (client_channel == NULL) { crm_err("Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { crmd_client_t *blank_client = NULL; crm_debug_3("Channel connected"); crm_malloc0(blank_client, sizeof(crmd_client_t)); if (blank_client == NULL) { return FALSE; } client_channel->ops->set_recv_qlen(client_channel, 100); client_channel->ops->set_send_qlen(client_channel, 100); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; blank_client->client_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_msg_callback, blank_client, default_ipc_connection_destroy); } return TRUE; } gboolean ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; gboolean was_error = FALSE; crm_debug_3("received callback"); rc = oc_ev_handle_event(ccm_token); if(rc != 0) { crm_err("CCM connection appears to have failed: rc=%d.", rc); register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); was_error = TRUE; } G_main_set_trigger(fsa_source); return !was_error; } static gboolean fsa_have_quorum = FALSE; void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { int instance = -1; gboolean update_cache = FALSE; struct crmd_ccm_data_s *event_data = NULL; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; gboolean trigger_transition = FALSE; crm_debug_3("received callback"); if(data != NULL) { instance = membership->m_instance; } crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event)?"(re)attained":"lost", ccm_event_name(event), instance); switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID:/* fall through */ update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: #if UNTESTED if(AM_I_DC == FALSE) { break; } /* tell the TE to pretend it had completed and stop */ /* side effect: we'll end up in S_IDLE */ register_fsa_action(A_TE_HALT, TRUE); #endif break; case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; if(AM_I_DC && need_transition(fsa_state)) { trigger_transition = TRUE; } break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); break; default: crm_err("Unknown CCM event: %d", event); } if(update_quorum && ccm_have_quorum(event) == FALSE) { /* did we just loose quorum? */ if(fsa_have_quorum && need_transition(fsa_state)) { crm_info("Quorum lost: triggering transition (%s)", ccm_event_name(event)); trigger_transition = TRUE; } fsa_have_quorum = FALSE; } else if(update_quorum) { crm_debug("Updating quorum after event %s", ccm_event_name(event)); fsa_have_quorum = TRUE; } if(trigger_transition) { crm_debug("Scheduling transition after event %s", ccm_event_name(event)); /* make sure that when we query the CIB that it has * the changes that triggered the transition */ switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; break; default: break; } if(update_cache == FALSE) { /* a stand-alone transition */ register_fsa_action(A_TE_CANCEL); } } if(update_cache) { crm_debug("Updating cache after event %s", ccm_event_name(event)); crm_malloc0(event_data, sizeof(struct crmd_ccm_data_s)); if(event_data == NULL) { return; } event_data->event = event; if(data != NULL) { event_data->oc = copy_ccm_oc_data(data); } register_fsa_input_adv( C_CCM_CALLBACK, I_CCM_EVENT, event_data, trigger_transition?A_TE_CANCEL:A_NOTHING, FALSE, __FUNCTION__); if (event_data->oc) { crm_free(event_data->oc); event_data->oc = NULL; } crm_free(event_data); } oc_ev_callback_done(cookie); return; } void crmd_cib_connection_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); return; } longclock_t fsa_start = 0; longclock_t fsa_stop = 0; longclock_t fsa_diff = 0; gboolean crm_fsa_trigger(gpointer user_data) { unsigned int fsa_diff_ms = 0; if(fsa_diff_max_ms > 0) { fsa_start = time_longclock(); } s_crmd_fsa(C_FSA_INTERNAL); if(fsa_diff_max_ms > 0) { fsa_stop = time_longclock(); fsa_diff = sub_longclock(fsa_stop, fsa_start); fsa_diff_ms = longclockto_ms(fsa_diff); if(fsa_diff_ms > fsa_diff_max_ms) { crm_err("FSA took %dms to complete", fsa_diff_ms); } else if(fsa_diff_ms > fsa_diff_warn_ms) { crm_warn("FSA took %dms to complete", fsa_diff_ms); } } return TRUE; } diff --git a/crm/crmd/cib.c b/crm/crmd/cib.c index 0ffaba6b8f..441644137a 100644 --- a/crm/crmd/cib.c +++ b/crm/crmd/cib.c @@ -1,280 +1,283 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include /* for access */ #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *cib_subsystem = NULL; int cib_retries = 0; /* A_CIB_STOP, A_CIB_START, A_CIB_RESTART, */ enum crmd_fsa_input do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; struct crm_subsystem_s *this_subsys = cib_subsystem; long long stop_actions = A_CIB_STOP; long long start_actions = A_CIB_START; if(action & stop_actions) { if(fsa_cib_conn != NULL && fsa_cib_conn->state != cib_disconnected) { fsa_cib_conn->cmds->signoff(fsa_cib_conn); } } if(action & start_actions) { if(cur_state != S_STOPPING) { if(fsa_cib_conn == NULL) { fsa_cib_conn = cib_new(); } if(cib_ok != fsa_cib_conn->cmds->signon( fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command)){ crm_debug("Could not connect to the CIB service"); #if 0 } else if(cib_ok != fsa_cib_conn->cmds->set_op_callback( fsa_cib_conn, crmd_cib_op_callback)) { crm_err("Could not set op callback"); #endif } else if(fsa_cib_conn->cmds->set_connection_dnotify( fsa_cib_conn, crmd_cib_connection_destroy)!=cib_ok){ crm_err("Could not set dnotify callback"); } else { set_bit_inplace( fsa_input_register, R_CIB_CONNECTED); } if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { cib_retries++; crm_warn("Couldn't complete CIB registration %d" " times... pause and retry", cib_retries); if(cib_retries < 30) { crm_timer_start(wait_timer); crmd_fsa_stall(NULL); } else { crm_err("Could not complete CIB" " registration %d times..." " hard error", cib_retries); register_fsa_error( C_FSA_INTERNAL, I_ERROR, NULL); } } else { cib_retries = 0; } } else { crm_info("Ignoring request to start %s after shutdown", this_subsys->name); } } return result; } /* A_CIB_INVOKE, A_CIB_BUMPGEN, A_UPDATE_NODESTATUS */ enum crmd_fsa_input do_cib_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *answer = NULL; enum crmd_fsa_input result = I_NULL; ha_msg_input_t *cib_msg = fsa_typed_data(fsa_dt_ha_msg); const char *sys_from = cl_get_string(cib_msg->msg, F_CRM_SYS_FROM); if(fsa_cib_conn->state == cib_disconnected) { if(cur_state != S_STOPPING) { crm_err("CIB is disconnected"); crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg); return I_NULL; } crm_info("CIB is disconnected"); crm_log_message_adv(LOG_DEBUG, "CIB Input", cib_msg->msg); return I_NULL; } if(action & A_CIB_INVOKE) { if(safe_str_eq(sys_from, CRM_SYSTEM_CRMD)) { action = A_CIB_INVOKE_LOCAL; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { action = A_CIB_INVOKE_LOCAL; } } if(action & A_CIB_INVOKE || action & A_CIB_INVOKE_LOCAL) { int call_options = 0; enum cib_errors rc = cib_ok; crm_data_t *cib_frag = NULL; const char *section = NULL; const char *op = cl_get_string(cib_msg->msg, F_CRM_TASK); section = cl_get_string(cib_msg->msg, F_CIB_SECTION); ha_msg_value_int(cib_msg->msg, F_CIB_CALLOPTS, &call_options); crm_log_message(LOG_MSG, cib_msg->msg); crm_log_xml_debug_3(cib_msg->xml, "[CIB update]"); if(op == NULL) { crm_err("Invalid CIB Message"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } cib_frag = NULL; rc = fsa_cib_conn->cmds->variant_op( fsa_cib_conn, op, NULL, section, cib_msg->xml, &cib_frag, call_options); if(rc < cib_ok || (action & A_CIB_INVOKE)) { answer = create_reply(cib_msg->msg, cib_frag); ha_msg_add(answer,XML_ATTR_RESULT,cib_error2string(rc)); } if(action & A_CIB_INVOKE) { if(relay_message(answer, TRUE) == FALSE) { crm_err("Confused what to do with cib result"); crm_log_message(LOG_ERR, answer); crm_msg_del(answer); result = I_ERROR; } } else if(rc < cib_ok) { ha_msg_input_t *input = NULL; crm_err("Internal CRM/CIB command from %s() failed: %s", msg_data->origin, cib_error2string(rc)); crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg); crm_log_message_adv(LOG_WARNING, "CIB Reply", answer); input = new_ha_msg_input(answer); register_fsa_input(C_FSA_INTERNAL, I_ERROR, input); crm_msg_del(answer); delete_ha_msg_input(input); } return result; } else { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* frees fragment as part of delete_ha_msg_input() */ void update_local_cib_adv( crm_data_t *msg_data, gboolean do_now, const char *raised_from) { HA_Message *msg = NULL; ha_msg_input_t *fsa_input = NULL; int call_options = cib_quorum_override|cib_scope_local; CRM_DEV_ASSERT(msg_data != NULL); crm_malloc0(fsa_input, sizeof(ha_msg_input_t)); msg = create_request(CIB_OP_UPDATE, msg_data, NULL, CRM_SYSTEM_CIB, CRM_SYSTEM_CRMD, NULL); ha_msg_add(msg, F_CIB_SECTION, crm_element_value(msg_data, XML_ATTR_SECTION)); ha_msg_add_int(msg, F_CIB_CALLOPTS, call_options); ha_msg_add(msg, "call_origin", raised_from); fsa_input->msg = msg; fsa_input->xml = msg_data; if(AM_I_DC && crm_assert_failed) { /* register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); */ } if(do_now == FALSE) { crm_debug_3("Registering event with FSA"); register_fsa_input_adv(C_FSA_INTERNAL, I_CIB_OP, fsa_input, 0, FALSE, raised_from); } else { fsa_data_t *op_data = NULL; crm_debug_3("Invoking CIB handler directly"); crm_malloc0(op_data, sizeof(fsa_data_t)); op_data->fsa_cause = C_FSA_INTERNAL; op_data->fsa_input = I_CIB_OP; op_data->origin = raised_from; op_data->data = fsa_input; op_data->data_type = fsa_dt_ha_msg; do_cib_invoke(A_CIB_INVOKE_LOCAL, C_FSA_INTERNAL, fsa_state, I_CIB_OP, op_data); crm_free(op_data); crm_debug_3("CIB handler completed"); } crm_debug_3("deleting input"); crm_msg_del(fsa_input->msg); free_xml(fsa_input->xml); crm_free(fsa_input); crm_debug_3("deleted input"); } diff --git a/crm/crmd/control.c b/crm/crmd/control.c index 5942b45b67..8199685589 100644 --- a/crm/crmd/control.c +++ b/crm/crmd/control.c @@ -1,662 +1,664 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include + #include #include #include #include #include #include #include #include #include #include #include #include #include extern void crmd_ha_connection_destroy(gpointer user_data); extern gboolean stop_all_resources(void); gboolean crm_shutdown(int nsig, gpointer unused); gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name); GHashTable *ipc_clients = NULL; GTRIGSource *fsa_source = NULL; /* A_HA_CONNECT */ enum crmd_fsa_input do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(fsa_cluster_conn != NULL) { fsa_cluster_conn->llc_ops->signoff( fsa_cluster_conn, FALSE); } } if(action & A_HA_CONNECT) { if(fsa_cluster_conn == NULL) { fsa_cluster_conn = ll_cluster_new("heartbeat"); } /* make sure we are disconnected first */ fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); registered = register_with_ha( fsa_cluster_conn, crm_system_name); if(registered == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* A_SHUTDOWN */ enum crmd_fsa_input do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input next_input = I_NULL; enum crmd_fsa_input tmp = I_NULL; /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); /* last attempt to shut these down */ if(is_set(fsa_input_register, R_PE_CONNECTED)) { crm_warn("Last attempt to shutdown the PolicyEngine"); tmp = do_pe_control(A_PE_STOP, cause, cur_state, current_input, msg_data); if(tmp != I_NULL) { next_input = I_ERROR; crm_err("Failed to shutdown the PolicyEngine"); } } if(is_set(fsa_input_register, R_TE_CONNECTED)) { crm_warn("Last attempt to shutdown the Transitioner"); tmp = do_pe_control(A_TE_STOP, cause, cur_state, current_input, msg_data); if(tmp != I_NULL) { next_input = I_ERROR; crm_err("Failed to shutdown the Transitioner"); } } crm_info("Stopping all remaining local resources"); if(is_set(fsa_input_register, R_LRM_CONNECTED)) { stop_all_resources(); } else { crm_err("Exiting with no LRM connection..." " resources may be active!"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } return next_input; } /* A_SHUTDOWN_REQ */ enum crmd_fsa_input do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input next_input = I_NULL; HA_Message *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_request(msg, NULL) == FALSE) { next_input = I_ERROR; } else { crm_timer_start(shutdown_timer); } return next_input; } /* A_EXIT_0, A_EXIT_1 */ enum crmd_fsa_input do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; gboolean do_exit = FALSE; if(action & A_EXIT_0) { if(is_set(fsa_input_register, R_PE_CONNECTED)) { crm_info("Waiting for the PE to disconnect"); crmd_fsa_stall(NULL); } else if(is_set(fsa_input_register, R_TE_CONNECTED)) { crm_info("Waiting for the TE to disconnect"); crmd_fsa_stall(NULL); } else { do_exit = TRUE; crm_info("Performing %s - gracefully exiting the CRMd", fsa_action2string(action)); } } else { do_exit = TRUE; exit_code = 1; crm_warn("Performing %s - forcefully exiting the CRMd... now!", fsa_action2string(action)); } if(do_exit) { if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_info("Could not recover from internal error"); exit_code = 2; } else if(is_set(fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting respawn by Heartbeat"); exit_code = 100; } crm_info("[%s] stopped (%d)", crm_system_name, exit_code); exit(exit_code); } return I_NULL; } /* A_STARTUP */ enum crmd_fsa_input do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_info("Register Signal Handler"); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, crm_shutdown, NULL, NULL); fsa_source = G_main_add_TriggerHandler( G_PRIORITY_HIGH, crm_fsa_trigger, NULL, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); if(was_error == 0) { crm_info("Init server comms"); was_error = init_server_ipc_comms( crm_strdup(CRM_SYSTEM_CRMD), crmd_client_connect, default_ipc_connection_destroy); } if(was_error == 0) { crm_info("Creating CIB object"); fsa_cib_conn = cib_new(); } /* set up the timers */ crm_malloc0(dc_heartbeat, sizeof(fsa_timer_t)); crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(shutdown_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = -1; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(dc_heartbeat != NULL) { dc_heartbeat->source_id = -1; dc_heartbeat->period_ms = -1; dc_heartbeat->fsa_input = I_NULL; dc_heartbeat->callback = do_dc_heartbeat; dc_heartbeat->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = -1; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = -1; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = -1; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = -1; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = -1; wait_timer->period_ms = 500; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(shutdown_timer != NULL) { shutdown_timer->source_id = -1; shutdown_timer->period_ms = -1; shutdown_timer->fsa_input = I_SHUTDOWN; shutdown_timer->callback = crm_timer_popped; shutdown_timer->repeat = TRUE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = BIN_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = BIN_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = BIN_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = BIN_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = BIN_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = BIN_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crmd_peer_state = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); set_sigchld_proctrack(G_PRIORITY_HIGH); return I_NULL; } extern GHashTable *shutdown_ops; /* A_STOP */ enum crmd_fsa_input do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(g_hash_table_size(shutdown_ops) > 0) { crm_err("%d stop operations outstanding at exit", g_hash_table_size(shutdown_ops)); } return I_NULL; } /* A_STARTED */ enum crmd_fsa_input do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) { crm_info("Delaying start, CCM (%.16llx) not connected", R_CCM_DATA); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM (%.16llx) not connected", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB (%.16llx) not connected", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) { HA_Message * msg = NULL; /* try reading from HA */ crm_info("Delaying start, Peer data (%.16llx) not recieved", R_PEER_DATA); crm_debug_3("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if(msg != NULL) { crm_debug_3("There was a HA message"); crm_msg_del(msg); } crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } crm_info("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); return I_NULL; } /* A_RECOVER */ enum crmd_fsa_input do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); return I_NULL; } /* A_READCONFIG */ enum crmd_fsa_input do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { /* this one probably is worthwhile blocking on */ crm_data_t *cib_copy = get_cib_copy(fsa_cib_conn); crm_data_t *config = get_object_root(XML_CIB_TAG_CRMCONFIG, cib_copy); dc_heartbeat->period_ms = 0; xml_child_iter( config, iter, XML_CIB_TAG_NVPAIR, const char *name = crm_element_value(iter, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(iter, XML_NVPAIR_ATTR_VALUE); if(name == NULL || value == NULL) { continue; } else if(safe_str_eq(name, XML_CONFIG_ATTR_DC_BEAT)) { dc_heartbeat->period_ms = atoi(value); } else if(safe_str_eq(name, XML_CONFIG_ATTR_DC_DEADTIME)) { election_trigger->period_ms = atoi(value); } else if(safe_str_eq(name, XML_CONFIG_ATTR_FORCE_QUIT)) { shutdown_escalation_timer->period_ms = atoi(value); } ); if(dc_heartbeat->period_ms < 1) { /* sensible default */ dc_heartbeat->period_ms = crm_get_msec( getenv("HA_"KEY_KEEPALIVE)); } election_timeout->period_ms = dc_heartbeat->period_ms * 6; integration_timer->period_ms = dc_heartbeat->period_ms * 6; finalization_timer->period_ms = dc_heartbeat->period_ms * 6; integration_timer->period_ms = crm_get_msec("5min"); finalization_timer->period_ms = crm_get_msec("5min"); if(election_trigger->period_ms < 1 || election_trigger->period_ms > election_timeout->period_ms) { /* sensible default */ election_trigger->period_ms = election_timeout->period_ms * 2; } if(shutdown_escalation_timer->period_ms < 1 || election_timeout->period_ms > shutdown_escalation_timer->period_ms) { /* sensible default - 32 election cycles */ shutdown_escalation_timer->period_ms = (election_timeout->period_ms + election_trigger->period_ms) * 32; } shutdown_timer->period_ms = election_trigger->period_ms; return I_NULL; } gboolean crm_shutdown(int nsig, gpointer unused) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit_inplace(fsa_input_register, R_SHUTDOWN); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ /* if we ever win an election we're the last man standing */ election_timeout->fsa_input = I_STOP; if(is_set(fsa_input_register, R_SHUTDOWN)) { /* cant rely on this... */ crm_timer_start(shutdown_escalation_timer); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); } else { crm_err("Could not set R_SHUTDOWN"); exit(LSB_EXIT_ENOTSUPPORTED); } } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } return TRUE; } gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name) { crm_debug("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_set_ha_options(hb_cluster); crm_debug_3("Be informed of CRM messages"); if (HA_OK != hb_cluster->llc_ops->set_msg_callback( hb_cluster, T_CRM, crmd_ha_msg_callback, hb_cluster)){ crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } #if 0 crm_debug_3("Be informed of Node Status changes"); if (HA_OK != hb_cluster->llc_ops->set_nstatus_callback( hb_cluster, crmd_ha_status_callback, hb_cluster)){ crm_err("Cannot set nstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } #endif crm_debug_3("Be informed of CRM Client Status changes"); if (HA_OK != hb_cluster->llc_ops->set_cstatus_callback( hb_cluster, crmd_client_status_callback, hb_cluster)) { crm_err("Cannot set cstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Adding channel to mainloop"); G_main_add_IPC_Channel( G_PRIORITY_HIGH, hb_cluster->llc_ops->ipcchan(hb_cluster), FALSE, crmd_ha_msg_dispatch, hb_cluster /* userdata */, crmd_ha_connection_destroy); crm_debug_3("Finding our node name"); if ((fsa_our_uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } crm_info("FSA Hostname: %s", fsa_our_uname); /* Async get client status information in the cluster */ crm_debug_3("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status( fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); return TRUE; } diff --git a/crm/crmd/fsa.c b/crm/crmd/fsa.c index 52e49ad1df..de13998429 100644 --- a/crm/crmd/fsa.c +++ b/crm/crmd/fsa.c @@ -1,681 +1,684 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern uint highest_born_on; extern int num_join_invites; extern GHashTable *welcomed_nodes; extern GHashTable *integrated_nodes; extern GHashTable *finalized_nodes; extern GHashTable *confirmed_nodes; extern void initialize_join(gboolean before); long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data); long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input); void dump_rsc_info(void); void dump_rsc_info_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); void ghash_print_node(gpointer key, gpointer value, gpointer user_data); #define DOT_PREFIX "live.dot: " #define DOT_LOG LOG_DEBUG_2 #define do_dot_log(fmt...) do_crm_log(DOT_LOG, NULL, NULL, fmt) #define do_dot_action(fmt...) do_crm_log(DOT_LOG, NULL, NULL, fmt) longclock_t action_start = 0; longclock_t action_stop = 0; longclock_t action_diff = 0; unsigned int action_diff_ms = 0; #define IF_FSA_ACTION(x,y) \ if(is_set(fsa_actions,x)) { \ enum crmd_fsa_input result = I_NULL; \ last_action = x; \ fsa_actions = clear_bit(fsa_actions, x); \ crm_debug_3("Invoking action %s (%.16llx)", \ fsa_action2string(x), x); \ if(action_diff_max_ms > 0) { \ action_start = time_longclock(); \ } \ result = y(x, fsa_data->fsa_cause, fsa_state, \ fsa_data->fsa_input, fsa_data); \ if(action_diff_max_ms > 0) { \ action_stop = time_longclock(); \ action_diff = sub_longclock(action_stop, action_start); \ action_diff_ms = longclockto_ms(action_diff); \ if(action_diff_ms > action_diff_max_ms) { \ crm_err("Action %s took %dms to complete", \ fsa_action2string(x), \ action_diff_ms); \ } else if(action_diff_ms > action_diff_warn_ms) { \ crm_warn("Action %s took %dms to complete", \ fsa_action2string(x), \ action_diff_ms); \ } \ } \ crm_debug_3("Action complete: %s (%.16llx)", \ fsa_action2string(x), x); \ CRM_DEV_ASSERT(result == I_NULL); \ do_dot_action(DOT_PREFIX"\t// %s", fsa_action2string(x)); \ } /* #define ELSEIF_FSA_ACTION(x,y) else IF_FSA_ACTION(x,y) */ void init_dotfile(void); void s_crmd_fsa_actions(fsa_data_t *fsa_data); void log_fsa_input(fsa_data_t *stored_msg); void init_dotfile(void) { do_dot_log(DOT_PREFIX"digraph \"g\" {"); do_dot_log(DOT_PREFIX" size = \"30,30\""); do_dot_log(DOT_PREFIX" graph ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" node ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" shape = \"ellipse\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" edge ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// special nodes"); do_dot_log(DOT_PREFIX" \"S_PENDING\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"blue\""); do_dot_log(DOT_PREFIX" fontcolor = \"blue\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"red\""); do_dot_log(DOT_PREFIX" fontcolor = \"red\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// DC only nodes"); do_dot_log(DOT_PREFIX" \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_IDLE\" [ fontcolor = \"green\" ]"); } volatile enum crmd_fsa_state fsa_state = S_STARTING; oc_node_list_t *fsa_membership_copy; ll_cluster_t *fsa_cluster_conn; ll_lrm_t *fsa_lrm_conn; volatile long long fsa_input_register; volatile long long fsa_actions = A_NOTHING; const char *fsa_our_uname; char *fsa_our_dc; cib_t *fsa_cib_conn = NULL; fsa_timer_t *election_trigger = NULL; /* */ fsa_timer_t *election_timeout = NULL; /* */ fsa_timer_t *shutdown_escalation_timer = NULL; /* */ fsa_timer_t *shutdown_timer = NULL; /* */ fsa_timer_t *integration_timer = NULL; fsa_timer_t *finalization_timer = NULL; fsa_timer_t *dc_heartbeat = NULL; fsa_timer_t *wait_timer = NULL; volatile gboolean do_fsa_stall = FALSE; enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; long long last_action = A_NOTHING; enum crmd_fsa_state last_state = fsa_state; crm_debug_2("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); do_fsa_stall = FALSE; if(is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while(is_message() && do_fsa_stall == FALSE) { crm_debug_2("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; #ifdef FSA_TRACE if(new_actions != A_NOTHING) { crm_debug_2("Adding FSA actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(new_actions, "\tFSA scheduled"); } else if(fsa_data->fsa_input != I_NULL && new_actions == A_NOTHING) { crm_debug("No action specified for input,state (%s,%s)", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); } if(fsa_data->actions != A_NOTHING) { crm_debug_2("Adding input actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_data->actions,"\tInput scheduled"); } #endif /* logging : *before* the state is changed */ IF_FSA_ACTION(A_ERROR, do_log) IF_FSA_ACTION(A_WARN, do_log) IF_FSA_ACTION(A_LOG, do_log) /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Hook to allow actions to removed due to certain inputs */ fsa_actions = clear_flags( fsa_actions, cause, fsa_state, fsa_data->fsa_input); /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if(last_state != fsa_state){ fsa_actions = do_state_transition( fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX"\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if(g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall?"true":"false"); } else { crm_debug_2("Exiting the FSA"); } /* cleanup inputs? */ if(register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added input:", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed input:", register_copy ^ same); } #if 0 if(fsa_state != S_STARTING && g_list_length(fsa_message_queue) && is_ipc_empty(te_subsystem->ipc) && is_ipc_empty(pe_subsystem->ipc) && is_ipc_empty(fsa_cib_conn->cmds->channel(fsa_cib_conn) && is_ipc_empty(fsa_cluster_conn->llc_ops->ipcchan(fsa_cluster_conn)) ){ static gboolean mem_needs_init = TRUE; if(mem_needs_init) { crm_debug("Reached a stable point:" " reseting memory usage stats to zero"); crm_zero_mem_stats(NULL); mem_needs_init = FALSE; } else { crm_debug("Reached a stable point:" " checking memory usage"); crm_mem_stats(NULL); } } #endif fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t *fsa_data) { long long last_action = A_NOTHING; /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ while(fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { msg_queue_helper(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { return; } /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ /* get out of here NOW! before anything worse happens */ IF_FSA_ACTION(A_EXIT_1, do_exit) else IF_FSA_ACTION(A_ERROR, do_log) else IF_FSA_ACTION(A_WARN, do_log) else IF_FSA_ACTION(A_LOG, do_log) /* essential start tasks */ else IF_FSA_ACTION(A_HA_CONNECT, do_ha_control) else IF_FSA_ACTION(A_STARTUP, do_startup) else IF_FSA_ACTION(A_CIB_START, do_cib_control) else IF_FSA_ACTION(A_READCONFIG, do_read_config) /* sub-system start/connect */ else IF_FSA_ACTION(A_LRM_CONNECT, do_lrm_control) else IF_FSA_ACTION(A_CCM_CONNECT, do_ccm_control) else IF_FSA_ACTION(A_TE_START, do_te_control) else IF_FSA_ACTION(A_PE_START, do_pe_control) /* sub-system restart */ else IF_FSA_ACTION(O_CIB_RESTART, do_cib_control) else IF_FSA_ACTION(O_PE_RESTART, do_pe_control) else IF_FSA_ACTION(O_TE_RESTART, do_te_control) /* Timers */ /* else IF_FSA_ACTION(O_DC_TIMER_RESTART, do_timer_control) */ else IF_FSA_ACTION(A_DC_TIMER_STOP, do_timer_control) else IF_FSA_ACTION(A_INTEGRATE_TIMER_STOP, do_timer_control) else IF_FSA_ACTION(A_INTEGRATE_TIMER_START, do_timer_control) else IF_FSA_ACTION(A_FINALIZE_TIMER_STOP, do_timer_control) else IF_FSA_ACTION(A_FINALIZE_TIMER_START, do_timer_control) /* * Highest priority actions */ else IF_FSA_ACTION(A_CIB_BUMPGEN, do_cib_invoke) else IF_FSA_ACTION(A_MSG_ROUTE, do_msg_route) else IF_FSA_ACTION(A_RECOVER, do_recover) else IF_FSA_ACTION(A_CL_JOIN_REQUEST, do_cl_join_request) else IF_FSA_ACTION(A_CL_JOIN_RESULT, do_cl_join_result) else IF_FSA_ACTION(A_SHUTDOWN_REQ, do_shutdown_req) else IF_FSA_ACTION(A_ELECTION_VOTE, do_election_vote) else IF_FSA_ACTION(A_ELECTION_COUNT, do_election_count_vote) /* * High priority actions * Update the cache first */ else IF_FSA_ACTION(A_CCM_UPDATE_CACHE, do_ccm_update_cache) else IF_FSA_ACTION(A_CCM_EVENT, do_ccm_event) else IF_FSA_ACTION(A_STARTED, do_started) else IF_FSA_ACTION(A_CL_JOIN_QUERY, do_cl_join_query) else IF_FSA_ACTION(A_DC_TIMER_START, do_timer_control) /* * Medium priority actions */ else IF_FSA_ACTION(A_DC_TAKEOVER, do_dc_takeover) else IF_FSA_ACTION(A_DC_RELEASE, do_dc_release) else IF_FSA_ACTION(A_ELECTION_START, do_election_vote) else IF_FSA_ACTION(A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all) else IF_FSA_ACTION(A_DC_JOIN_OFFER_ONE, do_dc_join_offer_all) else IF_FSA_ACTION(A_DC_JOIN_PROCESS_REQ,do_dc_join_req) else IF_FSA_ACTION(A_DC_JOIN_PROCESS_ACK,do_dc_join_ack) /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * PE, and the PE before the TE */ else IF_FSA_ACTION(A_CIB_INVOKE_LOCAL, do_cib_invoke) else IF_FSA_ACTION(A_CIB_INVOKE, do_cib_invoke) else IF_FSA_ACTION(A_DC_JOIN_FINALIZE, do_dc_join_finalize) else IF_FSA_ACTION(A_LRM_INVOKE, do_lrm_invoke) else IF_FSA_ACTION(A_LRM_EVENT, do_lrm_event) else IF_FSA_ACTION(A_TE_HALT, do_te_invoke) else IF_FSA_ACTION(A_TE_CANCEL, do_te_invoke) else IF_FSA_ACTION(A_PE_INVOKE, do_pe_invoke) else IF_FSA_ACTION(A_TE_INVOKE, do_te_invoke) else IF_FSA_ACTION(A_CL_JOIN_ANNOUNCE, do_cl_join_announce) /* sub-system stop */ else IF_FSA_ACTION(A_DC_RELEASED, do_dc_release) else IF_FSA_ACTION(A_PE_STOP, do_pe_control) else IF_FSA_ACTION(A_TE_STOP, do_te_control) else IF_FSA_ACTION(A_SHUTDOWN, do_shutdown) else IF_FSA_ACTION(A_STOP, do_stop) else IF_FSA_ACTION(A_CCM_DISCONNECT, do_ccm_control) else IF_FSA_ACTION(A_LRM_DISCONNECT, do_lrm_control) else IF_FSA_ACTION(A_HA_DISCONNECT, do_ha_control) else IF_FSA_ACTION(A_CIB_STOP, do_cib_control) /* exit gracefully */ else IF_FSA_ACTION(A_EXIT_0, do_exit) /* else IF_FSA_ACTION(A_, do_) */ /* Error checking and reporting */ else { crm_err("Action %s (0x%llx) not supported ", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t *stored_msg) { crm_debug("Processing queued input %d", stored_msg->id); if(stored_msg->fsa_cause == C_CCM_CALLBACK) { crm_debug_3("FSA processing CCM callback from %s", stored_msg->origin); } else if(stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_debug_3("FSA processing LRM callback from %s", stored_msg->origin); } else if(stored_msg->data == NULL) { crm_debug_3("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv( stored_msg, fsa_dt_ha_msg, __FUNCTION__); crm_debug_3("FSA processing XML message from %s", stored_msg->origin); crm_log_message(LOG_MSG, ha_input->msg); crm_log_xml_debug_3(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data) { long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_DEV_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX"\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); crm_info("State transition %s -> %s [ input=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); /* the last two clauses might cause trouble later */ if(election_timeout != NULL && next_state != S_ELECTION && cur_state != S_RELEASE_DC) { crm_timer_stop(election_timeout); /* } else { */ /* crm_timer_start(election_timeout); */ } #if 0 if(is_set(fsa_input_register, R_SHUTDOWN)){ set_bit_inplace(tmp, A_DC_TIMER_STOP); } #endif if(next_state == S_INTEGRATION) { set_bit_inplace(tmp, A_INTEGRATE_TIMER_START); } else { set_bit_inplace(tmp, A_INTEGRATE_TIMER_STOP); } if(next_state == S_FINALIZE_JOIN) { set_bit_inplace(tmp, A_FINALIZE_TIMER_START); } else { set_bit_inplace(tmp, A_FINALIZE_TIMER_STOP); } if(next_state != S_PENDING) { set_bit_inplace(tmp, A_DC_TIMER_STOP); } if(next_state != S_ELECTION) { highest_born_on = 0; } switch(next_state) { case S_PENDING: case S_ELECTION: crm_debug_2("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); crm_free(fsa_our_dc); fsa_our_dc = NULL; break; case S_NOT_DC: if(is_set(fsa_input_register, R_SHUTDOWN)){ crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit_inplace(tmp, A_SHUTDOWN_REQ); } CRM_DEV_ASSERT(fsa_our_dc != NULL); if(fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(welcomed_nodes) > 0) { char *msg = crm_strdup( " Welcome reply not received from"); crm_warn("%u cluster nodes failed to respond" " to the join offer.", g_hash_table_size(welcomed_nodes)); g_hash_table_foreach( welcomed_nodes, ghash_print_node, msg); crm_free(msg); } else { crm_info("All %d cluster nodes " "responded to the join offer.", g_hash_table_size(integrated_nodes)); } break; case S_POLICY_ENGINE: if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(finalized_nodes) > 0) { char *msg = crm_strdup( " Confirm not received from"); crm_err("%u cluster nodes failed to confirm" " their join.", g_hash_table_size(finalized_nodes)); g_hash_table_foreach( finalized_nodes, ghash_print_node, msg); crm_free(msg); } else if(g_hash_table_size(confirmed_nodes) == fsa_membership_copy->members_size) { crm_info("All %u cluster nodes are" " eligable to run resources.", fsa_membership_copy->members_size); } else { crm_warn("Only %u of %u cluster " "nodes are eligable to run resources", g_hash_table_size(confirmed_nodes), fsa_membership_copy->members_size); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ crm_timer_stop(shutdown_timer); set_bit_inplace(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: dump_rsc_info(); break; default: break; } if(clear_recovery_bit && next_state != S_PENDING) { tmp = clear_bit(tmp, A_RECOVER); } else if(clear_recovery_bit == FALSE) { tmp = set_bit(tmp, A_RECOVER); } if(tmp != actions) { fsa_dump_actions(actions ^ tmp, "New actions"); actions = tmp; } return actions; } long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input) { long long saved_actions = actions; long long startup_actions = A_STARTUP|A_CIB_START|A_LRM_CONNECT|A_CCM_CONNECT|A_HA_CONNECT|A_READCONFIG|A_STARTED|A_CL_JOIN_QUERY; if(cur_state == S_STOPPING || is_set(fsa_input_register, R_SHUTDOWN)) { clear_bit_inplace(actions, startup_actions); } fsa_dump_actions(actions ^ saved_actions, "Cleared Actions"); return actions; } void dump_rsc_info(void) { } void ghash_print_node(gpointer key, gpointer value, gpointer user_data) { const char *text = user_data; const char *uname = key; crm_info("%s: %s", text, uname); } diff --git a/crm/crmd/lrm.c b/crm/crmd/lrm.c index 34a28ea53a..f2ccc71c7d 100644 --- a/crm/crmd/lrm.c +++ b/crm/crmd/lrm.c @@ -1,1114 +1,1117 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include /* for access */ #include #include #include #include #include #include #include #include #include #include #include char *make_stop_id(const char *rsc, int call_id); void ghash_print_pending(gpointer key, gpointer value, gpointer user_data); gboolean stop_all_resources(void); gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data); gboolean build_operation_update( crm_data_t *rsc_list, lrm_op_t *op, const char *src, int lpc); gboolean build_active_RAs(crm_data_t *rsc_list); void do_update_resource(lrm_op_t *op); enum crmd_fsa_input do_lrm_rsc_op( lrm_rsc_t *rsc, char *rid, const char *operation, crm_data_t *msg); enum crmd_fsa_input do_fake_lrm_op(gpointer data); void stop_recurring_action( gpointer key, gpointer value, gpointer user_data); gboolean remove_recurring_action( gpointer key, gpointer value, gpointer user_data); void free_recurring_op(gpointer value); GHashTable *xml2list(crm_data_t *parent); GHashTable *monitors = NULL; GHashTable *resources = NULL; GHashTable *resources_confirmed = NULL; GHashTable *shutdown_ops = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; const char *rsc_path[] = { /* XML_GRAPH_TAG_RSC_OP, */ XML_CIB_TAG_RESOURCE, "instance_attributes", "rsc_parameters" }; enum crmd_rscstate { crmd_rscstate_NULL, crmd_rscstate_START, crmd_rscstate_START_PENDING, crmd_rscstate_START_OK, crmd_rscstate_START_FAIL, crmd_rscstate_STOP, crmd_rscstate_STOP_PENDING, crmd_rscstate_STOP_OK, crmd_rscstate_STOP_FAIL, crmd_rscstate_MON, crmd_rscstate_MON_PENDING, crmd_rscstate_MON_OK, crmd_rscstate_MON_FAIL, crmd_rscstate_GENERIC_PENDING, crmd_rscstate_GENERIC_OK, crmd_rscstate_GENERIC_FAIL }; void free_lrm_op(lrm_op_t *op); const char *crmd_rscstate2string(enum crmd_rscstate state); const char * crmd_rscstate2string(enum crmd_rscstate state) { switch(state) { case crmd_rscstate_NULL: return NULL; case crmd_rscstate_START: return CRMD_ACTION_START; case crmd_rscstate_START_PENDING: return CRMD_ACTION_START_PENDING; case crmd_rscstate_START_OK: return CRMD_ACTION_STARTED; case crmd_rscstate_START_FAIL: return CRMD_ACTION_START_FAIL; case crmd_rscstate_STOP: return CRMD_ACTION_STOP; case crmd_rscstate_STOP_PENDING: return CRMD_ACTION_STOP_PENDING; case crmd_rscstate_STOP_OK: return CRMD_ACTION_STOPPED; case crmd_rscstate_STOP_FAIL: return CRMD_ACTION_STOP_FAIL; case crmd_rscstate_MON: return CRMD_ACTION_MON; case crmd_rscstate_MON_PENDING: return CRMD_ACTION_MON_PENDING; case crmd_rscstate_MON_OK: return CRMD_ACTION_MON_OK; case crmd_rscstate_MON_FAIL: return CRMD_ACTION_MON_FAIL; case crmd_rscstate_GENERIC_PENDING: return CRMD_ACTION_GENERIC_PENDING; case crmd_rscstate_GENERIC_OK: return CRMD_ACTION_GENERIC_OK; case crmd_rscstate_GENERIC_FAIL: return CRMD_ACTION_GENERIC_FAIL; } return ""; } /* A_LRM_CONNECT */ enum crmd_fsa_input do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret = HA_OK; if(action & A_LRM_DISCONNECT) { if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); } /* TODO: Clean up the hashtable */ } if(action & A_LRM_CONNECT) { crm_debug_4("LRM: connect..."); ret = HA_OK; monitors = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resources = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); resources_confirmed = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); shutdown_ops = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); if(NULL == fsa_lrm_conn) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); ret = HA_FAIL; } if(ret == HA_OK) { crm_debug_4("LRM: sigon..."); ret = fsa_lrm_conn->lrm_ops->signon( fsa_lrm_conn, CRM_SYSTEM_CRMD); } if(ret != HA_OK) { if(++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } } if(ret == HA_OK) { crm_debug_4("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback( fsa_lrm_conn, lrm_op_callback); if(ret != HA_OK) { crm_err("Failed to set LRM callbacks"); } } if(ret != HA_OK) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } /* TODO: create a destroy handler that causes * some recovery to happen */ G_main_add_IPC_Channel(G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, default_ipc_connection_destroy); set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } gboolean stop_all_resources(void) { GListPtr lrm_list = NULL; crm_info("Makeing sure all active resources are stopped before exit"); if(fsa_lrm_conn == NULL) { return TRUE; } else if(is_set(fsa_input_register, R_SENT_RSC_STOP)) { crm_debug("Already sent stop operation"); return TRUE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rsc_id, char, lrm_list, lpc, const char *last_op = g_hash_table_lookup(resources, rsc_id); if(safe_str_neq(last_op, CRMD_ACTION_STOP)) { crm_warn("Resource %s was active at shutdown", rsc_id); do_lrm_rsc_op(NULL, rsc_id, CRMD_ACTION_STOP, NULL); } ); set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); if(g_hash_table_size(shutdown_ops) == 0) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for %d pending stop operations " " to complete before exiting", g_hash_table_size(shutdown_ops)); } return TRUE; } gboolean build_operation_update( crm_data_t *xml_rsc, lrm_op_t *op, const char *src, int lpc) { int len = 0; char *tmp = NULL; char *fail_state = NULL; crm_data_t *xml_op = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return FALSE; } crm_debug("%s: Updating resouce %s after %s %s op", src, op->rsc_id, op_status2text(op->op_status), op->op_type); if(op->op_status == LRM_OP_CANCELLED) { crm_debug("Ignoring cancelled op"); return TRUE; } xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP); if(op->interval <= 0 ||safe_str_eq(op->op_type, CRMD_ACTION_START) || safe_str_eq(op->op_type, CRMD_ACTION_STOP)) { set_xml_property_copy(xml_op, XML_ATTR_ID, op->op_type); } else { char *op_id = generate_op_key( op->rsc_id, op->op_type, op->interval); set_xml_property_copy(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); } set_xml_property_copy(xml_rsc, XML_LRM_ATTR_LASTOP, op->op_type); set_xml_property_copy(xml_op, XML_LRM_ATTR_TASK, op->op_type); set_xml_property_copy(xml_op, "origin", src); /* Handle recurring ops - infer last op_status */ if(op->op_status == LRM_OP_PENDING && op->interval > 0) { if(op->rc == 0) { crm_debug("Mapping pending operation to DONE"); op->op_status = LRM_OP_DONE; } else { crm_debug("Mapping pending operation to ERROR"); op->op_status = LRM_OP_ERROR; } } switch(op->op_status) { case LRM_OP_PENDING: break; case LRM_OP_CANCELLED: crm_err("What to do here"); break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_debug("Resource action %s/%s failed: %d", op->rsc_id, op->op_type, op->op_status); len = strlen(op->op_type); len += strlen("_failed_"); crm_malloc0(fail_state, sizeof(char)*len); if(fail_state != NULL) { sprintf(fail_state, "%s_failed", op->op_type); } set_xml_property_copy( xml_op, XML_LRM_ATTR_RSCSTATE, fail_state); if(lpc == 0) { set_xml_property_copy( xml_rsc, XML_LRM_ATTR_RSCSTATE, fail_state); } crm_free(fail_state); break; case LRM_OP_DONE: set_xml_property_copy( xml_op, XML_LRM_ATTR_RSCSTATE, op->user_data); if(lpc == 0) { set_xml_property_copy( xml_rsc, XML_LRM_ATTR_RSCSTATE, op->user_data); } break; } tmp = crm_itoa(op->call_id); set_xml_property_copy(xml_op, XML_LRM_ATTR_CALLID, tmp); crm_free(tmp); /* set these on 'xml_rsc' too to make life easy for the TE */ tmp = crm_itoa(op->rc); set_xml_property_copy(xml_op, XML_LRM_ATTR_RC, tmp); set_xml_property_copy(xml_rsc, XML_LRM_ATTR_RC, tmp); crm_free(tmp); tmp = crm_itoa(op->op_status); set_xml_property_copy(xml_op, XML_LRM_ATTR_OPSTATUS, tmp); set_xml_property_copy(xml_rsc, XML_LRM_ATTR_OPSTATUS, tmp); crm_free(tmp); set_node_tstamp(xml_op); return TRUE; } gboolean build_active_RAs(crm_data_t *rsc_list) { GList *op_list = NULL; GList *lrm_list = NULL; gboolean found_op = FALSE; state_flag_t cur_state = 0; if(fsa_lrm_conn == NULL) { return FALSE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rid, char, lrm_list, lpc, lrm_rsc_t *the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); crm_data_t *xml_rsc = create_xml_node( rsc_list, XML_LRM_TAG_RESOURCE); int max_call_id = -1; crm_debug("Processing lrm_rsc_t entry %s", rid); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); continue; } set_xml_property_copy(xml_rsc, XML_ATTR_ID, the_rsc->id); op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_2("\tcurrent state:%s", cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s for %s (status=%d, rc=%d)", op->op_type, the_rsc->id, op->op_status, op->rc); if(max_call_id < op->call_id) { build_operation_update(xml_rsc, op, __FUNCTION__, llpc); } else if(max_call_id > op->call_id) { crm_err("Bad call_id in list=%d. Previous call_id=%d", op->call_id, max_call_id); } else { crm_debug("Skipping duplicate entry for call_id=%d", op->call_id); } max_call_id = op->call_id; found_op = TRUE; ); if(found_op == FALSE) { crm_err("Could not properly determin last op" " for %s from %d entries", the_rsc->id, g_list_length(op_list)); } g_list_free(op_list); ); g_list_free(lrm_list); return TRUE; } crm_data_t* do_lrm_query(gboolean is_replace) { crm_data_t *xml_result= NULL; crm_data_t *xml_state = NULL; crm_data_t *xml_data = NULL; crm_data_t *rsc_list = NULL; xml_state = create_node_state( fsa_our_uname, fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, CRMD_JOINSTATE_MEMBER, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); if(is_replace) { set_xml_property_copy( xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); } xml_result = create_cib_fragment(xml_state, NULL); crm_log_xml_debug_3(xml_state, "Current state of the LRM"); return xml_result; } /* A_LRM_INVOKE */ enum crmd_fsa_input do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { const char *crm_op = NULL; const char *operation = NULL; enum crmd_fsa_input next_input = I_NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); crm_op = cl_get_string(input->msg, F_CRM_TASK); operation = get_xml_attr_nested( input->xml, rsc_path, DIMOF(rsc_path) -3, XML_LRM_ATTR_TASK, FALSE); if(crm_op != NULL && safe_str_eq(crm_op, "lrm_query")) { crm_data_t *data = do_lrm_query(FALSE); HA_Message *reply = create_reply(input->msg, data); if(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_message(LOG_ERR, reply); crm_msg_del(reply); } free_xml(data); } else if(operation != NULL) { char rid[64]; lrm_rsc_t *rsc = NULL; const char *id_from_cib = NULL; if(cur_state == S_STOPPING || cur_state == S_TERMINATE) { /* we will have already scheduled a stop */ crm_debug("Ignoring LRM operation while in state %s", fsa_state2string(cur_state)); } id_from_cib = get_xml_attr_nested( input->xml, rsc_path, DIMOF(rsc_path) -2, XML_ATTR_ID, TRUE); if(id_from_cib == NULL) { crm_err("No value for %s in message at level %d.", XML_ATTR_ID, DIMOF(rsc_path) -2); return I_NULL; } /* only the first 16 chars are used by the LRM */ strncpy(rid, id_from_cib, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); next_input = do_lrm_rsc_op(rsc, rid, operation, input->xml); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } return next_input; } struct recurring_op_s { char *rsc_id; int call_id; }; enum crmd_fsa_input do_lrm_rsc_op( lrm_rsc_t *rsc, char *rid, const char *operation, crm_data_t *msg) { int call_id = 0; char *op_id = NULL; lrm_op_t* op = NULL; const char *type = NULL; const char *class = NULL; const char *provider = NULL; GHashTable *params = NULL; fsa_data_t *msg_data = NULL; CRM_DEV_ASSERT(rid != NULL); if(rsc != NULL) { class = rsc->class; type = rsc->type; } else if(msg != NULL) { class = get_xml_attr_nested( msg, rsc_path, DIMOF(rsc_path) -2, XML_AGENT_ATTR_CLASS, TRUE); type = get_xml_attr_nested( msg, rsc_path, DIMOF(rsc_path) -2, XML_ATTR_TYPE, TRUE); provider = get_xml_attr_nested( msg, rsc_path, DIMOF(rsc_path) -2, XML_AGENT_ATTR_PROVIDER, FALSE); } if(safe_str_neq(operation, CRMD_ACTION_STOP)) { if(fsa_state == S_STARTING || fsa_state == S_STOPPING || fsa_state == S_TERMINATE) { crm_err("Discarding attempt to perform action %s on %s" " while in state %s", operation, rid, fsa_state2string(fsa_state)); return I_NULL; } else if(AM_I_DC == FALSE && fsa_state != S_NOT_DC) { crm_warn("Discarding attempt to perform action %s on %s" " in state %s", operation, rid, fsa_state2string(fsa_state)); return I_NULL; } } if(rsc == NULL) { /* check if its already there */ rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL) { /* add it to the list */ crm_debug_2("adding rsc %s before operation", rid); if(msg != NULL) { params = xml2list(msg); } else { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, class, type, provider, params); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL) { crm_err("Could not add resource %s to LRM", rid); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } /* stop the monitor before stopping the resource */ if(safe_str_eq(operation, CRMD_ACTION_STOP)) { g_hash_table_foreach(monitors, stop_recurring_action, rsc); g_hash_table_foreach_remove( monitors, remove_recurring_action, rsc); } /* now do the op */ crm_info("Performing op %s on %s", operation, rid); crm_malloc0(op, sizeof(lrm_op_t)); op->op_type = crm_strdup(operation); op->op_status = LRM_OP_PENDING; op->user_data = NULL; if(params == NULL) { if(msg != NULL) { params = xml2list(msg); } else { CRM_DEV_ASSERT(safe_str_eq( CRMD_ACTION_STOP, operation)); } } op->params = params; op->interval = crm_get_msec(g_hash_table_lookup(op->params,"interval")); op->timeout = crm_get_msec(g_hash_table_lookup(op->params, "timeout")); op->start_delay = crm_get_msec( g_hash_table_lookup(op->params,"start_delay")); /* sanity */ if(op->interval < 0) { op->interval = 0; } if(op->timeout < 0) { op->timeout = 0; } if(op->start_delay < 0) { op->start_delay = 0; } if(g_hash_table_lookup(op->params, "timeout") != NULL) { char *timeout_ms = crm_itoa(op->timeout); g_hash_table_replace( op->params, crm_strdup("timeout"), timeout_ms); } if(g_hash_table_lookup(op->params, "interval") != NULL) { char *interval_ms = crm_itoa(op->interval); g_hash_table_replace( op->params, crm_strdup("interval"), interval_ms); } if(g_hash_table_lookup(op->params, "start_delay") != NULL) { char *delay_ms = crm_itoa(op->start_delay); g_hash_table_replace( op->params, crm_strdup("start_delay"), delay_ms); } if(op->interval > 0) { struct recurring_op_s *existing_op = NULL; op_id = generate_op_key(rsc->id, op->op_type, op->interval); existing_op = g_hash_table_lookup(monitors, op_id); if(existing_op != NULL) { crm_debug("Cancelling previous invocation of" " %s on %s (%d)", crm_str(op_id), rsc->id,existing_op->call_id); /*cancel it so we can then restart it without conflict*/ rsc->ops->cancel_op(rsc, existing_op->call_id); g_hash_table_remove(monitors, op_id); } } op->app_name = crm_strdup(CRM_SYSTEM_CRMD); if(safe_str_eq(operation, CRMD_ACTION_MON)) { op->target_rc = CHANGED; } else { op->target_rc = EVERYTIME; } if(safe_str_eq(CRMD_ACTION_START, operation)) { op->user_data = crm_strdup(CRMD_ACTION_STARTED); } else if(safe_str_eq(CRMD_ACTION_STOP, operation)) { op->user_data = crm_strdup(CRMD_ACTION_STOPPED); } else if(safe_str_eq(CRMD_ACTION_MON, operation)) { const char *last_op = g_hash_table_lookup(resources, rsc->id); op->user_data = crm_strdup(CRMD_ACTION_MON_OK); if(safe_str_eq(last_op, CRMD_ACTION_STOP)) { crm_err("Attempting to schedule %s _after_ a stop.", op_id); free_lrm_op(op); crm_free(op_id); return I_NULL; } } else { crm_warn("Using status \"%s\" for op \"%s\"" "... this is still in the experimental stage.", CRMD_ACTION_GENERIC_OK, operation); op->user_data = crm_strdup(CRMD_ACTION_GENERIC_OK); } g_hash_table_replace( resources, crm_strdup(rsc->id), crm_strdup(operation)); op->user_data_len = 1+strlen(op->user_data); call_id = rsc->ops->perform_op(rsc, op); if(call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rid, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if(op->interval > 0) { struct recurring_op_s *op = NULL; crm_malloc0(op, sizeof(struct recurring_op_s)); crm_debug("Adding recurring %s op for %s (%d)", op_id, rsc->id, call_id); op->call_id = call_id; op->rsc_id = crm_strdup(rsc->id); g_hash_table_insert(monitors, op_id, op); op_id = NULL; } else { /* record all non-recurring operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); g_hash_table_replace( shutdown_ops, call_id_s, crm_strdup(rsc->id)); crm_debug("Recording pending op: %s", call_id_s); } crm_free(op_id); free_lrm_op(op); return I_NULL; } void stop_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { if(op->call_id > 0) { crm_debug("Stopping recurring op %d for %s (%s)", op->call_id, rsc->id, (char*)key); rsc->ops->cancel_op(rsc, op->call_id); } else { crm_err("Invalid call_id %d for %s", op->call_id, rsc->id); /* TODO: we probably need to look up the LRM to find it */ } } } gboolean remove_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { return TRUE; } return FALSE; } void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s*)value; crm_free(op->rsc_id); crm_free(op); } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } GHashTable * xml2list(crm_data_t *parent) { crm_data_t *nvpair_list = NULL; GHashTable *nvpair_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); CRM_DEV_ASSERT(parent != NULL); if(parent != NULL) { nvpair_list = find_xml_node(parent, XML_TAG_ATTRS, FALSE); if(nvpair_list == NULL) { crm_debug("No attributes in %s", crm_element_name(parent)); crm_log_xml_debug_2(parent,"No attributes for resource op"); } } xml_child_iter( nvpair_list, node_iter, XML_CIB_TAG_NVPAIR, const char *key = crm_element_value( node_iter, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value( node_iter, XML_NVPAIR_ATTR_VALUE); crm_debug_2("Added %s=%s", key, value); g_hash_table_insert( nvpair_hash, crm_strdup(key), crm_strdup(value)); ); return nvpair_hash; } void do_update_resource(lrm_op_t* op) { /* */ crm_data_t *update, *iter; crm_data_t *fragment; int rc = cib_ok; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } update = create_xml_node(NULL, XML_CIB_TAG_STATE); set_uuid(fsa_cluster_conn, update, XML_ATTR_UUID, fsa_our_uname); set_xml_property_copy(update, XML_ATTR_UNAME, fsa_our_uname); iter = create_xml_node(update, XML_CIB_TAG_LRM); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); set_xml_property_copy(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, op, __FUNCTION__, 0); fragment = create_cib_fragment(update, NULL); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ rc = fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, cib_quorum_override); if(rc > 0) { /* the return code is a call number, not an error code */ crm_debug_3("Sent resource state update message: %d", rc); } else { crm_err("Resource state update failed: %s", cib_error2string(rc)); CRM_DEV_ASSERT(rc == cib_ok); } free_xml(fragment); free_xml(update); } enum crmd_fsa_input do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data) { lrm_op_t* op = NULL; const char *last_op = NULL; if(msg_data->fsa_cause != C_LRM_OP_CALLBACK) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } op = fsa_typed_data(fsa_dt_lrm); CRM_DEV_ASSERT(op != NULL); CRM_DEV_ASSERT(op != NULL && op->rsc_id != NULL); if(crm_assert_failed) { return I_NULL; } if(op->op_status == LRM_OP_DONE && op->rc != EXECRA_OK) { crm_warn("Mapping operation %d status with a rc=%d" " to status %d", op->op_status, op->rc, LRM_OP_ERROR); op->op_status = LRM_OP_ERROR; } switch(op->op_status) { case LRM_OP_PENDING: /* this really shouldnt happen */ crm_err("LRM operation (%d) %s on %s %s: %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status), execra_code2string(op->rc)); break; case LRM_OP_ERROR: crm_err("LRM operation (%d) %s on %s %s: %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status), execra_code2string(op->rc)); crm_debug("Result: %s", op->output); break; case LRM_OP_CANCELLED: crm_warn("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); return I_NULL; break; case LRM_OP_TIMEOUT: last_op = g_hash_table_lookup( resources_confirmed, crm_strdup(op->rsc_id)); if(safe_str_eq(last_op, CRMD_ACTION_STOP) && safe_str_eq(op->op_type, CRMD_ACTION_MON)) { crm_err("LRM sent a timed out %s operation" " _after_ a confirmed stop", op->op_type); return I_NULL; } crm_err("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_NOTSUPPORTED: crm_err("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_DONE: crm_debug("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); break; } g_hash_table_replace(resources_confirmed, crm_strdup(op->rsc_id), crm_strdup(op->op_type)); do_update_resource(op); if(g_hash_table_size(shutdown_ops) > 0) { char *op_id = make_stop_id(op->rsc_id, op->call_id); if(g_hash_table_remove(shutdown_ops, op_id)) { crm_debug("Shutdown op %d (%s %s) confirmed", op->call_id, op->op_type, op->rsc_id); } else { crm_debug("Shutdown op %d (%s %s) not matched: %s", op->call_id, op->op_type, op->rsc_id, op_id); } crm_free(op_id); } if(is_set(fsa_input_register, R_SENT_RSC_STOP)) { if(g_hash_table_size(shutdown_ops) == 0) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_debug("Still waiting for %d pending stop operations" " to complete before exiting", g_hash_table_size(shutdown_ops)); g_hash_table_foreach( shutdown_ops, ghash_print_pending, NULL); } } return I_NULL; } char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; crm_malloc0(op_id, strlen(rsc) + 34); if(op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *uname = key; crm_debug("Pending action: %s", uname); } gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data) { const char *this_rsc = value; const char *target_rsc = user_data; if(safe_str_eq(this_rsc, target_rsc)) { return TRUE; } return FALSE; } diff --git a/crm/crmd/main.c b/crm/crmd/main.c index 71f85ca4dc..a601ed54fd 100644 --- a/crm/crmd/main.c +++ b/crm/crmd/main.c @@ -1,186 +1,189 @@ -/* $Id: main.c,v 1.19 2005/05/18 20:15:57 andrew Exp $ */ +/* $Id: main.c,v 1.20 2005/06/14 11:09:03 davidlee Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char* crm_system_name = SYS_NAME; #define OPTARGS "hV" void usage(const char* cmd, int exit_status); int init_start(void); void crmd_hamsg_callback(const HA_Message * msg, void* private_data); gboolean crmd_tickle_apphb(gpointer data); extern void init_dotfile(void); GMainLoop* crmd_mainloop = NULL; int main(int argc, char ** argv) { int flag; int argerr = 0; crm_log_init(crm_system_name); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': cl_log_enable_stderr(1); alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } /* read local config file */ crm_debug_3("Enabling coredumps"); if(cl_enable_coredumps(1) != 0) { crm_warn("Cannot enable coredumps"); } return init_start(); } int init_start(void) { int exit_code = 0; enum crmd_fsa_state state; fsa_state = S_STARTING; fsa_input_register = 0; /* zero out the regester */ init_dotfile(); crm_info("Starting %s", crm_system_name); register_fsa_input(C_STARTUP, I_STARTUP, NULL); state = s_crmd_fsa(C_STARTUP); if (state == S_PENDING || state == S_STARTING) { /* Create the mainloop and run it... */ crmd_mainloop = g_main_new(FALSE); crm_info("Starting %s's mainloop", crm_system_name); #ifdef REALTIME_SUPPORT static int crm_realtime = 1; if (crm_realtime == 1){ cl_enable_realtime(); }else if (crm_realtime == 0){ cl_disable_realtime(); } cl_make_realtime(SCHED_RR, 5, 64, 64); #endif g_main_run(crmd_mainloop); return_to_orig_privs(); if(is_set(fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting respawn by Heartbeat"); exit_code = 100; } } else { crm_err("Startup of %s failed. Current state: %s", crm_system_name, fsa_state2string(state)); exit_code = 1; } crm_info("[%s] stopped (%d)", crm_system_name, exit_code); return exit_code; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh]" "[-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } gboolean crmd_tickle_apphb(gpointer data) { char app_instance[APPNAME_LEN]; int rc = 0; sprintf(app_instance, "%s_%ld", crm_system_name, (long)getpid()); rc = apphb_hb(); if (rc < 0) { cl_perror("%s apphb_hb failure", app_instance); exit(3); } return TRUE; } diff --git a/crm/crmd/messages.c b/crm/crmd/messages.c index efe35360d9..91900ebf5c 100644 --- a/crm/crmd/messages.c +++ b/crm/crmd/messages.c @@ -1,1215 +1,1218 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); enum crmd_fsa_input handle_request(ha_msg_input_t *stored_msg); enum crmd_fsa_input handle_response(ha_msg_input_t *stored_msg); enum crmd_fsa_input handle_shutdown_request(HA_Message *stored_msg); ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t *orig); gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data); #ifdef MSG_LOG # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x); \ crm_log_message_adv(LOG_MSG, "router.log", relay_message); #else # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x) #endif /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t *cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if(fsa_actions != A_NOTHING) { register_fsa_input_adv( cur_data?cur_data->fsa_cause:C_FSA_INTERNAL, I_NULL, cur_data?cur_data->data:NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv( cause, input, new_data, A_NOTHING, TRUE, raised_from); } static gboolean last_was_vote = FALSE; void register_fsa_input_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; last_data_id++; crm_debug("%s raised FSA input %d (%s) (cause=%s) %s data", raised_from, last_data_id, fsa_input2string(input), fsa_cause2string(cause), data?"with":"without"); if(input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input"); if(old_len > 0) { crm_warn("%s stalled the FSA with pending inputs", raised_from); fsa_dump_queue(LOG_DEBUG); } if(data == NULL) { set_bit_inplace(fsa_actions, with_actions); with_actions = A_NOTHING; return; } crm_err("%s stalled the FSA with data - this may be broken", raised_from); } if(old_len == 0) { last_was_vote = FALSE; } if(input == I_NULL && with_actions == A_NOTHING /* && data == NULL */){ /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return; } else if(data == NULL) { last_was_vote = FALSE; } else if(last_was_vote && cause == C_HA_MESSAGE && input == I_ROUTER) { const char *op = cl_get_string( ((ha_msg_input_t*)data)->msg, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_VOTE)) { /* It is always safe to treat N successive votes as * a single one * * If all the discarded votes are more "loosing" than * the first then the result is accurate * (win or loose). * * If any of the discarded votes are less "loosing" * than the first then we will cast our vote and the * eventual winner will vote us down again (which * even in the case that N=2, is no worse than if we * had not disarded the vote). */ crm_debug_2("Vote compression: %d", old_len); return; } } else if (cause == C_HA_MESSAGE && input == I_ROUTER) { const char *op = cl_get_string( ((ha_msg_input_t*)data)->msg, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_VOTE)) { last_was_vote = TRUE; crm_debug_3("Added vote: %d", old_len); } } else { last_was_vote = FALSE; } crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if(with_actions != A_NOTHING) { crm_debug_3("Adding actions %.16llx to input", with_actions); } if(data != NULL) { switch(cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_debug_3("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_debug_3("Copying %s data from %s as lrm_op_t", fsa_cause2string(cause), raised_from); fsa_data->data = copy_lrm_op((lrm_op_t*)data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: crm_debug_3("Copying %s data from %s as CCM data", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ccm_data(data); fsa_data->data_type = fsa_dt_ccm; break; case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); exit(1); break; } crm_debug_4("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if(prepend) { crm_debug_2("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_debug_2("Queue len: %d", g_list_length(fsa_message_queue)); fsa_dump_queue(LOG_DEBUG_2); if(old_len == g_list_length(fsa_message_queue)){ crm_err("Couldnt add message to the queue"); } if(fsa_source) { G_main_set_trigger(fsa_source); } } void fsa_dump_queue(int log_level) { if(log_level < (int)crm_log_level) { return; } slist_iter( data, fsa_data_t, fsa_message_queue, lpc, do_crm_log(log_level, __FILE__, __FUNCTION__, "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)", lpc, data->id, fsa_input2string(data->fsa_input), data->origin, fsa_cause2string(data->fsa_cause)); ); } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t *orig) { ha_msg_input_t *input_copy = NULL; crm_malloc0(input_copy, sizeof(ha_msg_input_t)); if(orig != NULL) { crm_debug_4("Copy msg"); input_copy->msg = ha_msg_copy(orig->msg); if(orig->xml != NULL) { crm_debug_4("Copy xml"); input_copy->xml = copy_xml(orig->xml); } } else { crm_debug_3("No message to copy"); } return input_copy; } void delete_fsa_input(fsa_data_t *fsa_data) { lrm_op_t *op = NULL; crm_data_t *foo = NULL; struct crmd_ccm_data_s *ccm_input = NULL; if(fsa_data == NULL) { return; } crm_debug_4("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if(fsa_data->data != NULL) { switch(fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrm_op_t*)fsa_data->data; crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->app_name); crm_free(op); break; case fsa_dt_ccm: ccm_input = (struct crmd_ccm_data_s *) fsa_data->data; crm_free(ccm_input->oc); crm_free(ccm_input); break; case fsa_dt_none: if(fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); exit(1); } break; } crm_debug_4("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } crm_free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t* message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv( fsa_data_t *fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if(fsa_data == NULL) { do_crm_log(LOG_ERR, caller, NULL, "No FSA data available"); } else if(fsa_data->data == NULL) { do_crm_log(LOG_ERR, caller, NULL, "No message data available"); } else if(fsa_data->data_type != a_type) { do_crm_log(LOG_CRIT, caller, NULL, "Message data was the wrong type! %d vs. requested=%d." " Origin: %s", fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ enum crmd_fsa_input do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); gboolean routed = FALSE; if(msg_data->fsa_cause != C_IPC_MESSAGE && msg_data->fsa_cause != C_HA_MESSAGE) { /* dont try and route these */ crm_warn("Can only process HA and IPC messages"); return I_NULL; } /* try passing the buck first */ crm_debug_4("Attempting to route message"); routed = relay_message(input->msg, cause==C_IPC_MESSAGE); if(routed == FALSE) { crm_debug_4("Message wasn't routed... try handling locally"); /* calculate defer */ result = handle_message(input); switch(result) { case I_NULL: break; case I_DC_HEARTBEAT: break; case I_CIB_OP: break; /* what else should go here? */ default: crm_debug_4("Defering local processing of message"); register_fsa_input_later( cause, result, msg_data->data); result = I_NULL; break; } if(result == I_NULL) { crm_debug_4("Message processed"); } else { register_fsa_input(cause, result, msg_data->data); } } else { crm_debug_4("Message routed..."); input->msg = NULL; } return I_NULL; } /* * This method frees msg */ gboolean send_request(HA_Message *msg, char **msg_reference) { gboolean was_sent = FALSE; /* crm_log_xml_debug_3(request, "Final request..."); */ if(msg_reference != NULL) { *msg_reference = crm_strdup( cl_get_string(msg, XML_ATTR_REFERENCE)); } was_sent = relay_message(msg, TRUE); if(was_sent == FALSE) { ha_msg_input_t *fsa_input = new_ha_msg_input(msg); register_fsa_input(C_IPC_MESSAGE, I_ROUTER, fsa_input); delete_ha_msg_input(fsa_input); crm_msg_del(msg); } return was_sent; } /* unless more processing is required, relay_message is freed */ gboolean relay_message(HA_Message *relay_message, gboolean originated_locally) { int is_for_dc = 0; int is_for_dcib = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = cl_get_string(relay_message, F_CRM_HOST_TO); const char *sys_to = cl_get_string(relay_message, F_CRM_SYS_TO); const char *sys_from= cl_get_string(relay_message, F_CRM_SYS_FROM); const char *type = cl_get_string(relay_message, F_TYPE); const char *msg_error = NULL; crm_debug_3("Routing message %s", cl_get_string(relay_message, XML_ATTR_REFERENCE)); if(relay_message == NULL) { msg_error = "Cannot route empty message"; } else if(safe_str_eq(CRM_OP_HELLO, cl_get_string(relay_message, F_CRM_TASK))){ /* quietly ignore */ processing_complete = TRUE; } else if(safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if(sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if(msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_message(LOG_WARNING, relay_message); } if(processing_complete) { crm_msg_del(relay_message); return TRUE; } processing_complete = TRUE; is_for_dc = (strcmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_cib = (strcmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if(host_to == NULL || strlen(host_to) == 0) { if(is_for_dc) { is_local = 0; } else if(is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if(strcmp(fsa_our_uname, host_to) == 0) { is_local=1; } if(is_for_dc || is_for_dcib) { if(AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ ROUTER_RESULT("Message result: External relay to DC"); send_msg_via_ha(fsa_cluster_conn, relay_message); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); crm_msg_del(relay_message); } } else if(is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(relay_message, sys_to); } else { ROUTER_RESULT("Message result: External relay"); send_msg_via_ha(fsa_cluster_conn, relay_message); } return processing_complete; } gboolean crmd_authorize_message(ha_msg_input_t *client_msg, crmd_client_t *curr_client) { /* check the best case first */ const char *sys_from = cl_get_string(client_msg->msg, F_CRM_SYS_FROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; const char *filtered_from; gpointer table_key = NULL; gboolean auth_result = FALSE; struct crm_subsystem_s *the_subsystem = NULL; gboolean can_reply = FALSE; /* no-one has registered with this id */ const char *op = cl_get_string(client_msg->msg, F_CRM_TASK); if (safe_str_neq(CRM_OP_HELLO, op)) { if(sys_from == NULL) { crm_warn("Message [%s] was had no value for %s... discarding", cl_get_string(client_msg->msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM); return FALSE; } filtered_from = sys_from; /* The CIB can have two names on the DC */ if(strcmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup (ipc_clients, filtered_from) != NULL) { can_reply = TRUE; /* reply can be routed */ } crm_debug_2("Message reply can%s be routed from %s.", can_reply?"":" not", sys_from); if(can_reply == FALSE) { crm_warn("Message [%s] not authorized", cl_get_string(client_msg->msg, XML_ATTR_REFERENCE)); } return can_reply; } crm_debug_3("received client join msg"); crm_log_message(LOG_MSG, client_msg->msg); auth_result = process_hello_message( client_msg->xml, &uuid, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if(client_name == NULL || uuid == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), crm_str(uuid)); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_debug_3("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } crm_free(major_version); crm_free(minor_version); } if (auth_result == TRUE) { /* if we already have one of those clients * only applies to te, pe etc. not admin clients */ if (strcmp(CRM_SYSTEM_PENGINE, client_name) == 0) { the_subsystem = pe_subsystem; } else if (strcmp(CRM_SYSTEM_TENGINE, client_name) == 0) { the_subsystem = te_subsystem; } if (the_subsystem != NULL) { /* do we already have one? */ crm_debug_3("Checking if %s is required/already connected", client_name); if(is_set(fsa_input_register, the_subsystem->flag_connected)) { auth_result = FALSE; crm_warn("Bit\t%.16llx set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_err("Client %s is already connected", client_name); } else if(FALSE == is_set(fsa_input_register, the_subsystem->flag_required)) { auth_result = TRUE; crm_warn("Bit\t%.16llx not set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_warn("Client %s joined but we dont need it", client_name); stop_subsystem(the_subsystem); } else { the_subsystem->ipc = curr_client->client_channel; set_bit_inplace(fsa_input_register, the_subsystem->flag_connected); } } else { table_key = (gpointer) generate_hash_key(client_name, uuid); } } if (auth_result == TRUE) { if(table_key == NULL) { table_key = (gpointer)crm_strdup(client_name); } crm_debug("Accepted client %s", crm_str(table_key)); curr_client->table_key = table_key; curr_client->sub_sys = crm_strdup(client_name); curr_client->uuid = crm_strdup(uuid); g_hash_table_insert (ipc_clients, table_key, curr_client->client_channel); send_hello_message(curr_client->client_channel, "n/a", CRM_SYSTEM_CRMD, "0", "1"); crm_debug_3("Updated client list with %s", crm_str(table_key)); G_main_set_trigger(fsa_source); } else { crm_warn("Rejected client logon request"); curr_client->client_channel->ch_status = IPC_DISC_PENDING; } if(uuid != NULL) crm_free(uuid); if(minor_version != NULL) crm_free(minor_version); if(major_version != NULL) crm_free(major_version); if(client_name != NULL) crm_free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(ha_msg_input_t *stored_msg) { enum crmd_fsa_input next_input = I_NULL; const char *type = NULL; if(stored_msg == NULL || stored_msg->msg == NULL) { crm_err("No message to handle"); return I_NULL; } type = cl_get_string(stored_msg->msg, F_CRM_MSG_TYPE); if(safe_str_eq(type, XML_ATTR_REQUEST)) { next_input = handle_request(stored_msg); } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { next_input = handle_response(stored_msg); } else { crm_err("Unknown message type: %s", type); } /* crm_debug_2("%s: Next input is %s", __FUNCTION__, */ /* fsa_input2string(next_input)); */ return next_input; } enum crmd_fsa_input handle_request(ha_msg_input_t *stored_msg) { HA_Message *msg = NULL; enum crmd_fsa_input next_input = I_NULL; const char *op = cl_get_string(stored_msg->msg, F_CRM_TASK); const char *sys_to = cl_get_string(stored_msg->msg, F_CRM_SYS_TO); const char *host_from = cl_get_string(stored_msg->msg, F_CRM_HOST_FROM); crm_debug_2("Received %s in state %s", op, fsa_state2string(fsa_state)); if(op == NULL) { crm_err("Bad message"); crm_log_message(LOG_ERR, stored_msg->msg); /*========== common actions ==========*/ } else if(strcmp(op, CRM_OP_NOOP) == 0) { crm_debug("no-op from %s", crm_str(host_from)); } else if(strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ register_fsa_input_adv(C_HA_MESSAGE, I_NULL, stored_msg, A_ELECTION_COUNT, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if(fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); next_input = I_ELECTION; #if 0 } else if(AM_I_DC) { /* This is the old way of doing things but what is gained? */ next_input = I_ELECTION; #endif } } else if(strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*next_input = I_SHUTDOWN; */ next_input = I_NULL; } else if(strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ crm_data_t *ping = createPingAnswerFragment(sys_to, "ok"); set_xml_property_copy(ping, "crmd_state", fsa_state2string(fsa_state)); crm_info("Current state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg->msg, ping); if(relay_message(msg, TRUE) == FALSE) { crm_msg_del(msg); } /* probably better to do this via signals on the * local node */ } else if(strcmp(op, CRM_OP_DEBUG_UP) == 0) { int level = get_crm_log_level(); set_crm_log_level(level+1); crm_info("Debug set to %d (was %d)", get_crm_log_level(), level); } else if(strcmp(op, CRM_OP_DEBUG_DOWN) == 0) { int level = get_crm_log_level(); set_crm_log_level(level-1); crm_info("Debug set to %d (was %d)", get_crm_log_level(), level); } else if(strcmp(op, CRM_OP_JOIN_OFFER) == 0) { next_input = I_JOIN_OFFER; } else if(strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { next_input = I_JOIN_RESULT; /* this functionality should only be enabled * if this is a development build */ } else if(CRM_DEV_BUILD && strcmp(op, CRM_OP_DIE) == 0/*constant condition*/) { crm_warn("Test-only code: Killing the CRM without mercy"); crm_warn("Inhibiting respawns"); exit(100); /*========== (NOT_DC)-Only Actions ==========*/ } else if(AM_I_DC == FALSE){ gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(dc_match || fsa_our_dc == NULL) { if(strcmp(op, CRM_OP_HBEAT) == 0) { crm_debug_3("Received DC heartbeat from %s", host_from); next_input = I_DC_HEARTBEAT; } else if(fsa_our_dc == NULL) { crm_warn("CRMd discarding request: %s" " (DC: %s, from: %s)", op, crm_str(fsa_our_dc), host_from); crm_warn("Ignored Request"); crm_log_message(LOG_WARNING, stored_msg->msg); } else if(strcmp(op, CRM_OP_SHUTDOWN) == 0) { next_input = I_STOP; } else { crm_err("CRMd didnt expect request: %s", op); crm_log_message(LOG_ERR, stored_msg->msg); } } else { crm_warn("Discarding %s op from %s", op, host_from); } /*========== DC-Only Actions ==========*/ } else if(AM_I_DC) { const char *message = ha_msg_value(stored_msg->msg, "message"); if(safe_str_eq(op, CRM_OP_TEABORT)) { crm_debug("Transition cancelled: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(need_transition(fsa_state)) { next_input = I_PE_CALC; } else { crm_debug("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(safe_str_eq(op, CRM_OP_TETIMEOUT)) { crm_debug("Transition cancelled: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(fsa_state == S_IDLE) { crm_err("Transition timed out in S_IDLE"); next_input = I_PE_CALC; } else if(need_transition(fsa_state)) { next_input = I_PE_CALC; } else { crm_err("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(safe_str_eq(op, CRM_OP_TEABORTED)) { crm_debug("Transition cancelled: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(need_transition(fsa_state)) { next_input = I_PE_CALC; } else { crm_debug("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(strcmp(op, CRM_OP_TECOMPLETE) == 0) { crm_debug("Transition complete: %s/%s", op, message); clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(fsa_state == S_TRANSITION_ENGINE) { next_input = I_TE_SUCCESS; } else { crm_debug("Filtering %s op in state %s", op, fsa_state2string(fsa_state)); } } else if(strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { next_input = I_NODE_JOIN; } else if(strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { next_input = I_JOIN_REQUEST; } else if(strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { next_input = I_JOIN_RESULT; } else if(strcmp(op, CRM_OP_SHUTDOWN) == 0) { gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(dc_match) { crm_err("We didnt ask to be shut down yet our" " TE is telling us too." " Better get out now!"); next_input = I_TERMINATE; } else if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("We asked to be shut down, " " are still the DC, yet another node" " (DC) is askin us to shutdown!"); next_input = I_STOP; } else if(fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); next_input = I_ELECTION; } } else if(strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ next_input = handle_shutdown_request(stored_msg->msg); } else { crm_err("Unexpected request (%s) sent to the DC", op); crm_log_message(LOG_ERR, stored_msg->msg); } } return next_input; } enum crmd_fsa_input handle_response(ha_msg_input_t *stored_msg) { enum crmd_fsa_input next_input = I_NULL; const char *op = cl_get_string(stored_msg->msg, F_CRM_TASK); const char *sys_from = cl_get_string(stored_msg->msg, F_CRM_SYS_FROM); const char *msg_ref = cl_get_string(stored_msg->msg, XML_ATTR_REFERENCE); crm_debug_2("Received %s %s in state %s", op, XML_ATTR_RESPONSE, fsa_state2string(fsa_state)); if(op == NULL) { crm_err("Bad message"); crm_log_message(LOG_ERR, stored_msg->msg); } else if(AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { if(safe_str_eq(msg_ref, fsa_pe_ref)) { next_input = I_PE_SUCCESS; } else { crm_debug_2("Skipping superceeded reply from %s", sys_from); } } else if(strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_HBEAT) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { next_input = I_NULL; } else { crm_err("Unexpected response (op=%s) sent to the %s", op, AM_I_DC?"DC":"CRMd"); next_input = I_NULL; } return next_input; } enum crmd_fsa_input handle_shutdown_request(HA_Message *stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ crm_data_t *frag = NULL; time_t now = time(NULL); char *now_s = crm_itoa((int)now); crm_data_t *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); const char *host_from= cl_get_string(stored_msg, F_CRM_HOST_FROM); crm_info("Creating shutdown request for %s",host_from); crm_log_message(LOG_MSG, stored_msg); set_uuid(fsa_cluster_conn, node_state, XML_ATTR_UUID, host_from); set_xml_property_copy(node_state, XML_ATTR_UNAME, host_from); set_xml_property_copy(node_state, XML_CIB_ATTR_SHUTDOWN, now_s); set_xml_property_copy( node_state, XML_CIB_ATTR_EXPSTATE, CRMD_STATE_INACTIVE); frag = create_cib_fragment(node_state, NULL); /* cleanup intermediate steps */ free_xml(node_state); crm_free(now_s); fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, frag, NULL, cib_quorum_override); free_xml(frag); /* will be picked up by the TE as long as its running */ if(need_transition(fsa_state) && is_set(fsa_input_register, R_TE_CONNECTED) == FALSE) { register_fsa_action(A_TE_CANCEL); } return I_NULL; } /* frees msg upon completion */ gboolean send_msg_via_ha(ll_cluster_t *hb_fd, HA_Message *msg) { int log_level = LOG_DEBUG_3; gboolean broadcast = FALSE; gboolean all_is_good = TRUE; const char *op = cl_get_string(msg, F_CRM_TASK); const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); const char *host_to = cl_get_string(msg, F_CRM_HOST_TO); if (msg == NULL) { crm_err("Attempt to send NULL Message via HA failed."); all_is_good = FALSE; } else { crm_debug_4("Relaying message to (%s) via HA", host_to); } if (all_is_good) { if (sys_to == NULL || strlen(sys_to) == 0) { crm_err("You did not specify a destination sub-system" " for this message."); all_is_good = FALSE; } } /* There are a number of messages may not need to be ordered. * At a later point perhaps we should detect them and send them * as unordered messages. */ if (all_is_good) { if (host_to == NULL || strlen(host_to) == 0 || safe_str_eq(sys_to, CRM_SYSTEM_DC)) { broadcast = TRUE; all_is_good = send_ha_message(hb_fd, msg, NULL); } else { all_is_good = send_ha_message(hb_fd, msg, host_to); } } if(all_is_good == FALSE) { log_level = LOG_ERR; } if(log_level == LOG_ERR || (safe_str_neq(op, CRM_OP_HBEAT))) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Sending %sHA message (ref=%s) to %s@%s %s.", broadcast?"broadcast ":"directed ", cl_get_string(msg, XML_ATTR_REFERENCE), crm_str(sys_to), host_to==NULL?"":host_to, all_is_good?"succeeded":"failed"); } crm_msg_del(msg); return all_is_good; } /* msg is deleted by the time this returns */ gboolean send_msg_via_ipc(HA_Message *msg, const char *sys) { gboolean send_ok = TRUE; IPC_Channel *client_channel; enum crmd_fsa_input next_input; crm_debug_4("relaying msg to sub_sys=%s via IPC", sys); client_channel = (IPC_Channel*)g_hash_table_lookup(ipc_clients, sys); if(cl_get_string(msg, F_CRM_HOST_FROM) == NULL) { ha_msg_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { crm_debug_3("Sending message via channel %s.", sys); send_ok = send_ipc_message(client_channel, msg); msg = NULL; /* so the crm_msg_del() below doesnt fail */ } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_CIB) == 0) { crm_err("Sub-system (%s) has been incorporated into the CRMd.", sys); crm_err("Change the way we handle this CIB message"); crm_log_message(LOG_ERR, msg); send_ok = FALSE; } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t *fsa_data = NULL; ha_msg_input_t *msg_copy = new_ha_msg_input(msg); crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->fsa_input = I_MESSAGE; fsa_data->fsa_cause = C_IPC_MESSAGE; fsa_data->data = msg_copy; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_debug_2("Invoking action %s (%.16llx)", fsa_action2string(A_LRM_INVOKE), A_LRM_INVOKE); #endif next_input = do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, fsa_data); delete_ha_msg_input(msg_copy); crm_free(fsa_data); /* todo: feed this back in for anything != I_NULL */ #ifdef FSA_TRACE crm_debug_2("Result of action %s was %s", fsa_action2string(A_LRM_INVOKE), fsa_input2string(next_input)); #endif } else { crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } crm_msg_del(msg); return send_ok; } void msg_queue_helper(void) { IPC_Channel *ipc = NULL; if(fsa_cluster_conn != NULL) { ipc = fsa_cluster_conn->llc_ops->ipcchan( fsa_cluster_conn); } if(ipc != NULL) { ipc->ops->resume_io(ipc); } /* g_hash_table_foreach_remove(ipc_clients, ipc_queue_helper, NULL); */ } gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *ipc_client = value; if(ipc_client->client_channel != NULL) { ipc_client->client_channel->ops->is_message_pending(ipc_client->client_channel); } return FALSE; } diff --git a/crm/crmd/pengine.c b/crm/crmd/pengine.c index d9d53bdf17..9bc4c709e2 100644 --- a/crm/crmd/pengine.c +++ b/crm/crmd/pengine.c @@ -1,209 +1,212 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include /* for access */ #include #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include #define CLIENT_EXIT_WAIT 30 struct crm_subsystem_s *pe_subsystem = NULL; void do_pe_invoke_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); /* A_PE_START, A_PE_STOP, A_TE_RESTART */ enum crmd_fsa_input do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; struct crm_subsystem_s *this_subsys = pe_subsystem; long long stop_actions = A_PE_STOP; long long start_actions = A_PE_START; if(action & stop_actions) { if(stop_subsystem(this_subsys) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } if(action & start_actions) { if(cur_state != S_STOPPING) { if(start_subsystem(this_subsys) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } else { crm_info("Ignoring request to start %s while shutting down", this_subsys->name); } } return result; } char *fsa_pe_ref = NULL; /* A_PE_INVOKE */ enum crmd_fsa_input do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int call_id = 0; if(is_set(fsa_input_register, R_PE_CONNECTED) == FALSE){ if(pe_subsystem->pid > 0) { int pid_status = -1; int rc = waitpid( pe_subsystem->pid, &pid_status, WNOHANG); if(rc > 0 && WIFEXITED(pid_status)) { clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); if(is_set(fsa_input_register, pe_subsystem->flag_required)) { /* this wasnt supposed to happen */ crm_err("%s[%d] terminated during start", pe_subsystem->name, pe_subsystem->pid); register_fsa_error( C_FSA_INTERNAL, I_ERROR, NULL); } pe_subsystem->pid = -1; return I_NULL; } } crm_info("Waiting for the PE to connect"); crmd_fsa_stall(NULL); return I_NULL; } if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke the PE without a consistent" " copy of the CIB!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return I_NULL; } crm_debug("Requesting the current CIB"); call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, NULL, NULL, cib_scope_local); if(FALSE == add_cib_op_callback( call_id, TRUE, NULL, do_pe_invoke_callback)) { crm_err("Cant retrieve the CIB to invoke the %s subsystem with", pe_subsystem->name); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } return I_NULL; } void do_pe_invoke_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { HA_Message *cmd = NULL; int ccm_transition_id = -1; gboolean cib_has_quorum = FALSE; crm_data_t *local_cib = find_xml_node(output, XML_TAG_CIB, TRUE); if(AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE || fsa_state != S_POLICY_ENGINE) { crm_debug("No need to invoke the PE anymore"); return; } crm_debug_2("Invoking %s with %p", CRM_SYSTEM_PENGINE, local_cib); CRM_DEV_ASSERT(local_cib != NULL); CRM_DEV_ASSERT(crm_element_value(local_cib, XML_ATTR_DC_UUID) != NULL); cib_has_quorum = crm_is_true( crm_element_value(local_cib, XML_ATTR_HAVE_QUORUM)); ccm_transition_id = crm_atoi( crm_element_value(local_cib, XML_ATTR_CCM_TRANSITION), "-1"); if(ccm_transition_id < fsa_membership_copy->id) { /* the cib is behind */ crm_debug("Re-asking for the CIB until membership/quorum" " matches: CIB=%d < CRM=%d", ccm_transition_id, fsa_membership_copy->id); mssleep(500); register_fsa_action(A_PE_INVOKE); return; } else if(ccm_transition_id > fsa_membership_copy->id) { /* we are behind */ crm_info("Waiting for another CCM event before proceeding:" " CIB=%d > CRM=%d", ccm_transition_id, fsa_membership_copy->id); return; } if(fsa_pe_ref) { crm_free(fsa_pe_ref); fsa_pe_ref = NULL; } cmd = create_request( CRM_OP_PECALC, local_cib, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); send_request(cmd, &fsa_pe_ref); crm_debug("Invoking the PE: %s", fsa_pe_ref); } diff --git a/crm/crmd/subsystems.c b/crm/crmd/subsystems.c index baa372d389..1046a119cc 100644 --- a/crm/crmd/subsystems.c +++ b/crm/crmd/subsystems.c @@ -1,215 +1,218 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include /* for access */ #include #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include static void crmdManagedChildRegistered(ProcTrack* p) { struct crm_subsystem_s *the_subsystem = p->privatedata; the_subsystem->pid = p->pid; } static void crmdManagedChildDied( ProcTrack* p, int status, int signo, int exitcode, int waslogged) { struct crm_subsystem_s *the_subsystem = p->privatedata; the_subsystem->pid = -1; the_subsystem->ipc = NULL; clear_bit_inplace(fsa_input_register, the_subsystem->flag_connected); if(is_set(fsa_input_register, the_subsystem->flag_required)) { /* this wasnt supposed to happen */ crm_err("The %s subsystem terminated unexpectedly", the_subsystem->name); register_fsa_input_before(C_IPC_MESSAGE, I_ERROR, NULL); } p->privatedata = NULL; } static const char * crmdManagedChildName(ProcTrack* p) { struct crm_subsystem_s *the_subsystem = p->privatedata; return the_subsystem->name; } static ProcTrack_ops crmd_managed_child_ops = { crmdManagedChildDied, crmdManagedChildRegistered, crmdManagedChildName }; gboolean stop_subsystem(struct crm_subsystem_s* the_subsystem) { IPC_Channel *client_channel = the_subsystem->ipc; crm_debug_2("Stopping sub-system \"%s\"", the_subsystem->name); clear_bit_inplace(fsa_input_register, the_subsystem->flag_required); if (the_subsystem->pid <= 0) { crm_debug_2("Client %s not running", the_subsystem->name); } else if(FALSE == is_set( fsa_input_register, the_subsystem->flag_connected)) { /* running but not yet connected */ crm_warn("Stopping %s before it had connected", the_subsystem->name); CL_KILL(the_subsystem->pid, -SIGKILL); the_subsystem->pid = -1; } else if(client_channel == NULL || client_channel->ops->get_chan_status( client_channel) != IPC_CONNECT) { crm_err("Client %s has already quit", the_subsystem->name); } else { HA_Message *quit = create_request( CRM_OP_QUIT, NULL, NULL, the_subsystem->name, AM_I_DC?CRM_SYSTEM_DC:CRM_SYSTEM_CRMD, NULL); crm_info("Sending quit message to %s.", the_subsystem->name); send_ipc_message(client_channel, quit); } return TRUE; } gboolean start_subsystem(struct crm_subsystem_s* the_subsystem) { pid_t pid; struct stat buf; int s_res; unsigned int j; struct rlimit oflimits; const char *devnull = "/dev/null"; char *args = NULL; crm_info("Starting sub-system \"%s\"", the_subsystem->name); set_bit_inplace(fsa_input_register, the_subsystem->flag_required); if (the_subsystem->pid > 0) { crm_warn("Client %s already running as pid %d", the_subsystem->name, (int) the_subsystem->pid); /* starting a started X is not an error */ return TRUE; } /* * We want to ensure that the exec will succeed before * we bother forking. */ if (access(the_subsystem->path, F_OK|X_OK) != 0) { cl_perror("Cannot (access) exec %s", the_subsystem->path); return FALSE; } s_res = stat(the_subsystem->command, &buf); if(s_res != 0) { cl_perror("Cannot (stat) exec %s", the_subsystem->command); return FALSE; } /* We need to fork so we can make child procs not real time */ switch(pid=fork()) { case -1: crm_err("Cannot fork."); return FALSE; default: /* Parent */ NewTrackedProc(pid, 0, PT_LOGNORMAL, the_subsystem, &crmd_managed_child_ops); the_subsystem->pid = pid; return TRUE; case 0: /* Child */ break; } crm_debug("Executing \"%s %s\" (pid %d)", the_subsystem->command, the_subsystem->args, (int) getpid()); /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (j=0; j < oflimits.rlim_cur; ++j) { close(j); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ if(the_subsystem->args != NULL) { args = crm_strdup(the_subsystem->args); } { char* const start_args[] = { crm_strdup(the_subsystem->command), args, NULL }; (void)execvp(the_subsystem->command, start_args); } /* Should not happen */ cl_perror("FATAL: Cannot exec %s %s", the_subsystem->command, crm_str(the_subsystem->args)); exit(100); /* Suppress respawning */ return TRUE; /* never reached */ } diff --git a/crm/crmd/tengine.c b/crm/crmd/tengine.c index 0ed967727d..2cd6bb7e0f 100644 --- a/crm/crmd/tengine.c +++ b/crm/crmd/tengine.c @@ -1,191 +1,194 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include /* for access */ #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *te_subsystem = NULL; /* A_TE_START, A_TE_STOP, A_TE_RESTART */ enum crmd_fsa_input do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; struct crm_subsystem_s *this_subsys = te_subsystem; long long stop_actions = A_TE_STOP; long long start_actions = A_TE_START; /* if(action & stop_actions && cur_state != S_STOPPING */ /* && is_set(fsa_input_register, R_TE_PEND)) { */ /* result = I_WAIT_FOR_EVENT; */ /* return result; */ /* } */ if(action & stop_actions) { if(stop_subsystem(this_subsys) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } if(action & start_actions) { if(cur_state != S_STOPPING) { if(start_subsystem(this_subsys) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } else { crm_info("Ignoring request to start %s while shutting down", this_subsys->name); } } return result; } /* A_TE_INVOKE, A_TE_CANCEL */ enum crmd_fsa_input do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input ret = I_NULL; HA_Message *cmd = NULL; if(AM_I_DC == FALSE) { crm_debug("Not DC: No need to invoke the TE (anymore): %s", fsa_action2string(action)); return I_NULL; } else if(fsa_state != S_TRANSITION_ENGINE && action ^ A_TE_CANCEL) { crm_debug("No need to invoke the TE (%s) in state %s", fsa_action2string(action), fsa_state2string(fsa_state)); return I_NULL; } else if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("No point invoking the TE, we're shutting down"); return I_NULL; } else if(is_set(fsa_input_register, R_TE_CONNECTED) == FALSE) { if(te_subsystem->pid > 0) { int pid_status = -1; int rc = waitpid( te_subsystem->pid, &pid_status, WNOHANG); if(rc > 0 && WIFEXITED(pid_status)) { clear_bit_inplace(fsa_input_register, te_subsystem->flag_connected); if(is_set(fsa_input_register, te_subsystem->flag_required)) { /* this wasnt supposed to happen */ crm_err("%s[%d] terminated during start", te_subsystem->name, te_subsystem->pid); register_fsa_error( C_FSA_INTERNAL, I_ERROR, NULL); } te_subsystem->pid = -1; return I_NULL; } } crm_info("Waiting for the TE to connect before action %s", fsa_action2string(action)); if(action & A_TE_INVOKE) { register_fsa_input( msg_data->fsa_cause, msg_data->fsa_input, msg_data->data); } crmd_fsa_stall(NULL); return I_NULL; } if(action & A_TE_INVOKE) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); if(input->xml != NULL) { crm_debug("Starting a transition"); set_bit_inplace(fsa_input_register, R_IN_TRANSITION); cmd = create_request( CRM_OP_TRANSITION, input->xml, NULL, CRM_SYSTEM_TENGINE, CRM_SYSTEM_DC, NULL); send_request(cmd, NULL); } else { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } else if(action & A_TE_CANCEL) { crm_debug("Cancelling the active Transition"); cmd = create_request( CRM_OP_TEABORT, NULL, NULL, CRM_SYSTEM_TENGINE, CRM_SYSTEM_DC, NULL); send_request(cmd, NULL); } else if(action & A_TE_HALT) { cmd = create_request( CRM_OP_TE_HALT, NULL, NULL, CRM_SYSTEM_TENGINE, CRM_SYSTEM_DC, NULL); send_request(cmd, NULL); } return ret; } diff --git a/crm/crmd/utils.c b/crm/crmd/utils.c index 5a05f0baaa..757e59dc0d 100644 --- a/crm/crmd/utils.c +++ b/crm/crmd/utils.c @@ -1,1388 +1,1391 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void copy_ccm_node(oc_node_t a_node, oc_node_t *a_node_copy); /* A_DC_TIMER_STOP, A_DC_TIMER_START, * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START */ enum crmd_fsa_input do_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean timer_op_ok = TRUE; enum crmd_fsa_input result = I_NULL; if(action & A_DC_TIMER_STOP) { timer_op_ok = crm_timer_stop(election_trigger); } else if(action & A_FINALIZE_TIMER_STOP) { timer_op_ok = crm_timer_stop(finalization_timer); } else if(action & A_INTEGRATE_TIMER_STOP) { timer_op_ok = crm_timer_stop(integration_timer); /* } else if(action & A_ELECTION_TIMEOUT_STOP) { */ /* timer_op_ok = crm_timer_stop(election_timeout); */ } /* dont start a timer that wasnt already running */ if(action & A_DC_TIMER_START && timer_op_ok) { crm_timer_start(election_trigger); if(AM_I_DC) { /* there can be only one */ result = I_ELECTION; } } else if(action & A_FINALIZE_TIMER_START) { crm_timer_start(finalization_timer); } else if(action & A_INTEGRATE_TIMER_START) { crm_timer_start(integration_timer); /* } else if(action & A_ELECTION_TIMEOUT_START) { */ /* crm_timer_start(election_timeout); */ } return I_NULL; } gboolean crm_timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *)data; if(timer == election_trigger) { crm_info("Election Trigger (%s) just popped!", fsa_input2string(timer->fsa_input)); } else if(timer == election_timeout) { crm_info("Election Timeout (%s) just popped!", fsa_input2string(timer->fsa_input)); } else { crm_info("Timer %s just popped!", fsa_input2string(timer->fsa_input)); } if(timer->repeat == FALSE) { crm_timer_stop(timer); /* make it _not_ go off again */ } if(timer->fsa_input == I_INTEGRATED) { register_fsa_input_before( C_TIMER_POPPED, timer->fsa_input, NULL); } else if(timer->fsa_input == I_FINALIZED && fsa_state != S_FINALIZE_JOIN) { crm_debug("Discarding I_FINALIZED event in state: %s", fsa_state2string(fsa_state)); } else if(timer->fsa_input != I_NULL) { register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); } G_main_set_trigger(fsa_source); return TRUE; } gboolean crm_timer_start(fsa_timer_t *timer) { const char *timer_desc = NULL; if(timer == election_trigger) { timer_desc = "Election Trigger"; } else if(timer == election_timeout) { timer_desc = "Election Timeout"; } else { timer_desc = "Timer"; } if((timer->source_id == (guint)-1 || timer->source_id == (guint)-2) && timer->period_ms > 0) { timer->source_id = Gmain_timeout_add( timer->period_ms, timer->callback, (void*)timer); crm_debug("Started %s (%s:%dms), src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); } else if(timer->period_ms < 0) { crm_err("Tried to start %s (%s:%dms) with a -ve period", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms); } else { crm_debug("%s (%s:%dms) already running: src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); return FALSE; } return TRUE; } gboolean crm_timer_stop(fsa_timer_t *timer) { const char *timer_desc = NULL; if(timer == election_trigger) { timer_desc = "Election Trigger"; } else if(timer == election_timeout) { timer_desc = "Election Timeout"; } else if(timer == wait_timer) { timer_desc = "Stall Timeout"; } else { timer_desc = "Timer"; } if(timer == NULL) { crm_err("Attempted to stop NULL timer"); return FALSE; } else if(timer->source_id != (guint)-1 && timer->source_id != (guint)-2) { crm_debug("Stopping %s (%s:%dms), src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); g_source_remove(timer->source_id); timer->source_id = -2; } else { crm_debug_2("%s (%s:%dms) already stopped", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms); timer->source_id = -2; return FALSE; } return TRUE; } long long toggle_bit(long long action_list, long long action) { crm_debug_5("Toggling bit %.16llx", action); action_list ^= action; crm_debug_5("Result %.16llx", action_list & action); return action_list; } long long clear_bit(long long action_list, long long action) { int level = LOG_DEBUG_5; crm_log_maybe(level, "Clearing bit\t%.16llx", action); /* ensure its set */ action_list |= action; /* then toggle */ action_list = action_list ^ action; return action_list; } long long set_bit(long long action_list, long long action) { int level = LOG_DEBUG_5; crm_log_maybe(level, "Setting bit\t%.16llx", action); action_list |= action; return action_list; } gboolean is_set(long long action_list, long long action) { crm_debug_5("Checking bit\t%.16llx in %.16llx", action, action_list); return ((action_list & action) == action); } gboolean is_set_any(long long action_list, long long action) { crm_debug_5("Checking bit\t%.16llx in %.16llx", action, action_list); return ((action_list & action) != 0); } const char * fsa_input2string(enum crmd_fsa_input input) { const char *inputAsText = NULL; switch(input){ case I_NULL: inputAsText = "I_NULL"; break; case I_CCM_EVENT: inputAsText = "I_CCM_EVENT"; break; case I_CIB_OP: inputAsText = "I_CIB_OP"; break; case I_CIB_UPDATE: inputAsText = "I_CIB_UPDATE"; break; case I_DC_TIMEOUT: inputAsText = "I_DC_TIMEOUT"; break; case I_ELECTION: inputAsText = "I_ELECTION"; break; case I_PE_CALC: inputAsText = "I_PE_CALC"; break; case I_RELEASE_DC: inputAsText = "I_RELEASE_DC"; break; case I_ELECTION_DC: inputAsText = "I_ELECTION_DC"; break; case I_ERROR: inputAsText = "I_ERROR"; break; case I_FAIL: inputAsText = "I_FAIL"; break; case I_INTEGRATED: inputAsText = "I_INTEGRATED"; break; case I_FINALIZED: inputAsText = "I_FINALIZED"; break; case I_NODE_JOIN: inputAsText = "I_NODE_JOIN"; break; case I_JOIN_OFFER: inputAsText = "I_JOIN_OFFER"; break; case I_JOIN_REQUEST: inputAsText = "I_JOIN_REQUEST"; break; case I_JOIN_RESULT: inputAsText = "I_JOIN_RESULT"; break; case I_NOT_DC: inputAsText = "I_NOT_DC"; break; case I_RECOVERED: inputAsText = "I_RECOVERED"; break; case I_RELEASE_FAIL: inputAsText = "I_RELEASE_FAIL"; break; case I_RELEASE_SUCCESS: inputAsText = "I_RELEASE_SUCCESS"; break; case I_RESTART: inputAsText = "I_RESTART"; break; case I_PE_SUCCESS: inputAsText = "I_PE_SUCCESS"; break; case I_ROUTER: inputAsText = "I_ROUTER"; break; case I_SHUTDOWN: inputAsText = "I_SHUTDOWN"; break; case I_STARTUP: inputAsText = "I_STARTUP"; break; case I_TE_SUCCESS: inputAsText = "I_TE_SUCCESS"; break; case I_STOP: inputAsText = "I_STOP"; break; case I_DC_HEARTBEAT: inputAsText = "I_DC_HEARTBEAT"; break; case I_WAIT_FOR_EVENT: inputAsText = "I_WAIT_FOR_EVENT"; break; case I_LRM_EVENT: inputAsText = "I_LRM_EVENT"; break; case I_PENDING: inputAsText = "I_PENDING"; break; case I_HALT: inputAsText = "I_HALT"; break; case I_TERMINATE: inputAsText = "I_TERMINATE"; break; case I_ILLEGAL: inputAsText = "I_ILLEGAL"; break; } if(inputAsText == NULL) { crm_err("Input %d is unknown", input); inputAsText = ""; } return inputAsText; } const char * fsa_state2string(enum crmd_fsa_state state) { const char *stateAsText = NULL; switch(state){ case S_IDLE: stateAsText = "S_IDLE"; break; case S_ELECTION: stateAsText = "S_ELECTION"; break; case S_INTEGRATION: stateAsText = "S_INTEGRATION"; break; case S_FINALIZE_JOIN: stateAsText = "S_FINALIZE_JOIN"; break; case S_NOT_DC: stateAsText = "S_NOT_DC"; break; case S_POLICY_ENGINE: stateAsText = "S_POLICY_ENGINE"; break; case S_RECOVERY: stateAsText = "S_RECOVERY"; break; case S_RELEASE_DC: stateAsText = "S_RELEASE_DC"; break; case S_PENDING: stateAsText = "S_PENDING"; break; case S_STOPPING: stateAsText = "S_STOPPING"; break; case S_TERMINATE: stateAsText = "S_TERMINATE"; break; case S_TRANSITION_ENGINE: stateAsText = "S_TRANSITION_ENGINE"; break; case S_STARTING: stateAsText = "S_STARTING"; break; case S_HALT: stateAsText = "S_HALT"; break; case S_ILLEGAL: stateAsText = "S_ILLEGAL"; break; } if(stateAsText == NULL) { crm_err("State %d is unknown", state); stateAsText = ""; } return stateAsText; } const char * fsa_cause2string(enum crmd_fsa_cause cause) { const char *causeAsText = NULL; switch(cause){ case C_UNKNOWN: causeAsText = "C_UNKNOWN"; break; case C_STARTUP: causeAsText = "C_STARTUP"; break; case C_IPC_MESSAGE: causeAsText = "C_IPC_MESSAGE"; break; case C_HA_MESSAGE: causeAsText = "C_HA_MESSAGE"; break; case C_CCM_CALLBACK: causeAsText = "C_CCM_CALLBACK"; break; case C_TIMER_POPPED: causeAsText = "C_TIMER_POPPED"; break; case C_SHUTDOWN: causeAsText = "C_SHUTDOWN"; break; case C_HEARTBEAT_FAILED: causeAsText = "C_HEARTBEAT_FAILED"; break; case C_SUBSYSTEM_CONNECT: causeAsText = "C_SUBSYSTEM_CONNECT"; break; case C_LRM_OP_CALLBACK: causeAsText = "C_LRM_OP_CALLBACK"; break; case C_LRM_MONITOR_CALLBACK: causeAsText = "C_LRM_MONITOR_CALLBACK"; break; case C_CRMD_STATUS_CALLBACK: causeAsText = "C_CRMD_STATUS_CALLBACK"; break; case C_HA_DISCONNECT: causeAsText = "C_HA_DISCONNECT"; break; case C_FSA_INTERNAL: causeAsText = "C_FSA_INTERNAL"; break; case C_ILLEGAL: causeAsText = "C_ILLEGAL"; break; } if(causeAsText == NULL) { crm_err("Cause %d is unknown", cause); causeAsText = ""; } return causeAsText; } const char * fsa_action2string(long long action) { const char *actionAsText = NULL; switch(action){ case A_NOTHING: actionAsText = "A_NOTHING"; break; case A_ELECTION_START: actionAsText = "A_ELECTION_START"; break; case A_READCONFIG: actionAsText = "A_READCONFIG"; break; case O_RELEASE: actionAsText = "O_RELEASE"; break; case A_STARTUP: actionAsText = "A_STARTUP"; break; case A_STARTED: actionAsText = "A_STARTED"; break; case A_HA_CONNECT: actionAsText = "A_HA_CONNECT"; break; case A_HA_DISCONNECT: actionAsText = "A_HA_DISCONNECT"; break; case A_LRM_CONNECT: actionAsText = "A_LRM_CONNECT"; break; case A_LRM_EVENT: actionAsText = "A_LRM_EVENT"; break; case A_LRM_INVOKE: actionAsText = "A_LRM_INVOKE"; break; case A_LRM_DISCONNECT: actionAsText = "A_LRM_DISCONNECT"; break; case A_CL_JOIN_QUERY: actionAsText = "A_CL_JOIN_QUERY"; break; case A_DC_TIMER_STOP: actionAsText = "A_DC_TIMER_STOP"; break; case A_DC_TIMER_START: actionAsText = "A_DC_TIMER_START"; break; case A_INTEGRATE_TIMER_START: actionAsText = "A_INTEGRATE_TIMER_START"; break; case A_INTEGRATE_TIMER_STOP: actionAsText = "A_INTEGRATE_TIMER_STOP"; break; case A_FINALIZE_TIMER_START: actionAsText = "A_FINALIZE_TIMER_START"; break; case A_FINALIZE_TIMER_STOP: actionAsText = "A_FINALIZE_TIMER_STOP"; break; case A_ELECTION_COUNT: actionAsText = "A_ELECTION_COUNT"; break; case A_ELECTION_VOTE: actionAsText = "A_ELECTION_VOTE"; break; case A_CL_JOIN_ANNOUNCE: actionAsText = "A_CL_JOIN_ANNOUNCE"; break; case A_CL_JOIN_REQUEST: actionAsText = "A_CL_JOIN_REQUEST"; break; case A_CL_JOIN_RESULT: actionAsText = "A_CL_JOIN_RESULT"; break; case A_DC_JOIN_OFFER_ALL: actionAsText = "A_DC_JOIN_OFFER_ALL"; break; case A_DC_JOIN_OFFER_ONE: actionAsText = "A_DC_JOIN_OFFER_ONE"; break; case A_DC_JOIN_PROCESS_REQ: actionAsText = "A_DC_JOIN_PROCESS_REQ"; break; case A_DC_JOIN_PROCESS_ACK: actionAsText = "A_DC_JOIN_PROCESS_ACK"; break; case A_DC_JOIN_FINALIZE: actionAsText = "A_DC_JOIN_FINALIZE"; break; case A_MSG_PROCESS: actionAsText = "A_MSG_PROCESS"; break; case A_MSG_ROUTE: actionAsText = "A_MSG_ROUTE"; break; case A_RECOVER: actionAsText = "A_RECOVER"; break; case A_DC_RELEASE: actionAsText = "A_DC_RELEASE"; break; case A_DC_RELEASED: actionAsText = "A_DC_RELEASED"; break; case A_DC_TAKEOVER: actionAsText = "A_DC_TAKEOVER"; break; case A_SHUTDOWN: actionAsText = "A_SHUTDOWN"; break; case A_SHUTDOWN_REQ: actionAsText = "A_SHUTDOWN_REQ"; break; case A_STOP: actionAsText = "A_STOP "; break; case A_EXIT_0: actionAsText = "A_EXIT_0"; break; case A_EXIT_1: actionAsText = "A_EXIT_1"; break; case A_CCM_CONNECT: actionAsText = "A_CCM_CONNECT"; break; case A_CCM_DISCONNECT: actionAsText = "A_CCM_DISCONNECT"; break; case A_CCM_EVENT: actionAsText = "A_CCM_EVENT"; break; case A_CCM_UPDATE_CACHE: actionAsText = "A_CCM_UPDATE_CACHE"; break; case A_CIB_BUMPGEN: actionAsText = "A_CIB_BUMPGEN"; break; case A_CIB_INVOKE: actionAsText = "A_CIB_INVOKE"; break; case O_CIB_RESTART: actionAsText = "O_CIB_RESTART"; break; case A_CIB_START: actionAsText = "A_CIB_START"; break; case A_CIB_STOP: actionAsText = "A_CIB_STOP"; break; case A_TE_INVOKE: actionAsText = "A_TE_INVOKE"; break; case O_TE_RESTART: actionAsText = "O_TE_RESTART"; break; case A_TE_START: actionAsText = "A_TE_START"; break; case A_TE_STOP: actionAsText = "A_TE_STOP"; break; case A_TE_CANCEL: actionAsText = "A_TE_CANCEL"; break; case A_PE_INVOKE: actionAsText = "A_PE_INVOKE"; break; case O_PE_RESTART: actionAsText = "O_PE_RESTART"; break; case A_PE_START: actionAsText = "A_PE_START"; break; case A_PE_STOP: actionAsText = "A_PE_STOP"; break; case A_NODE_BLOCK: actionAsText = "A_NODE_BLOCK"; break; case A_UPDATE_NODESTATUS: actionAsText = "A_UPDATE_NODESTATUS"; break; case A_LOG: actionAsText = "A_LOG "; break; case A_ERROR: actionAsText = "A_ERROR "; break; case A_WARN: actionAsText = "A_WARN "; break; } if(actionAsText == NULL) { crm_err("Action %.16llx is unknown", action); actionAsText = ""; } return actionAsText; } void fsa_dump_inputs(int log_level, const char *text, long long input_register) { if(input_register == A_NOTHING) { return; } if(text == NULL) { text = "Input register contents:"; } if(is_set(input_register, R_THE_DC)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_THE_DC)", text, R_THE_DC); } if(is_set(input_register, R_STARTING)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_STARTING)", text, R_STARTING); } if(is_set(input_register, R_SHUTDOWN)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN); } if(is_set(input_register, R_STAYDOWN)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN); } if(is_set(input_register, R_JOIN_OK)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK); } if(is_set(input_register, R_HAVE_RES)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_HAVE_RES)", text, R_HAVE_RES); } if(is_set(input_register, R_INVOKE_PE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE); } if(is_set(input_register, R_CIB_CONNECTED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED); } if(is_set(input_register, R_PE_CONNECTED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED); } if(is_set(input_register, R_TE_CONNECTED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED); } if(is_set(input_register, R_LRM_CONNECTED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED); } if(is_set(input_register, R_CIB_REQUIRED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED); } if(is_set(input_register, R_PE_REQUIRED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED); } if(is_set(input_register, R_TE_REQUIRED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED); } if(is_set(input_register, R_REQ_PEND)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND); } if(is_set(input_register, R_PE_PEND)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_PE_PEND)", text, R_PE_PEND); } if(is_set(input_register, R_TE_PEND)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_TE_PEND)", text, R_TE_PEND); } if(is_set(input_register, R_RESP_PEND)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND); } if(is_set(input_register, R_CIB_DONE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE); } if(is_set(input_register, R_HAVE_CIB)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB); } if(is_set(input_register, R_CIB_ASKED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_CIB_ASKED)", text, R_CIB_ASKED); } if(is_set(input_register, R_CCM_DATA)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_CCM_DATA)", text, R_CCM_DATA); } if(is_set(input_register, R_PEER_DATA)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA); } if(is_set(input_register, R_IN_RECOVERY)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY); } } void fsa_dump_actions(long long action, const char *text) { int log_level = LOG_DEBUG_3; if(is_set(action, A_READCONFIG)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text); } if(is_set(action, A_STARTUP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_STARTUP) %s", A_STARTUP, text); } if(is_set(action, A_STARTED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_STARTED) %s", A_STARTED, text); } if(is_set(action, A_HA_CONNECT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text); } if(is_set(action, A_HA_DISCONNECT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text); } if(is_set(action, A_LRM_CONNECT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text); } if(is_set(action, A_LRM_EVENT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text); } if(is_set(action, A_LRM_INVOKE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text); } if(is_set(action, A_LRM_DISCONNECT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text); } if(is_set(action, A_DC_TIMER_STOP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text); } if(is_set(action, A_DC_TIMER_START)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text); } if(is_set(action, A_INTEGRATE_TIMER_START)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text); } if(is_set(action, A_INTEGRATE_TIMER_STOP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text); } if(is_set(action, A_FINALIZE_TIMER_START)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text); } if(is_set(action, A_FINALIZE_TIMER_STOP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text); } if(is_set(action, A_ELECTION_COUNT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text); } if(is_set(action, A_ELECTION_VOTE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text); } if(is_set(action, A_CL_JOIN_ANNOUNCE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text); } if(is_set(action, A_CL_JOIN_REQUEST)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text); } if(is_set(action, A_CL_JOIN_RESULT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text); } if(is_set(action, A_DC_JOIN_OFFER_ALL)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text); } if(is_set(action, A_DC_JOIN_OFFER_ONE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text); } if(is_set(action, A_DC_JOIN_PROCESS_REQ)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text); } if(is_set(action, A_DC_JOIN_PROCESS_ACK)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text); } if(is_set(action, A_DC_JOIN_FINALIZE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text); } if(is_set(action, A_MSG_PROCESS)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text); } if(is_set(action, A_MSG_ROUTE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text); } if(is_set(action, A_RECOVER)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_RECOVER) %s", A_RECOVER, text); } if(is_set(action, A_DC_RELEASE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text); } if(is_set(action, A_DC_RELEASED)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text); } if(is_set(action, A_DC_TAKEOVER)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text); } if(is_set(action, A_SHUTDOWN)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text); } if(is_set(action, A_SHUTDOWN_REQ)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text); } if(is_set(action, A_STOP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_STOP ) %s", A_STOP , text); } if(is_set(action, A_EXIT_0)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text); } if(is_set(action, A_EXIT_1)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text); } if(is_set(action, A_CCM_CONNECT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CCM_CONNECT) %s", A_CCM_CONNECT, text); } if(is_set(action, A_CCM_DISCONNECT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CCM_DISCONNECT) %s", A_CCM_DISCONNECT, text); } if(is_set(action, A_CCM_EVENT)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CCM_EVENT) %s", A_CCM_EVENT, text); } if(is_set(action, A_CCM_UPDATE_CACHE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CCM_UPDATE_CACHE) %s", A_CCM_UPDATE_CACHE, text); } if(is_set(action, A_CIB_BUMPGEN)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CIB_BUMPGEN) %s", A_CIB_BUMPGEN, text); } if(is_set(action, A_CIB_INVOKE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CIB_INVOKE) %s", A_CIB_INVOKE, text); } if(is_set(action, A_CIB_START)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CIB_START) %s", A_CIB_START, text); } if(is_set(action, A_CIB_STOP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text); } if(is_set(action, A_TE_INVOKE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text); } if(is_set(action, A_TE_START)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_TE_START) %s", A_TE_START, text); } if(is_set(action, A_TE_STOP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text); } if(is_set(action, A_TE_CANCEL)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text); } if(is_set(action, A_PE_INVOKE)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text); } if(is_set(action, A_PE_START)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_PE_START) %s", A_PE_START, text); } if(is_set(action, A_PE_STOP)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text); } if(is_set(action, A_NODE_BLOCK)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text); } if(is_set(action, A_UPDATE_NODESTATUS)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text); } if(is_set(action, A_LOG)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_LOG ) %s", A_LOG, text); } if(is_set(action, A_ERROR)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_ERROR ) %s", A_ERROR, text); } if(is_set(action, A_WARN)) { do_crm_log(log_level, __FILE__, __FUNCTION__, "Action %.16llx (A_WARN ) %s", A_WARN, text); } } void create_node_entry(const char *uuid, const char *uname, const char *type) { /* make sure a node entry exists for the new node * * this will add anyone except the first ever node in the cluster * since it will also be the DC which doesnt go through the * join process (with itself). We can include a special case * later if desired. */ crm_data_t *tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); crm_debug_3("Creating node entry for %s", uname); set_uuid(fsa_cluster_conn, tmp1, XML_ATTR_UUID, uname); set_xml_property_copy(tmp1, XML_ATTR_UNAME, uname); set_xml_property_copy(tmp1, XML_ATTR_TYPE, type); update_local_cib(create_cib_fragment(tmp1, NULL)); free_xml(tmp1); } struct crmd_ccm_data_s * copy_ccm_data(const struct crmd_ccm_data_s *ccm_input) { const oc_ev_membership_t *oc_in = (const oc_ev_membership_t *)ccm_input->oc; struct crmd_ccm_data_s *ccm_input_copy = NULL; crm_malloc0(ccm_input_copy, sizeof(struct crmd_ccm_data_s)); ccm_input_copy->oc = copy_ccm_oc_data(oc_in); ccm_input_copy->event = ccm_input->event; return ccm_input_copy; } oc_ev_membership_t * copy_ccm_oc_data(const oc_ev_membership_t *oc_in) { unsigned lpc = 0; int size = 0; int offset = 0; unsigned num_nodes = 0; oc_ev_membership_t *oc_copy = NULL; if(oc_in->m_n_member > 0 && num_nodes < oc_in->m_n_member + oc_in->m_memb_idx) { num_nodes = oc_in->m_n_member + oc_in->m_memb_idx; crm_debug_3("Updated ccm nodes to %d - 1", num_nodes); } if(oc_in->m_n_in > 0 && num_nodes < oc_in->m_n_in + oc_in->m_in_idx) { num_nodes = oc_in->m_n_in + oc_in->m_in_idx; crm_debug_3("Updated ccm nodes to %d - 2", num_nodes); } if(oc_in->m_n_out > 0 && num_nodes < oc_in->m_n_out + oc_in->m_out_idx) { num_nodes = oc_in->m_n_out + oc_in->m_out_idx; crm_debug_3("Updated ccm nodes to %d - 3", num_nodes); } /* why 2*?? * ccm code does it like this so i guess its right... */ size = sizeof(oc_ev_membership_t) + sizeof(int) + 2*num_nodes*sizeof(oc_node_t); crm_debug_3("Copying %d ccm nodes", num_nodes); crm_malloc0(oc_copy, size); oc_copy->m_instance = oc_in->m_instance; oc_copy->m_n_member = oc_in->m_n_member; oc_copy->m_memb_idx = oc_in->m_memb_idx; oc_copy->m_n_out = oc_in->m_n_out; oc_copy->m_out_idx = oc_in->m_out_idx; oc_copy->m_n_in = oc_in->m_n_in; oc_copy->m_in_idx = oc_in->m_in_idx; crm_debug_3("instance=%d, nodes=%d (idx=%d), new=%d (idx=%d), lost=%d (idx=%d)", oc_in->m_instance, oc_in->m_n_member, oc_in->m_memb_idx, oc_in->m_n_in, oc_in->m_in_idx, oc_in->m_n_out, oc_in->m_out_idx); offset = oc_in->m_memb_idx; for(lpc = 0; lpc < oc_in->m_n_member; lpc++) { oc_node_t a_node = oc_in->m_array[lpc+offset]; oc_node_t *a_node_copy = &(oc_copy->m_array[lpc+offset]); crm_debug_3("Copying ccm member node %d", lpc); copy_ccm_node(a_node, a_node_copy); } offset = oc_in->m_in_idx; for(lpc = 0; lpc < oc_in->m_n_in; lpc++) { oc_node_t a_node = oc_in->m_array[lpc+offset]; oc_node_t *a_node_copy = &(oc_copy->m_array[lpc+offset]); crm_debug_3("Copying ccm new node %d", lpc); copy_ccm_node(a_node, a_node_copy); } offset = oc_in->m_out_idx; for(lpc = 0; lpc < oc_in->m_n_out; lpc++) { oc_node_t a_node = oc_in->m_array[lpc+offset]; oc_node_t *a_node_copy = &(oc_copy->m_array[lpc+offset]); crm_debug_3("Copying ccm lost node %d", lpc); copy_ccm_node(a_node, a_node_copy); } return oc_copy; } void copy_ccm_node(oc_node_t a_node, oc_node_t *a_node_copy) { crm_debug_3("Copying ccm node: id=%d, born=%d, uname=%s", a_node.node_id, a_node.node_born_on, a_node.node_uname); a_node_copy->node_id = a_node.node_id; a_node_copy->node_born_on = a_node.node_born_on; a_node_copy->node_uname = NULL; if(a_node.node_uname != NULL) { a_node_copy->node_uname = crm_strdup(a_node.node_uname); } else { crm_err("Node Id %d had a NULL uname!", a_node.node_id); } crm_debug_3("Copied ccm node: id=%d, born=%d, uname=%s", a_node_copy->node_id, a_node_copy->node_born_on, a_node_copy->node_uname); } lrm_op_t * copy_lrm_op(const lrm_op_t *op) { lrm_op_t *op_copy = NULL; crm_malloc0(op_copy, sizeof(lrm_op_t)); op_copy->op_type = crm_strdup(op->op_type); /* input fields */ /* GHashTable* params; */ op_copy->params = NULL; op_copy->timeout = op->timeout; op_copy->interval = op->interval; op_copy->target_rc = op->target_rc; /* in the CRM, this is always an int */ if(op->user_data != NULL) { op_copy->user_data = crm_strdup(op->user_data); } /* output fields */ op_copy->op_status = op->op_status; op_copy->rc = op->rc; op_copy->call_id = op->call_id; op_copy->output = NULL; op_copy->rsc_id = crm_strdup(op->rsc_id); op_copy->app_name = crm_strdup(op->app_name); if(op->output!= NULL) { op_copy->output = crm_strdup(op->output); } return op_copy; } lrm_rsc_t * copy_lrm_rsc(const lrm_rsc_t *rsc) { lrm_rsc_t *rsc_copy = NULL; if(rsc == NULL) { return NULL; } crm_malloc0(rsc_copy, sizeof(lrm_rsc_t)); rsc_copy->id = crm_strdup(rsc->id); rsc_copy->type = crm_strdup(rsc->type); rsc_copy->class = NULL; rsc_copy->provider = NULL; if(rsc->class != NULL) { rsc_copy->class = crm_strdup(rsc->class); } if(rsc->provider != NULL) { rsc_copy->provider = crm_strdup(rsc->provider); } /* GHashTable* params; */ rsc_copy->params = NULL; rsc_copy->ops = NULL; return rsc_copy; } crm_data_t* create_node_state( const char *uuid, const char *uname, const char *ha_state, const char *ccm_state, const char *crmd_state, const char *join_state, const char *exp_state, const char *src) { crm_data_t *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_debug_2("%s Creating node state entry for %s", src, uname); set_uuid(fsa_cluster_conn, node_state, XML_ATTR_UUID, uname); set_xml_property_copy(node_state, XML_ATTR_UNAME, uname); set_xml_property_copy( node_state, XML_CIB_ATTR_HASTATE, ha_state); set_xml_property_copy( node_state, XML_CIB_ATTR_INCCM, ccm_state); set_xml_property_copy( node_state, XML_CIB_ATTR_CRMDSTATE, crmd_state); set_xml_property_copy( node_state, XML_CIB_ATTR_JOINSTATE, join_state); set_xml_property_copy( node_state, XML_CIB_ATTR_EXPSTATE, exp_state); set_xml_property_copy(node_state, "origin", src); crm_log_xml_debug_3(node_state, "created"); return node_state; } gboolean need_transition(enum crmd_fsa_state state) { if(state == S_POLICY_ENGINE || state == S_TRANSITION_ENGINE || state == S_IDLE) { return TRUE; } return FALSE; } extern GHashTable *ipc_clients; void process_client_disconnect(crmd_client_t *curr_client) { struct crm_subsystem_s *the_subsystem = NULL; CRM_DEV_ASSERT(curr_client != NULL); if(crm_assert_failed) { return; } crm_debug_2("received HUP from %s", curr_client->table_key); if (curr_client->sub_sys == NULL) { crm_debug("Client hadn't registered with us yet"); } else if (strcmp(CRM_SYSTEM_PENGINE, curr_client->sub_sys) == 0) { the_subsystem = pe_subsystem; } else if (strcmp(CRM_SYSTEM_TENGINE, curr_client->sub_sys) == 0) { the_subsystem = te_subsystem; } else if (strcmp(CRM_SYSTEM_CIB, curr_client->sub_sys) == 0){ the_subsystem = cib_subsystem; } if(the_subsystem != NULL) { the_subsystem->ipc = NULL; } /* else that was a transient client */ if (curr_client->table_key != NULL) { /* * Key is destroyed below: * curr_client->table_key * Value is cleaned up by: * G_main_del_IPC_Channel */ g_hash_table_remove(ipc_clients, curr_client->table_key); } crm_free(curr_client->table_key); crm_free(curr_client->sub_sys); crm_free(curr_client->uuid); crm_free(curr_client); }