diff --git a/crm/crmd/fsa.c b/crm/crmd/fsa.c index e3909458ad..c7c0d273d8 100644 --- a/crm/crmd/fsa.c +++ b/crm/crmd/fsa.c @@ -1,797 +1,797 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include longclock_t action_start = 0; longclock_t action_stop = 0; longclock_t action_diff = 0; unsigned int action_diff_ms = 0; char *fsa_our_dc = NULL; cib_t *fsa_cib_conn = NULL; char *fsa_our_dc_version = NULL; ll_lrm_t *fsa_lrm_conn; ll_cluster_t *fsa_cluster_conn; oc_node_list_t *fsa_membership_copy; const char *fsa_our_uuid = NULL; const char *fsa_our_uname = NULL; fsa_timer_t *wait_timer = NULL; fsa_timer_t *recheck_timer = NULL; fsa_timer_t *election_trigger = NULL; fsa_timer_t *election_timeout = NULL; fsa_timer_t *integration_timer = NULL; fsa_timer_t *finalization_timer = NULL; fsa_timer_t *shutdown_escalation_timer = NULL; volatile gboolean do_fsa_stall = FALSE; volatile long long fsa_input_register = 0; volatile long long fsa_actions = A_NOTHING; volatile enum crmd_fsa_state fsa_state = S_STARTING; extern uint highest_born_on; extern uint num_join_invites; extern GHashTable *welcomed_nodes; extern GHashTable *finalized_nodes; extern GHashTable *confirmed_nodes; extern GHashTable *integrated_nodes; extern void initialize_join(gboolean before); #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt...) do_crm_log(LOG_DEBUG_2, NULL, NULL, fmt) long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data); long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input); void dump_rsc_info(void); void dump_rsc_info_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); void ghash_print_node(gpointer key, gpointer value, gpointer user_data); void s_crmd_fsa_actions(fsa_data_t *fsa_data); void log_fsa_input(fsa_data_t *stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX"digraph \"g\" {"); do_dot_log(DOT_PREFIX" size = \"30,30\""); do_dot_log(DOT_PREFIX" graph ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" node ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" shape = \"ellipse\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" edge ["); do_dot_log(DOT_PREFIX" fontsize = \"12\""); do_dot_log(DOT_PREFIX" fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX" fontcolor = \"black\""); do_dot_log(DOT_PREFIX" color = \"black\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// special nodes"); do_dot_log(DOT_PREFIX" \"S_PENDING\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"blue\""); do_dot_log(DOT_PREFIX" fontcolor = \"blue\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX" \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX" ["); do_dot_log(DOT_PREFIX" color = \"red\""); do_dot_log(DOT_PREFIX" fontcolor = \"red\""); do_dot_log(DOT_PREFIX" ]"); do_dot_log(DOT_PREFIX"// DC only nodes"); do_dot_log(DOT_PREFIX" \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX" \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t *fsa_data, long long an_action, enum crmd_fsa_input (*function)(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data)) { int action_log_level = LOG_DEBUG; gboolean do_time_check = TRUE; enum crmd_fsa_input result = I_NULL; if(is_set(fsa_actions, an_action) == FALSE) { crm_err("Action %s (%.16llx) was not requestsed", fsa_action2string(an_action), an_action); return; } if(an_action & A_MSG_ROUTE) { action_log_level = LOG_DEBUG_2; } else if(an_action & A_CIB_START) { do_time_check = FALSE; } fsa_actions = clear_bit(fsa_actions, an_action); crm_debug_3("Invoking action %s (%.16llx)", fsa_action2string(an_action), an_action); if(do_time_check && action_diff_max_ms > 0) { action_start = time_longclock(); } do_crm_log(action_log_level, NULL, NULL, DOT_PREFIX"\t// %s", fsa_action2string(an_action)); result = function(an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data); CRM_DEV_ASSERT(result == I_NULL); crm_debug_3("Action complete: %s (%.16llx)", fsa_action2string(an_action), an_action); if(do_time_check && action_diff_max_ms > 0) { action_stop = time_longclock(); action_diff = sub_longclock(action_stop, action_start); action_diff_ms = longclockto_ms(action_diff); if(action_diff_ms > action_diff_max_ms) { crm_err("Action %s took %dms to complete", fsa_action2string(an_action), action_diff_ms); } else if(action_diff_ms > action_diff_warn_ms) { crm_warn("Action %s took %dms to complete", fsa_action2string(an_action), action_diff_ms); } } } enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; enum crmd_fsa_state last_state = fsa_state; crm_debug_2("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); do_fsa_stall = FALSE; if(is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while(is_message() && do_fsa_stall == FALSE) { crm_debug_2("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; if(fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } /* if(fsa_actions & A_SHUTDOWN) { crm_log_level = LOG_DEBUG_2; } */ #ifdef FSA_TRACE if(new_actions != A_NOTHING) { crm_debug_2("Adding FSA actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(new_actions, "\tFSA scheduled"); } else if(fsa_data->fsa_input != I_NULL && new_actions == A_NOTHING) { crm_debug("No action specified for input,state (%s,%s)", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); } if(fsa_data->actions != A_NOTHING) { crm_debug_2("Adding input actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_data->actions,"\tInput scheduled"); } #endif /* logging : *before* the state is changed */ if(is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if(is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if(is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Hook to allow actions to removed due to certain inputs */ fsa_actions = clear_flags( fsa_actions, cause, fsa_state, fsa_data->fsa_input); /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if(last_state != fsa_state){ fsa_actions = do_state_transition( fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX"\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if(g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall?"true":"false"); } else { crm_debug_2("Exiting the FSA"); } /* cleanup inputs? */ if(register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added input:", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed input:", register_copy ^ same); } #if 0 if(fsa_state != S_STARTING && g_list_length(fsa_message_queue) && is_ipc_empty(te_subsystem->ipc) && is_ipc_empty(pe_subsystem->ipc) && is_ipc_empty(fsa_cib_conn->cmds->channel(fsa_cib_conn) && is_ipc_empty(fsa_cluster_conn->llc_ops->ipcchan(fsa_cluster_conn)) ){ static gboolean mem_needs_init = TRUE; if(mem_needs_init) { crm_debug("Reached a stable point:" " reseting memory usage stats to zero"); crm_zero_mem_stats(NULL); mem_needs_init = FALSE; } else { crm_debug("Reached a stable point:" " checking memory usage"); crm_mem_stats(NULL); } } #endif fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t *fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ while(fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { msg_queue_helper(); CRM_DEV_ASSERT(fsa_data != NULL); if(crm_assert_failed) { return; } /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if(is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if(is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } else if(is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! before anything worse happens */ } else if(is_set(fsa_actions, A_EXIT_1)) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* essential start tasks */ } else if(is_set(fsa_actions, A_STARTUP)) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if(is_set(fsa_actions, A_CIB_START)) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if(is_set(fsa_actions, A_HA_CONNECT)) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if(is_set(fsa_actions, A_READCONFIG)) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if(is_set(fsa_actions, A_LRM_CONNECT)) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if(is_set(fsa_actions, A_CCM_CONNECT)) { do_fsa_action(fsa_data, A_CCM_CONNECT, do_ccm_control); } else if(is_set(fsa_actions, A_TE_START)) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if(is_set(fsa_actions, A_PE_START)) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* sub-system restart */ } else if(is_set(fsa_actions, O_CIB_RESTART)) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if(is_set(fsa_actions, O_PE_RESTART)) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if(is_set(fsa_actions, O_TE_RESTART)) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* Timers */ /* else if(is_set(fsa_actions, O_DC_TIMER_RESTART)) { do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */; } else if(is_set(fsa_actions, A_DC_TIMER_STOP)) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if(is_set(fsa_actions, A_INTEGRATE_TIMER_STOP)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if(is_set(fsa_actions, A_INTEGRATE_TIMER_START)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START,do_timer_control); } else if(is_set(fsa_actions, A_FINALIZE_TIMER_STOP)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if(is_set(fsa_actions, A_FINALIZE_TIMER_START)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if(is_set(fsa_actions, A_CIB_BUMPGEN)) { do_fsa_action(fsa_data, A_CIB_BUMPGEN, do_cib_invoke); } else if(is_set(fsa_actions, A_MSG_ROUTE)) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if(is_set(fsa_actions, A_RECOVER)) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if(is_set(fsa_actions, A_CL_JOIN_RESULT)) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if(is_set(fsa_actions, A_CL_JOIN_REQUEST)) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if(is_set(fsa_actions, A_SHUTDOWN_REQ)) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if(is_set(fsa_actions, A_ELECTION_VOTE)) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if(is_set(fsa_actions, A_ELECTION_COUNT)) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); /* * High priority actions * Update the cache first */ } else if(is_set(fsa_actions, A_CCM_UPDATE_CACHE)) { do_fsa_action(fsa_data, A_CCM_UPDATE_CACHE, do_ccm_update_cache); } else if(is_set(fsa_actions, A_CCM_EVENT)) { do_fsa_action(fsa_data, A_CCM_EVENT, do_ccm_event); } else if(is_set(fsa_actions, A_STARTED)) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if(is_set(fsa_actions, A_CL_JOIN_QUERY)) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if(is_set(fsa_actions, A_DC_TIMER_START)) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions */ } else if(is_set(fsa_actions, A_DC_TAKEOVER)) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if(is_set(fsa_actions, A_DC_RELEASE)) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if(is_set(fsa_actions, A_ELECTION_CHECK)) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if(is_set(fsa_actions, A_ELECTION_START)) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if(is_set(fsa_actions, A_TE_HALT)) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if(is_set(fsa_actions, A_TE_CANCEL)) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if(is_set(fsa_actions, A_DC_JOIN_OFFER_ALL)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if(is_set(fsa_actions, A_DC_JOIN_OFFER_ONE)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_all); } else if(is_set(fsa_actions, A_DC_JOIN_PROCESS_REQ)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if(is_set(fsa_actions, A_DC_JOIN_PROCESS_ACK)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * PE, and the PE before the TE */ } else if(is_set(fsa_actions, A_CIB_INVOKE_LOCAL)) { do_fsa_action(fsa_data, A_CIB_INVOKE_LOCAL, do_cib_invoke); } else if(is_set(fsa_actions, A_CIB_INVOKE)) { do_fsa_action(fsa_data, A_CIB_INVOKE, do_cib_invoke); } else if(is_set(fsa_actions, A_DC_JOIN_FINALIZE)) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if(is_set(fsa_actions, A_LRM_INVOKE)) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if(is_set(fsa_actions, A_LRM_EVENT)) { do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); } else if(is_set(fsa_actions, A_PE_INVOKE)) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if(is_set(fsa_actions, A_TE_INVOKE)) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); } else if(is_set(fsa_actions, A_CL_JOIN_ANNOUNCE)) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* sub-system stop */ } else if(is_set(fsa_actions, A_DC_RELEASED)) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if(is_set(fsa_actions, A_PE_STOP)) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if(is_set(fsa_actions, A_TE_STOP)) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if(is_set(fsa_actions, A_SHUTDOWN)) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if(is_set(fsa_actions, A_STOP)) { do_fsa_action(fsa_data, A_STOP, do_stop); } else if(is_set(fsa_actions, A_CCM_DISCONNECT)) { do_fsa_action(fsa_data, A_CCM_DISCONNECT, do_ccm_control); } else if(is_set(fsa_actions, A_LRM_DISCONNECT)) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if(is_set(fsa_actions, A_HA_DISCONNECT)) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if(is_set(fsa_actions, A_CIB_STOP)) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); /* exit gracefully */ } else if(is_set(fsa_actions, A_EXIT_0)) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { crm_err("Action %s (0x%llx) not supported ", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t *stored_msg) { crm_debug_2("Processing queued input %d", stored_msg->id); if(stored_msg->fsa_cause == C_CCM_CALLBACK) { crm_debug_3("FSA processing CCM callback from %s", stored_msg->origin); } else if(stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_debug_3("FSA processing LRM callback from %s", stored_msg->origin); } else if(stored_msg->data == NULL) { crm_debug_3("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv( stored_msg, fsa_dt_ha_msg, __FUNCTION__); crm_debug_3("FSA processing XML message from %s", stored_msg->origin); crm_log_message(LOG_MSG, ha_input->msg); crm_log_xml_debug_3(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data) { long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_DEV_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX"\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); - crm_info("State transition %s -> %s [ input=%s cause=%s origin=%s ]", - state_from, state_to, input, fsa_cause2string(cause), - msg_data->origin); + crm_info("%s: State transition %s -> %s [ input=%s cause=%s origin=%s ]", + fsa_our_uname, state_from, state_to, input, + fsa_cause2string(cause), msg_data->origin); /* the last two clauses might cause trouble later */ if(election_timeout != NULL && next_state != S_ELECTION && cur_state != S_RELEASE_DC) { crm_timer_stop(election_timeout); /* } else { */ /* crm_timer_start(election_timeout); */ } #if 0 if(is_set(fsa_input_register, R_SHUTDOWN)){ set_bit_inplace(tmp, A_DC_TIMER_STOP); } #endif if(next_state == S_INTEGRATION) { set_bit_inplace(tmp, A_INTEGRATE_TIMER_START); } else { set_bit_inplace(tmp, A_INTEGRATE_TIMER_STOP); } if(next_state == S_FINALIZE_JOIN) { set_bit_inplace(tmp, A_FINALIZE_TIMER_START); } else { set_bit_inplace(tmp, A_FINALIZE_TIMER_STOP); } if(next_state != S_PENDING) { set_bit_inplace(tmp, A_DC_TIMER_STOP); } if(next_state != S_ELECTION) { highest_born_on = 0; } if(next_state != S_IDLE) { crm_timer_stop(recheck_timer); } switch(next_state) { case S_PENDING: fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); /* fall through */ case S_ELECTION: crm_debug_2("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); update_dc(NULL, FALSE); break; case S_NOT_DC: if(is_set(fsa_input_register, R_SHUTDOWN)){ crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit_inplace(tmp, A_SHUTDOWN_REQ); } CRM_DEV_ASSERT(fsa_our_dc != NULL); if(fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_DEV_ASSERT(AM_I_DC); if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(welcomed_nodes) > 0) { char *msg = crm_strdup( " Welcome reply not received from"); crm_warn("%u cluster nodes failed to respond" " to the join offer.", g_hash_table_size(welcomed_nodes)); g_hash_table_foreach( welcomed_nodes, ghash_print_node, msg); crm_free(msg); } else { crm_info("All %d cluster nodes " "responded to the join offer.", g_hash_table_size(integrated_nodes)); } break; case S_POLICY_ENGINE: CRM_DEV_ASSERT(AM_I_DC); if(cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if(g_hash_table_size(finalized_nodes) > 0) { char *msg = crm_strdup( " Confirm not received from"); crm_err("%u cluster nodes failed to confirm" " their join.", g_hash_table_size(finalized_nodes)); g_hash_table_foreach( finalized_nodes, ghash_print_node, msg); crm_free(msg); } else if(g_hash_table_size(confirmed_nodes) == fsa_membership_copy->members_size) { crm_info("All %u cluster nodes are" " eligable to run resources.", fsa_membership_copy->members_size); } else if(g_hash_table_size(confirmed_nodes) > fsa_membership_copy->members_size) { crm_err("We have more confirmed nodes than our membership does"); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else { crm_warn("Only %u of %u cluster " "nodes are eligable to run resources", g_hash_table_size(confirmed_nodes), fsa_membership_copy->members_size); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: CRM_DEV_ASSERT(AM_I_DC); dump_rsc_info(); if(is_set(fsa_input_register, R_SHUTDOWN)){ crm_info("(Re)Issuing shutdown request now" " that we are the DC"); set_bit_inplace(tmp, A_SHUTDOWN_REQ); } if(recheck_timer->period_ms > 0) { crm_timer_start(recheck_timer); } break; default: break; } if(clear_recovery_bit && next_state != S_PENDING) { tmp = clear_bit(tmp, A_RECOVER); } else if(clear_recovery_bit == FALSE) { tmp = set_bit(tmp, A_RECOVER); } if(tmp != actions) { fsa_dump_actions(actions ^ tmp, "New actions"); actions = tmp; } return actions; } long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input) { long long saved_actions = actions; long long startup_actions = A_STARTUP|A_CIB_START|A_LRM_CONNECT|A_CCM_CONNECT|A_HA_CONNECT|A_READCONFIG|A_STARTED|A_CL_JOIN_QUERY; if(cur_state == S_STOPPING || is_set(fsa_input_register, R_SHUTDOWN)) { clear_bit_inplace(actions, startup_actions); } fsa_dump_actions(actions ^ saved_actions, "Cleared Actions"); return actions; } void dump_rsc_info(void) { } void ghash_print_node(gpointer key, gpointer value, gpointer user_data) { const char *text = user_data; const char *uname = key; const char *value_s = value; crm_info("%s: %s %s", text, uname, value_s); } diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in index 9539989386..39ad73c61b 100755 --- a/cts/CM_LinuxHAv2.py.in +++ b/cts/CM_LinuxHAv2.py.in @@ -1,612 +1,617 @@ #!@PYTHON@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... ''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os,sys,CTS,CTSaudits,CTStests, warnings from CTS import * from CM_hb import HeartbeatCM from xml.dom.minidom import * from CTSaudits import ClusterAudit from CTStests import * from CIB import * ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class LinuxHAv2(HeartbeatCM): ''' The linux-ha version 2 cluster manager class. It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None): HeartbeatCM.__init__(self, Environment, randseed=randseed) self.cib_installed = 0 self.config = None self.cluster_monitor = 0 self.use_short_names = 1 self.update({ "Name" : "linux-ha-v2", "DeadTime" : 300, "StartTime" : 300, # Max time to start up "StableTime" : 30, "StartCmd" : "@INITDIR@/heartbeat@INIT_EXT@ start > /dev/null 2>&1", "StopCmd" : "@INITDIR@/heartbeat@INIT_EXT@ stop > /dev/null 2>&1", "ElectionCmd" : "@sbindir@/crmadmin -E %s", "StatusCmd" : "@sbindir@/crmadmin -S %s 2>/dev/null", "EpocheCmd" : "@sbindir@/ccm_tool -e", "QuorumCmd" : "@sbindir@/ccm_tool -q", "CibQuery" : "@sbindir@/cibadmin -Ql", "ParitionCmd" : "@sbindir@/ccm_tool -p", "IsRscRunning" : "@libdir@/heartbeat/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return", "ExecuteRscOp" : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null", "CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd2" : "@libdir@/heartbeat/TestHeartbeatComm break-communication %s>/dev/null 2>&1", "IsIPAddrRscRunning" : "", "StandbyCmd" : "@sbindir@/crm_standby -U %s -v %s 2>/dev/null", "UUIDQueryCmd" : "@sbindir@/crmadmin -N", "StandbyQueryCmd" : "@sbindir@/crm_standby -GQ -U %s 2>/dev/null", # Patterns to look for in the log files for various occasions... - "Pat:DC_IDLE" : "crmd:.*State transition.*-> S_IDLE", + "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions # Use: "Pat:They_started" : "%s crmd:.*State transition.*-> S_NOT_DC", "Pat:They_started" : "Updating node state to member for %s", - "Pat:We_started" : "%s crmd:.*State transition.*-> S_IDLE", - "Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete", + "Pat:We_started" : "crmd.*%s: State transition.*-> S_IDLE", + "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s crmd:.*LOST:.* %s ", - "Pat:All_stopped" : "%s .*heartbeat.*Heartbeat shutdown complete", + "Pat:All_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", + # Bad news Regexes. Should never occur. "BadRegexes" : ( r"ERROR:", r"CRIT:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", - r"pengine:.*Attempting recovery of resource", - r"pengine:.*Handling failed ", - r"tengine:.*is taking more than 2x its timeout", + r"pengine.*Attempting recovery of resource", + r"pengine.*Handling failed ", + r"tengine.*is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", ), }) del self["Standby"] + if self.Env["DoBSC"]: + del self["Pat:They_stopped"] + del self["Pat:Logd_stopped"] + self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.default_cts_cib=CIB(self).cib() self.debug(self.default_cts_cib) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "crmadmin:" ] return [] def install_config(self, node): if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBDIR@/heartbeat/crm/cib.xml") self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBDIR@/heartbeat/crm/cib.xml.sig") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.debug("Installing Generated CIB on node %s" %(node)) warnings.filterwarnings("ignore") cib_file=os.tmpnam() warnings.resetwarnings() os.system("rm -f "+cib_file) self.debug("Creating new CIB for " + node + " in: " + cib_file) os.system("echo \'" + self.default_cts_cib + "\' > " + cib_file) - if 0!=self.rsh.cp(cib_file,"root@" + (self["CIBfile"]%node)): - raise ValueError("Can not scp file to %s "%node) + if 0!=self.rsh.echo_cp(None, cib_file, node, "@HA_VARLIBDIR@/heartbeat/crm/cib.xml"): + raise ValueError("Can not create CIB on %s "%node) os.system("rm -f "+cib_file) else: self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) - if 0!=self.rsh.cp(self.Env["CIBfilename"], - "root@" + (self["CIBfile"]%node)): + if 0!=self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s "%node) self.rsh.remote_py(node, "os", "system", "chown @HA_CCMUSER@ @HA_VARLIBDIR@/heartbeat/crm/cib.xml") def prepare(self): '''Finish the Initialization process. Prepare to test...''' for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' if not self.ns.WaitForNodeToComeUp(node): self.log("WaitForNodeToComeUp failed, node %s set to down" % node) self.ShouldBeStatus[node] == self["down"] return 3 watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:They_started"]%node) idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats) idle_watch.setwatch() out=self.rsh.readaline(node, self["StatusCmd"]%node) ret= (string.find(out, 'ok') != -1) self.debug("Node %s status: %s" %(node, out)) if not ret: if self.ShouldBeStatus[node] == self["up"]: self.log( "Node status for %s is %s but we think it should be %s" %(node, self["down"], self.ShouldBeStatus[node])) self.ShouldBeStatus[node]=self["down"] return 0 if self.ShouldBeStatus[node] == self["down"]: self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, self["up"], self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]=self["up"] # check the output first - because syslog-ng looses messages if string.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if string.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" %(node)) return None def cluster_stable(self, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self["Pat:DC_IDLE"]) if timeout == None: timeout = self["DeadTime"] idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout) idle_watch.setwatch() if not self.ns.WaitForAllNodesToComeUp(self.Env["nodes"]): self.log("WaitForAllNodesToComeUp failed.") return None any_up = 0 for node in self.Env["nodes"]: # have each node dump its current state if self.ShouldBeStatus[node] == self["up"]: self.rsh.readaline(node, (self["StatusCmd"] %node) ) any_up = 1 if any_up == 0 or idle_watch.look(): return 1 self.log("Warn: Cluster Master not IDLE") return None def is_node_dc(self, node, status_line=None): rc = 0 if not self.ns.WaitForNodeToComeUp(node): self.log("WaitForNodeToComeUp %s failed." % node) return rc if not status_line: status_line = self.rsh.readaline(node, self["StatusCmd"]%node) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 if rc == 1: self.debug("%s _is_ the DC" % node) return rc def isolate_node(self, node, allowlist): '''isolate the communication between the nodes''' if not self.ns.WaitForNodeToComeUp(node): self.log("WaitForNodeToComeUp %s failed." % node) return None rc = self.rsh(node, self["BreakCommCmd2"]%allowlist) if rc == 0: return 1 else: self.log("Could not break the communication from node: %s",node) return None def Configuration(self): if self.config: return self.config.getElementsByTagName('configuration')[0] warnings.filterwarnings("ignore") cib_file=os.tmpnam() warnings.resetwarnings() os.system("rm -f "+cib_file) if self.Env["ClobberCIB"] == 1: if self.Env["CIBfilename"] == None: self.debug("Creating new CIB in: " + cib_file) os.system("echo \'"+ self.default_cts_cib +"\' > "+ cib_file) else: os.system("cp "+self.Env["CIBfilename"]+" "+cib_file) - else: - if 0!=self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],cib_file): - raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"]) + else: + if 0 != self.rsh.echo_cp( + self.Env["nodes"][0], "@HA_VARLIBDIR@/heartbeat/crm/cib.xml", None, cib_file): + raise ValueError("Can not copy file to %s, maybe permission denied"%cib_file) self.config = parse(cib_file) os.remove(cib_file) return self.config.getElementsByTagName('configuration')[0] def Resources(self): ResourceList = [] #read resources in cib configuration = self.Configuration() resources = configuration.getElementsByTagName('resources')[0] rscs = configuration.getElementsByTagName('primitive') incs = configuration.getElementsByTagName('clone') groups = configuration.getElementsByTagName('group') for rsc in rscs: if rsc in resources.childNodes: ResourceList.append(HAResource(self,rsc)) for grp in groups: for rsc in rscs: if rsc in grp.childNodes: if self.use_short_names: resource = HAResource(self,rsc) else: resource = HAResource(self,rsc,grp.getAttribute('id')) ResourceList.append(resource) for inc in incs: max = 0 inc_name = inc.getAttribute("id") instance_attributes = inc.getElementsByTagName('instance_attributes')[0] attributes = instance_attributes.getElementsByTagName('attributes')[0] nvpairs = attributes.getElementsByTagName('nvpair') for nvpair in nvpairs: if nvpair.getAttribute("name") == "clone_max": max = int(nvpair.getAttribute("value")) inc_rsc = inc.getElementsByTagName('primitive')[0] for i in range(0,max): rsc = HAResource(self,inc_rsc) rsc.inc_no = i rsc.inc_name = inc_name rsc.inc_max = max if self.use_short_names: rsc.rid = rsc.rid + ":%d"%i else: rsc.rid = inc_name+":"+rsc.rid + ":%d"%i rsc.Instance = rsc.rid ResourceList.append(rsc) return ResourceList def ResourceGroups(self): GroupList = [] #read resources in cib configuration = self.Configuration() groups = configuration.getElementsByTagName('group') rscs = configuration.getElementsByTagName('primitive') for grp in groups: group = [] GroupList.append(group) for rsc in rscs: if rsc in grp.childNodes: if self.use_short_names: resource = HAResource(self,rsc) else: resource = HAResource(self,rsc,grp.getAttribute('id')) group.append(resource) return GroupList def Dependencies(self): DependencyList = [] #read dependency in cib configuration=self.Configuration() constraints=configuration.getElementsByTagName('constraints')[0] rsc_to_rscs=configuration.getElementsByTagName('rsc_to_rsc') for node in rsc_to_rscs: dependency = {} dependency["id"]=node.getAttribute('id') dependency["from"]=node.getAttribute('from') dependency["to"]=node.getAttribute('to') dependency["type"]=node.getAttribute('type') dependency["strength"]=node.getAttribute('strength') DependencyList.append(dependency) return DependencyList def find_partitions(self): ccm_partitions = [] if not self.ns.WaitForAllNodesToComeUp(self.Env["nodes"]): self.log("WaitForAllNodesToComeUp failed.") return None for node in self.Env["nodes"]: self.debug("Retrieving partition details for %s" %node) if self.ShouldBeStatus[node] == self["up"]: partition = self.rsh.readaline(node, self["ParitionCmd"]) if not partition: self.log("no partition details for %s" %node) elif len(partition) > 2: partition = partition[:-1] self.debug("partition details for %s: %s" %(node, partition)) found=0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition") ccm_partitions.append(partition) else: self.log("bad partition details for %s" %node) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] if not self.ns.WaitForAllNodesToComeUp(node_list): self.log("WaitForAllNodesToComeUp failed in HasQuorum.") return 0 for node in node_list: if self.ShouldBeStatus[node] == self["up"]: quorum = self.rsh.readaline(node, self["QuorumCmd"]) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.log("WARN: Unexpected quorum test result from "+ node +":"+ quorum) return 0 def Components(self): complist = [Process("lrmd",self),Process("crmd",self)] if self.Env["DoFencing"] == 1 : complist.append(Process("stonithd",self)) complist.append(Process("heartbeat",self)) return complist def NodeUUID(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("WaitForNodeToComeUp %s failed in NodeUUID." % node) return "" lines = self.rsh.readlines(node, self["UUIDQueryCmd"]) for line in lines: self.debug("UUIDLine:"+ line) m = re.search(r'%s.+\((.+)\)' % node, line) if m: return m.group(1) return "" def StandbyStatus(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("WaitForNodeToComeUp %s failed in StandbyStatus." % node) return "" out=self.rsh.readaline(node, self["StandbyQueryCmd"]%node) if not out: return "off" out = out[:-1] self.debug("Standby result: "+out) return out # status == "on" : Enter Standby mode # status == "off": Enter Active mode def SetStandbyMode(self, node, status): if not self.ns.WaitForNodeToComeUp(node): self.log("WaitForNodeToComeUp %s failed in SetStandbyMode." % node) return True current_status = self.StandbyStatus(node) cmd = self["StandbyCmd"] % (node, status) ret = self.rsh(node, cmd) return True class HAResource(Resource): def __init__(self, cm, node, group=None): ''' Get information from xml node ''' if group == None : self.rid = str(node.getAttribute('id')) else : self.rid = group + ":" + str(node.getAttribute('id')) self.rclass = str(node.getAttribute('class')) self.rtype = str(node.getAttribute('type')) self.inc_name = None self.inc_no = -1 self.inc_max = -1 self.rparameters = {} nvpairs = [] list = node.getElementsByTagName('instance_attributes') if len(list) > 0: attributes = list[0] list = attributes.getElementsByTagName('attributes') if len(list) > 0: parameters = list[0] nvpairs = parameters.getElementsByTagName('nvpair') for nvpair in nvpairs: name=nvpair.getAttribute('name') value=nvpair.getAttribute('value') self.rparameters[name]=value # This should normally be called first... FIXME! Resource.__init__(self, cm, self.rtype, self.rid) # resources that dont need quorum will have: # ops = node.getElementsByTagName('op') for op in ops: if op.getAttribute('name') == "start" and op.getAttribute('prereq') == "nothing": self.needs_quorum = 0 if self.needs_quorum == 0: self.CM.debug("Resource %s does not need quorum" % self.rid) # else: # self.CM.debug("Resource %s DOES need quorum" % self.rid) def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. We call the status operation for the resource script. ''' if not self.CM.ns.WaitForNodeToComeUp(nodename): self.CM.log("WaitForNodeToComeUp %s failed in IsRunningOn." % node) return 0 out=self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"]%self.rid) return re.search("0",out) def RunningNodes(self): ResourceNodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if self.IsRunningOn(node): ResourceNodes.append(node) return ResourceNodes def _ResourceOperation(self, operation, nodename): ''' Execute an operation on the resource ''' if not self.CM.ns.WaitForNodeToComeUp(nodename): self.CM.log("WaitForNodeToComeUp %s failed in _ResourceOperation." % node) return 0 self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"]%(self.rid,operation)) return self.CM.rsh.lastrc == 0 def Start(self, nodename): ''' This member function starts or activates the resource. ''' return self._ResourceOperation("start", nodename) def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' return self._ResourceOperation("stop", nodename) def IsWorkingCorrectly(self, nodename): return self._ResourceOperation("monitor", nodename) ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': pass