diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index fced72f650..2940a35554 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -1,430 +1,433 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, xmlNode *msg); xmlNode *need_abort(xmlNode *update); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; xmlNode *diff = NULL; xmlNode *status = NULL; xmlNode *cib_top = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if(transition_graph == NULL) { crm_debug_3("No graph"); return; } else if(rc < cib_ok) { crm_debug_3("Filter rc=%d (%s)", rc, cib_error2string(rc)); return; } else if(transition_graph->complete == TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_debug_2("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates, fsa_state2string(fsa_state)); log_cib_diff(LOG_DEBUG_2, diff, op); /* Process anything that was added */ cib_top = get_xpath_object_relative("//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB, diff, LOG_ERR); status = first_named_child(cib_top, XML_CIB_TAG_STATUS); if(status != NULL) { /* newly completed resource operations */ extract_event(status); } /* configuration changes */ if(need_abort(cib_top)) { return; } /* Process anything that was removed */ cib_top = get_xpath_object_relative("//"XML_TAG_DIFF_REMOVED"//"XML_TAG_CIB, diff, LOG_DEBUG); /* configuration changes */ if(need_abort(cib_top)) { return; } /* node attributes that were removed */ status = first_named_child(cib_top, XML_CIB_TAG_STATUS); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, xmlNode *attrs = find_xml_node(node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" deletions"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } ); } gboolean process_te_message(xmlNode *msg, xmlNode *xml_data) { xmlNode *xml_obj = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_xml(LOG_DEBUG_3, "ipc", msg); if(op == NULL){ /* error */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ xml_obj = xml_data; CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_info("Processing (N)ACK %s from %s", crm_element_value(msg, XML_ATTR_REFERENCE), from); extract_event(xml_obj); } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; char *uuid = NULL; crm_action_t *stonith_action = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* restore the orignal transition timeout */ stonith_op_active--; if(stonith_op_active == 0) { crm_info("Restoring transition timeout: %d", active_timeout); transition_graph->transition_timeout = active_timeout; } /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key( op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); } stonith_action = get_action(stonith_id, TRUE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); goto bail; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = crm_meta_value(stonith_action->params, XML_ATTR_TE_ALLOWFAIL); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); bail: crm_free(uuid); return; } void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "Failed update"); } else { erase_status_tag(user_data, XML_CIB_TAG_LRM); } crm_free(user_data); } void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( LOG_WARNING,"Action missed its timeout: ", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT, EXECRA_UNKNOWN_ERROR); } return FALSE; } static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; const char *key = NULL; const char *task = NULL; const char *node = NULL; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm_info("Action %s %d unconfirmed from %s", key, action->id, node); if(action->type != action_type_rsc) { continue; } else if(send_updates == FALSE) { continue; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ continue; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ continue; } cib_action_update(action, LRM_OP_PENDING, EXECRA_STATUS_UNKNOWN); ); ); if(unconfirmed > 0) { crm_warn("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); if(transition_graph == NULL) { crm_err("No current graph"); return FALSE; } crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); - if(transition_graph->complete) { + if(fsa_state != S_TRANSITION_ENGINE) { + crm_err("Discarding transition timeout in state: %s", fsa_state2string(fsa_state)); + + } else if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." " marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } diff --git a/crmd/tengine.c b/crmd/tengine.c index 2b5ab4aac5..29f1d7444c 100644 --- a/crmd/tengine.c +++ b/crmd/tengine.c @@ -1,280 +1,281 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include #include extern crm_graph_functions_t te_graph_fns; struct crm_subsystem_s *te_subsystem = NULL; static void global_cib_callback(const xmlNode *msg, int callid ,int rc, xmlNode *output) { } static crm_graph_t *create_blank_graph(void) { crm_graph_t *a_graph = unpack_graph(NULL, NULL); a_graph->complete = TRUE; a_graph->abort_reason = "DC Takeover"; a_graph->completion_action = tg_restart; return a_graph; } /* A_TE_START, A_TE_STOP, A_TE_RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int dummy; gboolean init_ok = TRUE; cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; if(action & A_TE_STOP) { + stop_te_timer(transition_timer); if(transition_graph) { destroy_graph(transition_graph); transition_graph = NULL; } if(fsa_cib_conn && cib_ok != fsa_cib_conn->cmds->del_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } clear_bit_inplace(fsa_input_register, te_subsystem->flag_connected); crm_info("Transitioner is now inactive"); if(stonith_src) { GCHSource *source = stonith_src; crm_info("Disconnecting STONITH..."); stonith_src = NULL; /* so that we don't try to reconnect */ G_main_del_IPC_Channel(source); stonithd_signoff(); } } if((action & A_TE_START) == 0) { return; } else if(is_set(fsa_input_register, te_subsystem->flag_connected)) { crm_debug("The transitioner is already active"); return; } else if((action & A_TE_START) && cur_state == S_STOPPING) { crm_info("Ignoring request to start %s while shutting down", te_subsystem->name); return; } cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); te_uuid = crm_strdup(uuid_str); crm_info("Registering TE UUID: %s", te_uuid); if(transition_trigger == NULL) { transition_trigger = G_main_add_TriggerHandler( G_PRIORITY_LOW, te_graph_trigger, NULL, NULL); } if(stonith_reconnect == NULL) { stonith_reconnect = G_main_add_TriggerHandler( G_PRIORITY_LOW, te_connect_stonith, &dummy, NULL); } if(cib_ok != fsa_cib_conn->cmds->add_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } if(cib_EXISTS != fsa_cib_conn->cmds->add_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Set duplicate CIB notification callback"); } if(cib_ok != fsa_cib_conn->cmds->set_op_callback(fsa_cib_conn, global_cib_callback)) { crm_err("Could not set CIB global callback"); init_ok = FALSE; } if(init_ok) { G_main_set_trigger(stonith_reconnect); set_graph_functions(&te_graph_fns); if(transition_graph) { destroy_graph(transition_graph); } /* create a blank one */ transition_graph = create_blank_graph(); crm_malloc0(transition_timer, sizeof(crm_action_timer_t)); transition_timer->source_id = 0; transition_timer->reason = timeout_abort; transition_timer->action = NULL; crm_debug("Transitioner is now active"); set_bit_inplace(fsa_input_register, te_subsystem->flag_connected); } } /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(AM_I_DC == FALSE) { crm_err("Not DC: No need to invoke the TE (anymore): %s", fsa_action2string(action)); return; } else if(fsa_state != S_TRANSITION_ENGINE && (action & A_TE_INVOKE)) { crm_err("No need to invoke the TE (%s) in state %s", fsa_action2string(action), fsa_state2string(fsa_state)); return; } if(action & A_TE_CANCEL) { crm_debug("Cancelling the active Transition"); abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); } else if(action & A_TE_HALT) { abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); } else if(action & A_TE_INVOKE) { const char *value = NULL; xmlNode *graph_data = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); if(graph_file == NULL && input->xml == NULL) { crm_log_xml_err(input->msg, "Bad command"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if(transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition(INFINITY, tg_restart, "Transition Active", NULL); return; } stop_te_timer(transition_timer); graph_data = input->xml; if(graph_data == NULL && graph_file != NULL) { graph_data = filename2xml(graph_file); } CRM_CHECK(graph_data != NULL, crm_err("Input raised by %s is invalid", msg_data->origin); crm_log_xml_err(input->msg, "Bad command"); return); destroy_graph(transition_graph); transition_graph = unpack_graph(graph_data, graph_input); CRM_CHECK(transition_graph != NULL, transition_graph = create_blank_graph(); return); crm_info("Processing graph %d derived from %s", transition_graph->id, graph_input); if(transition_graph->transition_timeout > 0) { start_global_timer(transition_timer, transition_graph->transition_timeout); } value = crm_element_value(graph_data, "failed-stop-offset"); if(value) { failed_stop_offset = crm_strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if(value) { failed_start_offset = crm_strdup(value); } trigger_graph(); print_graph(LOG_DEBUG_2, transition_graph); if(graph_data != input->xml) { free_xml(graph_data); } } } #if 0 gboolean shuttingdown; gboolean tengine_shutdown(int nsig, gpointer unused) { shuttingdown = TRUE; abort_transition(INFINITY, tg_shutdown, "Shutdown", NULL); return TRUE; } gboolean te_stop(void) { destroy_graph(transition_graph); crm_free(transition_timer); #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { stonithd_signoff(); } #endif crm_free(te_uuid); } #endif