diff --git a/crm/tengine/actions.c b/crm/tengine/actions.c index e8875dcf3b..00ef198b3a 100644 --- a/crm/tengine/actions.c +++ b/crm/tengine/actions.c @@ -1,514 +1,473 @@ -/* $Id: actions.c,v 1.15 2006/02/20 17:05:27 andrew Exp $ */ +/* $Id: actions.c,v 1.16 2006/02/27 09:55:57 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; IPC_Channel *crm_ch = NULL; void send_rsc_command(crm_action_t *action); -extern void cib_action_updated(const HA_Message *msg, int call_id, int rc, - crm_data_t *output, void *user_data); +extern crm_action_timer_t *transition_timer; static void te_start_action_timer(crm_action_t *action) { crm_malloc0(action->timer, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action_warn; action->timer->action = action; action->timer->source_id = Gmain_timeout_add( action->timer->timeout, action_timer_callback, (void*)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t *graph, crm_action_t *pseudo) { crm_debug("Event handler: action %d executed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ crm_data_t *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_DEV_ASSERT(op->node_name != NULL); CRM_DEV_ASSERT(op->node_uuid != NULL); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); crm_xml_add(node_state, "origin", __FUNCTION__); rc = te_cib_conn->cmds->update( te_cib_conn, XML_CIB_TAG_STATUS, node_state, NULL, cib_quorum_override|cib_scope_local); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(rc, FALSE, NULL, cib_fencing_updated); } free_xml(node_state); return; } static gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { char *key = NULL; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; stonith_ops_t * st_op = NULL; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); CRM_DEV_ASSERT(id != NULL); CRM_DEV_ASSERT(uuid != NULL); CRM_DEV_ASSERT(target != NULL); if(crm_assert_failed) { /* error */ te_log_action(LOG_ERR, "Corrupted command (id=%s): no node", crm_str(id)); return FALSE; } te_log_action(LOG_INFO, "Executing fencing operation (%s) on %s (timeout=%d)", id, target, transition_graph->transition_timeout / 2); crm_malloc0(st_op, sizeof(stonith_ops_t)); st_op->optype = RESET; st_op->timeout = transition_graph->transition_timeout / 2; st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); key = generate_transition_key(transition_graph->id, te_uuid); st_op->private_data = crm_concat(id, key, ';'); crm_free(key); if(stonithd_input_IPC_channel() == NULL) { crm_err("Cannot fence %s: stonith not available", target); return FALSE; } else if (ST_OK != stonithd_node_fence( st_op )) { crm_err("Cannot fence %s: stonithd_node_fence() call failed ", target); return FALSE; } return TRUE; } static gboolean te_crm_command(crm_graph_t *graph, crm_action_t *action) { char *value = NULL; char *counter = NULL; HA_Message *cmd = NULL; const char *id = NULL; const char *task = NULL; const char *on_node = NULL; gboolean ret = TRUE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_DEV_ASSERT(on_node != NULL && strlen(on_node) != 0); if(crm_assert_failed) { /* error */ te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE; } te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", crm_str(id), crm_str(task), on_node); cmd = create_request(task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_graph->id, te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); ret = send_ipc_message(crm_ch, cmd); crm_free(counter); value = g_hash_table_lookup(action->params, XML_ATTR_TE_NOWAIT); if(ret == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(crm_is_true(value)) { crm_info("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else if(ret && action->timeout > 0) { crm_debug("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; te_start_action_timer(action); } return TRUE; } static gboolean te_rsc_command(crm_graph_t *graph, crm_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ const char *task = NULL; const char *on_node = NULL; action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_DEV_ASSERT(on_node != NULL && strlen(on_node) != 0); if(crm_assert_failed) { /* error */ te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE; } send_rsc_command(action); return TRUE; } gboolean cib_action_update(crm_action_t *action, int status) { char *code = NULL; crm_data_t *fragment = NULL; crm_data_t *state = NULL; crm_data_t *rsc = NULL; crm_data_t *xml_op = NULL; char *op_id = NULL; enum cib_errors rc = cib_ok; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *rsc_id = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override|cib_scope_local; if(status == LRM_OP_TIMEOUT) { if(crm_element_value(action->xml, XML_LRM_ATTR_RSCID) != NULL) { crm_warn("%s: %s %s on %s timed out", crm_element_name(action->xml), task_uuid, rsc_id, target); } else { crm_warn("%s: %s on %s timed out", crm_element_name(action->xml), task_uuid, target); } } code = crm_itoa(status); /* update the CIB */ fragment = NULL; state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); xml_op = create_xml_node(rsc,XML_LRM_TAG_RSC_OP); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(rsc, XML_LRM_ATTR_RSCSTATE, get_rsc_state(task, status)); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, "origin", __FUNCTION__); crm_free(code); code = generate_transition_key(transition_graph->id, te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status, status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); set_node_tstamp(xml_op); fragment = create_cib_fragment(state, XML_CIB_TAG_STATUS); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); rc = te_cib_conn->cmds->update( te_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, call_options); crm_debug("Updating CIB with %s action %d: %s %s on %s (call_id=%d)", op_status2text(status), action->id, task_uuid, rsc_id, target, rc); - if(status == LRM_OP_PENDING) { - crm_debug_2("Waiting for callback id: %d", rc); - add_cib_op_callback(rc, FALSE, action, cib_action_updated); - } + crm_debug_2("Waiting for callback id: %d", rc); + add_cib_op_callback(rc, FALSE, action, cib_action_updated); + free_xml(fragment); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void send_rsc_command(crm_action_t *action) { HA_Message *cmd = NULL; crm_data_t *rsc_op = NULL; char *counter = crm_itoa(transition_graph->id); const char *task = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); rsc_id = crm_element_value(rsc_op, XML_LRM_ATTR_RSCID); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key(transition_graph->id, te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); crm_info("Initiating action %d: %s on %s", action->id, task_uuid, on_node); crm_free(counter); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); - +#if 1 send_ipc_message(crm_ch, cmd); +#else + /* test the TE timer/recovery code */ + if((action->id % 11) == 0) { + crm_err("Faking lost action %d: %s", action->id, task_uuid); + } else { + send_ipc_message(crm_ch, cmd); + } +#endif action->executed = TRUE; value = g_hash_table_lookup(action->params, XML_ATTR_TE_NOWAIT); if(crm_is_true(value)) { crm_debug("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else if(action->timeout > 0) { int action_timeout = 2 * action->timeout; crm_debug_3("Setting timer for action %s", task_uuid); if(transition_graph->transition_timeout < action_timeout) { crm_debug("Action %d:" " Increasing transition %d timeout to %d", action->id, transition_graph->id, transition_graph->transition_timeout); transition_graph->transition_timeout = action_timeout; } te_start_action_timer(action); } } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; -static int -unconfirmed_actions(gboolean send_updates) -{ - int unconfirmed = 0; - crm_debug_2("Unconfirmed actions..."); - slist_iter( - synapse, synapse_t, transition_graph->synapses, lpc, - - /* lookup event */ - slist_iter( - action, crm_action_t, synapse->actions, lpc2, - if(action->executed == FALSE) { - continue; - - } else if(action->confirmed) { - continue; - } - - unconfirmed++; - crm_debug("Action %d: unconfirmed",action->id); - if(send_updates) { - cib_action_update(action, LRM_OP_TIMEOUT); - } - ); - ); - if(unconfirmed > 0) { - crm_info("Waiting on %d unconfirmed actions", unconfirmed); - } - return unconfirmed; -} - void notify_crmd(crm_graph_t *graph) { - int log_level = LOG_DEBUG; - const char *abort_reason = "Complete"; - enum transition_action completion_action = tg_restart; - int id = -1; - - int unconfirmed = unconfirmed_actions(FALSE); - int pending_callbacks = num_cib_op_callbacks(); HA_Message *cmd = NULL; + int log_level = LOG_DEBUG; const char *op = CRM_OP_TEABORT; + int pending_callbacks = num_cib_op_callbacks(); + - if(unconfirmed != 0) { - crm_err("Write %d unconfirmed actions to the CIB", unconfirmed); - /* TODO: actually write them */ - unconfirmed_actions(TRUE); - -/* return; */ - } + stop_te_timer(transition_timer); if(pending_callbacks != 0) { - crm_err("Delaying completion until all CIB updates complete"); + crm_warn("Delaying completion until all CIB updates complete"); return; } CRM_DEV_ASSERT(graph->complete); - - completion_action = graph->completion_action; - id = graph->id; - if(graph->abort_reason != NULL) { - abort_reason = graph->abort_reason; - } - switch(completion_action) { + switch(graph->completion_action) { case tg_stop: op = CRM_OP_TECOMPLETE; log_level = LOG_INFO; break; case tg_abort: case tg_restart: op = CRM_OP_TEABORT; break; case tg_shutdown: crm_info("Exiting after transition"); exit(LSB_EXIT_OK); } te_log_action(log_level, "Transition %d status: %s - %s", - id, op, abort_reason); + graph->id, op, graph->abort_reason); print_graph(log_level, graph); cmd = create_request( op, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_TENGINE, NULL); - if(abort_reason != NULL) { - ha_msg_add(cmd, "message", abort_reason); + if(graph->abort_reason != NULL) { + ha_msg_add(cmd, "message", graph->abort_reason); } send_ipc_message(crm_ch, cmd); graph->abort_reason = NULL; graph->completion_action = tg_restart; } diff --git a/crm/tengine/callbacks.c b/crm/tengine/callbacks.c index 455fbad867..ddb608a06c 100644 --- a/crm/tengine/callbacks.c +++ b/crm/tengine/callbacks.c @@ -1,477 +1,540 @@ -/* $Id: callbacks.c,v 1.69 2006/02/20 16:21:51 andrew Exp $ */ +/* $Id: callbacks.c,v 1.70 2006/02/27 09:55:57 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, HA_Message *msg); void te_update_diff(const char *event, HA_Message *msg); crm_data_t *need_abort(crm_data_t *update); void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; +crm_action_timer_t *transition_timer = NULL; + +static gboolean +start_global_timer(crm_action_timer_t *timer, int timeout) +{ + CRM_ASSERT(timer != NULL); + CRM_DEV_ASSERT(timer > 0); + CRM_DEV_ASSERT(timer->source_id == 0); + + if(timeout <= 0) { + crm_err("Tried to start timer with period: %d", timeout); + + } else if(timer->source_id == 0) { + crm_debug("Starting abort timer: %d", timeout); + timer->timeout = timeout; + timer->source_id = Gmain_timeout_add( + timeout, global_timer_callback, (void*)timer); + CRM_ASSERT(timer->source_id != 0); + return TRUE; + + } else { + crm_err("Timer is already active with period: %d", timer->timeout); + } + + return FALSE; +} void te_update_diff(const char *event, HA_Message *msg) { int rc = -1; const char *op = NULL; crm_data_t *diff = NULL; crm_data_t *aborted = NULL; const char *set_name = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; if(msg == NULL) { crm_err("NULL update"); return; } ha_msg_value_int(msg, F_CIB_RC, &rc); op = cl_get_string(msg, F_CIB_OPERATION); if(rc < cib_ok) { crm_debug_2("Ignoring failed %s operation: %s", op, cib_error2string(rc)); return; } diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_info("Processing diff (%s): %d.%d.%d -> %d.%d.%d", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL && aborted == NULL) { crm_data_t *section = NULL; crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } if(aborted != NULL) { abort_transition( INFINITY, tg_restart, "Non-status change", NULL); } free_xml(diff); return; } gboolean process_te_message(HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender) { crm_data_t *xml_obj = NULL; const char *from = cl_get_string(msg, F_ORIG); const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); const char *sys_from = cl_get_string(msg, F_CRM_SYS_FROM); const char *ref = cl_get_string(msg, XML_ATTR_REFERENCE); const char *op = cl_get_string(msg, F_CRM_TASK); const char *type = cl_get_string(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_message(LOG_DEBUG_3, msg); if(op == NULL){ /* error */ } else if(strcmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if(sys_to == NULL || strcmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(xml_data), XML_TAG_CIB)) { xml_obj = xml_data; } else { xml_obj = find_xml_node(xml_data, XML_TAG_CIB, TRUE); } #else xml_obj = xml_data; CRM_DEV_ASSERT(safe_str_eq(crm_element_name(xml_obj), XML_TAG_CIB)); #endif CRM_DEV_ASSERT(xml_obj != NULL); if(xml_obj != NULL) { xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_DEV_ASSERT(xml_obj != NULL); } if(xml_obj != NULL) { crm_log_message_adv(LOG_DEBUG_2, "Processing NACK Reply", msg); crm_debug("Processing NACK from %s", from); extract_event(xml_obj); } else { crm_log_message_adv(LOG_ERR, "Invalid NACK Reply", msg); } } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { crm_err("Message was a response not a request. Discarding"); return TRUE; } else if(strcmp(op, CRM_OP_TRANSITION) == 0) { if(transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition( INFINITY,tg_restart,"Transition Active",NULL); } else { destroy_graph(transition_graph); transition_graph = unpack_graph(xml_data); + start_global_timer(transition_timer, + transition_graph->transition_timeout); trigger_graph(); print_graph(LOG_DEBUG, transition_graph); } } else if(strcmp(op, CRM_OP_TE_HALT) == 0) { abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); } else if(strcmp(op, CRM_OP_TEABORT) == 0) { abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int stonith_id = -1; crm_action_t *stonith_action = NULL; char *op_key = NULL; char *call_id = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* this will mark the event complete if a match is found */ CRM_DEV_ASSERT(op->private_data != NULL); /* filter out old STONITH actions */ decodeNVpair(op->private_data, ';', &call_id, &op_key); if(op_key != NULL) { char *key = generate_transition_key( transition_graph->id, te_uuid); gboolean key_matched = safe_str_eq(key, op_key); crm_free(key); if(key_matched == FALSE) { crm_info("Ignoring old STONITH op: %s", op->private_data); return; } } #if 1 stonith_id = crm_parse_int(call_id, "-1"); if(stonith_id < 0) { crm_err("Stonith action not matched: %s (%s)", call_id, op->private_data); return; } #endif stonith_action = match_down_event( stonith_id, op->node_uuid, CRM_OP_FENCE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); return; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = g_hash_table_lookup( stonith_action->params, XML_ATTR_TE_ALLOWFAIL); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); return; } void tengine_stonith_connection_destroy(gpointer user_data) { #if 0 crm_err("Fencing daemon has left us: Shutting down...NOW"); /* shutdown properly later */ CRM_DEV_ASSERT(FALSE/* fencing daemon died */); #else crm_err("Fencing daemon has left us"); #endif return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { + trigger_graph(); + if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "[Failed Update]"); } } void cib_action_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { crm_action_t *action = user_data; const char *task_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TASK_KEY); + trigger_graph(); + CRM_DEV_ASSERT(rc == cib_ok); if(rc < cib_ok) { crm_err("Update for action %d (%s) FAILED: %s", action->id, task_uuid, cib_error2string(rc)); - return; } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_DEV_ASSERT(timer->action != NULL); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_graph_action( LOG_WARNING,"Action missed its timeout", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT); } return FALSE; } + +static int +unconfirmed_actions(gboolean send_updates) +{ + int unconfirmed = 0; + crm_debug_2("Unconfirmed actions..."); + slist_iter( + synapse, synapse_t, transition_graph->synapses, lpc, + + /* lookup event */ + slist_iter( + action, crm_action_t, synapse->actions, lpc2, + if(action->executed == FALSE) { + continue; + + } else if(action->confirmed) { + continue; + } + + unconfirmed++; + crm_debug("Action %d: unconfirmed",action->id); + + if(action->type == action_type_rsc && send_updates) { + cib_action_update(action, LRM_OP_PENDING); + } + ); + ); + if(unconfirmed > 0) { + crm_info("Waiting on %d unconfirmed actions", unconfirmed); + } + return unconfirmed; +} + gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_DEV_ASSERT(timer->action == NULL); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { - crm_err("Transition abort timeout reached..." + int unconfirmed = unconfirmed_actions(FALSE); + crm_warn("Transition abort timeout reached..." " marking transition complete."); - print_graph(LOG_WARNING, transition_graph); transition_graph->complete = TRUE; - abort_transition(INFINITY, -1, "Global Timeout", NULL); + abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); + + if(unconfirmed != 0) { + crm_warn("Writing %d unconfirmed actions to the CIB", + unconfirmed); + unconfirmed_actions(TRUE); + } } return FALSE; } -crm_action_timer_t *transition_timer = NULL; - gboolean te_graph_trigger(gpointer user_data) { + int timeout = 0; int pending_updates = 0; enum transition_status graph_rc = -1; if(transition_graph->complete) { notify_crmd(transition_graph); return TRUE; } - if(transition_timer == NULL) { - crm_malloc0(transition_timer, sizeof(crm_action_timer_t)); - transition_timer->source_id = 0; - transition_timer->reason = timeout_abort; - transition_timer->action = NULL; - } - graph_rc = run_graph(transition_graph); + timeout = transition_graph->transition_timeout; print_graph(LOG_DEBUG_2, transition_graph); - transition_timer->timeout = transition_graph->transition_timeout; if(graph_rc == transition_active) { crm_debug_3("Transition not yet complete"); stop_te_timer(transition_timer); - start_te_timer(transition_timer); + start_global_timer(transition_timer, timeout); return TRUE; } else if(graph_rc == transition_pending) { + timeout = transition_timer->timeout; crm_debug_3("Transition not yet complete - no actions fired"); return TRUE; } pending_updates = num_cib_op_callbacks(); CRM_DEV_ASSERT(pending_updates == 0); if(graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } notify_crmd(transition_graph); return TRUE; } diff --git a/crm/tengine/main.c b/crm/tengine/main.c index 7ef0ae927a..68cf69a207 100644 --- a/crm/tengine/main.c +++ b/crm/tengine/main.c @@ -1,244 +1,250 @@ -/* $Id: main.c,v 1.31 2006/02/14 11:40:25 andrew Exp $ */ +/* $Id: main.c,v 1.32 2006/02/27 09:55:57 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SYS_NAME CRM_SYSTEM_TENGINE #define OPTARGS "hVc" GMainLoop* mainloop = NULL; const char* crm_system_name = SYS_NAME; cib_t *te_cib_conn = NULL; extern GTRIGSource *transition_trigger; +extern crm_action_timer_t *transition_timer; void usage(const char* cmd, int exit_status); int init_start(void); gboolean tengine_shutdown(int nsig, gpointer unused); extern void te_update_confirm(const char *event, HA_Message *msg); extern void te_update_diff(const char *event, HA_Message *msg); extern crm_graph_functions_t te_graph_fns; int main(int argc, char ** argv) { gboolean allow_cores = TRUE; int argerr = 0; int flag; crm_log_init(crm_system_name); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, tengine_shutdown, NULL, NULL); transition_trigger = G_main_add_TriggerHandler( G_PRIORITY_LOW, te_graph_trigger, NULL, NULL); crm_debug_3("Begining option processing"); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'c': allow_cores = TRUE; break; default: ++argerr; break; } } crm_debug_3("Option processing complete"); if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } /* read local config file */ crm_debug_3("Starting..."); return init_start(); } int init_start(void) { int init_ok = TRUE; init_client_ipc_comms( CRM_SYSTEM_CRMD, subsystem_msg_dispatch, (void*)process_te_message, &crm_ch); if(crm_ch != NULL) { send_hello_message(crm_ch, "1234", CRM_SYSTEM_TENGINE, "0", "1"); } else { init_ok = FALSE; crm_err("Could not connect to the CRMd"); } if(init_ok) { crm_debug_4("Creating CIB connection"); te_cib_conn = cib_new(); if(te_cib_conn == NULL) { init_ok = FALSE; } } if(init_ok) { crm_debug_4("Connecting to the CIB"); if(cib_ok != te_cib_conn->cmds->signon( te_cib_conn, crm_system_name, cib_command)) { crm_err("Could not connect to the CIB"); init_ok = FALSE; } } if(init_ok) { crm_debug_4("Setting CIB notification callback"); if(cib_ok != te_cib_conn->cmds->add_notify_callback( te_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } } if(init_ok && ST_OK != stonithd_signon(crm_system_name)) { crm_err("Could not sign up to stonithd"); /* init_ok = FALSE; */ } if(init_ok && ST_OK != stonithd_set_stonith_ops_callback( tengine_stonith_callback)) { crm_err("Could not set stonith callback"); stonithd_signoff(); /* init_ok = FALSE; */ } if(init_ok) { IPC_Channel *fence_ch = stonithd_input_IPC_channel(); if(fence_ch == NULL) { } else if(NULL == G_main_add_IPC_Channel( G_PRIORITY_LOW, fence_ch, FALSE, tengine_stonith_dispatch, NULL, tengine_stonith_connection_destroy)) { crm_err("Failed to add Fencing channel to our mainloop"); init_ok = FALSE; } } if(init_ok) { cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); te_uuid = crm_strdup(uuid_str); crm_info("Registering TE UUID: %s", te_uuid); set_graph_functions(&te_graph_fns); /* create a blank one */ transition_graph = unpack_graph(NULL); transition_graph->complete = TRUE; transition_graph->completion_action = tg_restart; + + crm_malloc0(transition_timer, sizeof(crm_action_timer_t)); + transition_timer->source_id = 0; + transition_timer->reason = timeout_abort; + transition_timer->action = NULL; } if(init_ok) { /* Create the mainloop and run it... */ crm_info("Starting %s", crm_system_name); mainloop = g_main_new(FALSE); g_main_run(mainloop); return_to_orig_privs(); crm_info("Exiting %s", crm_system_name); } else { crm_warn("Initialization errors, %s not starting.", crm_system_name); } if(init_ok) { return 0; } return 1; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh]" "[-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } gboolean shuttingdown; gboolean tengine_shutdown(int nsig, gpointer unused) { shuttingdown = TRUE; abort_transition(INFINITY, tg_shutdown, "Shutdown", NULL); return TRUE; } diff --git a/crm/tengine/tengine.h b/crm/tengine/tengine.h index df3efd9969..43c7003966 100644 --- a/crm/tengine/tengine.h +++ b/crm/tengine/tengine.h @@ -1,66 +1,65 @@ -/* $Id: tengine.h,v 1.35 2006/02/20 16:21:51 andrew Exp $ */ +/* $Id: tengine.h,v 1.36 2006/02/27 09:55:57 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TENGINE__H #define TENGINE__H #include #include #include extern IPC_Channel *crm_ch; extern GMainLoop* mainloop; /* tengine */ extern crm_action_t *match_down_event( int rc, const char *target, const char *filter); extern void send_stonith_update(stonith_ops_t * op); extern gboolean cib_action_update(crm_action_t *action, int status); /* utils */ extern gboolean stop_te_timer(crm_action_timer_t *timer); -extern gboolean start_te_timer(crm_action_timer_t *timer); extern const char *get_rsc_state(const char *task, op_status_t status); /* unpack */ extern gboolean extract_event(crm_data_t *msg); extern gboolean process_te_message( HA_Message * msg, crm_data_t *xml_data, IPC_Channel *sender); extern crm_graph_t *transition_graph; extern GTRIGSource *transition_trigger; extern char *te_uuid; extern cib_t *te_cib_conn; extern void notify_crmd(crm_graph_t *graph); #include extern void trigger_graph_processing(const char *fn, int line); extern void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, crm_data_t *reason, const char *fn, int line); #define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__) #define abort_transition(pri, action, text, reason) \ abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__); #endif diff --git a/crm/tengine/utils.c b/crm/tengine/utils.c index c0049c5543..1f027c856c 100644 --- a/crm/tengine/utils.c +++ b/crm/tengine/utils.c @@ -1,157 +1,134 @@ -/* $Id: utils.c,v 1.56 2006/02/20 16:21:51 andrew Exp $ */ +/* $Id: utils.c,v 1.57 2006/02/27 09:55:57 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include extern cib_t *te_cib_conn; const char * get_rsc_state(const char *task, op_status_t status) { if(safe_str_eq(CRMD_ACTION_START, task)) { if(status == LRM_OP_PENDING) { return CRMD_ACTION_START_PENDING; } else if(status == LRM_OP_DONE) { return CRMD_ACTION_STARTED; } else { return CRMD_ACTION_START_FAIL; } } else if(safe_str_eq(CRMD_ACTION_STOP, task)) { if(status == LRM_OP_PENDING) { return CRMD_ACTION_STOP_PENDING; } else if(status == LRM_OP_DONE) { return CRMD_ACTION_STOPPED; } else { return CRMD_ACTION_STOP_FAIL; } } else { if(safe_str_eq(CRMD_ACTION_MON, task)) { if(status == LRM_OP_PENDING) { return CRMD_ACTION_MON_PENDING; } else if(status == LRM_OP_DONE) { return CRMD_ACTION_MON_OK; } else { return CRMD_ACTION_MON_FAIL; } } else { const char *rsc_state = NULL; if(status == LRM_OP_PENDING) { rsc_state = CRMD_ACTION_GENERIC_PENDING; } else if(status == LRM_OP_DONE) { rsc_state = CRMD_ACTION_GENERIC_OK; } else { rsc_state = CRMD_ACTION_GENERIC_FAIL; } crm_warn("Using status \"%s\" for op \"%s\"..." " this is still in the experimental stage.", rsc_state, task); return rsc_state; } } } -gboolean -start_te_timer(crm_action_timer_t *timer) -{ - if(timer == NULL) { - return FALSE; - - } else if(timer->source_id != 0) { - timer->source_id = Gmain_timeout_add( - timer->timeout, global_timer_callback, (void*)timer); - CRM_ASSERT(timer->source_id != 0); - return TRUE; - - } else if(timer->timeout < 0) { - crm_err("Tried to start timer with -ve period"); - - } else { - crm_debug_3("#!!#!!# Timer already running (%d)", - timer->source_id); - } - return FALSE; -} - - gboolean stop_te_timer(crm_action_timer_t *timer) { if(timer == NULL) { return FALSE; } if(timer->source_id != 0) { Gmain_timeout_remove(timer->source_id); timer->source_id = 0; } else { return FALSE; } return TRUE; } void trigger_graph_processing(const char *fn, int line) { G_main_set_trigger(transition_trigger); crm_debug_2("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, crm_data_t *reason, const char *fn, int line) { int log_level = LOG_DEBUG; if(abort_priority >= INFINITY) { log_level = LOG_INFO; } update_abort_priority( transition_graph, abort_priority, abort_action, abort_text); crm_log_maybe(log_level, "%s:%d - Triggered graph processing : %s", fn, line, abort_text); if(reason != NULL) { crm_log_xml(log_level, "Cause", reason); } if(transition_graph->complete) { notify_crmd(transition_graph); } else { G_main_set_trigger(transition_trigger); } }