diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index 113e2c4e18..f1d5e02f66 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -1,466 +1,540 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Transition engine callbacks: CIB diff notifications, (N)ACK messages, STONITH results, CIB update results and action or transition timers. */ #include #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, xmlNode *msg); -xmlNode *need_abort(xmlNode *update); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; /* #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ -#define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s]" +#define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" + +static void process_resource_updates(xmlXPathObject *xpathObj) +{ +/* + + + + + +*/ + int lpc = 0, max = xpathObj->nodesetval->nodeNr; + for(lpc = 0; lpc < max; lpc++) { + xmlNode *rsc_op = getXpathResult(xpathObj, lpc); + xmlNode *node = rsc_op; + while(node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) { + node = node->parent; + } + CRM_CHECK(node != NULL, continue); + 
/* process_resource_updates: each rsc_op in the XPath result is handed to the graph together with the ID of its enclosing XML_CIB_TAG_STATE ancestor, found by the parent walk; results without such an ancestor are skipped. NOTE(review): nodesetval is dereferenced without a NULL check, confirm xpath_search guarantees it. */ process_graph_event(rsc_op, ID(node)); + } +} /* Handle a CIB diff notification. The message is ignored when there is no graph, when the update failed (rc < cib_ok), or when the graph is complete and the FSA is in none of S_IDLE, S_TRANSITION_ENGINE, S_POLICY_ENGINE. Otherwise the added and removed sections are inspected in order: configuration changes, removed transient attributes, node state updates, LRM refreshes, added operations and finally deleted operations; each step either aborts the transition or feeds events to the graph. */ void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; xmlNode *diff = NULL; - xmlNode *status = NULL; xmlNode *cib_top = NULL; xmlXPathObject *xpathObj = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if(transition_graph == NULL) { crm_debug_3("No graph"); return; } else if(rc < cib_ok) { crm_debug_3("Filter rc=%d (%s)", rc, cib_error2string(rc)); return; } else if(transition_graph->complete == TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_debug_2("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates, fsa_state2string(fsa_state)); log_cib_diff(LOG_DEBUG_2, diff, op); /* Process anything that was added */ - cib_top = get_xpath_object_relative("//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB, diff, LOG_ERR); - status = first_named_child(cib_top, XML_CIB_TAG_STATUS); - - if(status != NULL) { - /* newly completed resource operations */ - extract_event(status); - } - - /* configuration changes */ + cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB, diff, LOG_ERR); if(need_abort(cib_top)) { + /* configuration change */ goto bail; } /* Process anything that was removed */ - cib_top = get_xpath_object_relative("//"XML_TAG_DIFF_REMOVED"//"XML_TAG_CIB, diff, 
LOG_DEBUG); - - /* configuration changes */ + cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_REMOVED"//"XML_TAG_CIB, diff, LOG_DEBUG); if(need_abort(cib_top)) { + /* configuration change */ goto bail; } - xpathObj = xpath_search(diff, "//"XML_TAG_DIFF_REMOVED"//"XML_TAG_CIB"//"XML_TAG_TRANSIENT_NODEATTRS); - if(xpathObj) { + cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_REMOVED"//"XML_TAG_TRANSIENT_NODEATTRS, + diff, LOG_DEBUG); + if(cib_top) { abort_transition(INFINITY, tg_restart, "Transient attribute removal", cib_top); goto bail; } - xpathObj = xpath_search(diff, "//"XML_TAG_DIFF_REMOVED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP); + /* Check for node state updates... possibly from a shutdown we requested */ + xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_CIB_TAG_STATE); + if(xpathObj) { + int lpc = 0, max = xpathObj->nodesetval->nodeNr; + for(lpc = 0; lpc < max; lpc++) { + xmlNode *node = getXpathResult(xpathObj, lpc); + const char *event_node = crm_element_value(node, XML_ATTR_ID); + const char *ccm_state = crm_element_value(node, XML_CIB_ATTR_INCCM); + const char *ha_state = crm_element_value(node, XML_CIB_ATTR_HASTATE); + const char *shutdown_s = crm_element_value(node, XML_CIB_ATTR_SHUTDOWN); + const char *crmd_state = crm_element_value(node, XML_CIB_ATTR_CRMDSTATE); + + if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) + || safe_str_eq(ha_state, DEADSTATUS) + || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { + crm_action_t *shutdown = NULL; + shutdown = match_down_event(0, event_node, NULL); + + if(shutdown != NULL) { + update_graph(transition_graph, shutdown); + trigger_graph(); + + } else { + crm_info("Stonith/shutdown of %s not matched", event_node); + abort_transition(INFINITY, tg_restart, "Node failure", node); + } + /* runs whether or not a down event matched: fail the unconfirmed graph actions that still target this node */ fail_incompletable_actions(transition_graph, event_node); + } + + if(shutdown_s) { + int shutdown = crm_parse_int(shutdown_s, NULL); + if(shutdown > 0) { + 
crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node); + abort_transition(INFINITY, tg_restart, "Shutdown request", node); + } + } + } + xmlXPathFreeObject(xpathObj); xpathObj = NULL; + } + + /* + * Check for and fast-track the processing of LRM refreshes + * In large clusters this can result in _huge_ speedups + */ + xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_REMOVED"//"XML_LRM_TAG_RESOURCE); + if(xpathObj) { + int updates = xpathObj->nodesetval->nodeNr; + + if(updates > 1) { + /* Updates by, or in response to, TE actions will never contain updates + * for more than one resource at a time + */ + crm_info("Detected LRM refresh: Skipping all resource events"); + abort_transition(INFINITY, tg_restart, "LRM Refresh", diff); + goto bail; + } + xmlXPathFreeObject(xpathObj); xpathObj = NULL; + } + + /* Process operation updates */ + xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_LRM_TAG_RSC_OP); + if(xpathObj) { + process_resource_updates(xpathObj); + xmlXPathFreeObject(xpathObj); + /* xpathObj is not reset to NULL here; it is reassigned by the xpath_search just below before bail could free it again */ } + + /* Detect deleted (as opposed to replaced or added) actions */ + xpathObj = xpath_search(diff, "//"XML_TAG_DIFF_REMOVED"//"XML_LRM_TAG_RSC_OP); if(xpathObj) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { int max = 0; /* NOTE(review): shadows the loop bound max declared in the enclosing block; here it is the size of the XPath buffer */ const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; - xmlNode *match = xpathObj->nodesetval->nodeTab[lpc]; + xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); - CRM_CHECK(match->type == XML_DOCUMENT_NODE, continue); op_id = ID(match); /* NOTE(review): the strlen call below assumes ID(match) is non-NULL; confirm */ max = strlen(rsc_op_template) + strlen(op_id) + 1; crm_malloc0(rsc_op_xpath, max); snprintf(rsc_op_xpath, max, rsc_op_template, op_id); /* a removed op with the same id in the added section was replaced, not deleted */ op_match = xpath_search(diff, rsc_op_xpath); if(op_match) { xmlXPathFreeObject(op_match); } else { - crm_info("No match for deleted action %s", op_id); + crm_info("No match for deleted action %s (%s)", rsc_op_xpath, 
op_id); /* in the new version cib_top is always NULL here: the transient attribute check above bails out whenever it is set */ abort_transition(INFINITY, tg_restart, "Resource op removal", cib_top); /* NOTE(review): rsc_op_xpath is not freed on this path, only after the else block; looks like a leak, confirm */ goto bail; } crm_free(rsc_op_xpath); } } bail: if(xpathObj) { xmlXPathFreeObject(xpathObj); } } /* Process a message addressed to the transition engine. Returns FALSE when sys_to is missing or is not CRM_SYSTEM_TENGINE, or when an LRM (N)ACK yields a NULL result from the rsc_op xpath_search; returns TRUE otherwise, including the op == NULL and unknown command cases. */ gboolean process_te_message(xmlNode *msg, xmlNode *xml_data) { - xmlNode *xml_obj = NULL; - const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_xml(LOG_DEBUG_3, "ipc", msg); if(op == NULL){ /* error */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ - xml_obj = xml_data; - CRM_CHECK(xml_obj != NULL, - crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); - return FALSE); - CRM_CHECK(xml_obj != NULL, - crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); - return FALSE); - xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); - - CRM_CHECK(xml_obj != NULL, - crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); - return FALSE); - - crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); - crm_info("Processing (N)ACK %s from %s", - crm_element_value(msg, XML_ATTR_REFERENCE), from); - extract_event(xml_obj); + xmlXPathObject *xpathObj = NULL; + crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); + crm_info("Processing (N)ACK %s from %s", + crm_element_value(msg, XML_ATTR_REFERENCE), from); + + xpathObj = xpath_search(xml_data, "//"XML_LRM_TAG_RSC_OP); + if(xpathObj) { + process_resource_updates(xpathObj); + xmlXPathFreeObject(xpathObj); + xpathObj = NULL; + + } else { + crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); + return FALSE; + } } else { 
crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } /* STONITH completion callback. Restores the transition timeout once the last active STONITH op has finished, decodes the transition key stored in op->private_data, marks the matching graph action confirmed via get_action and, for a failed op whose action does not allow failure, aborts the transition. */ void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; char *uuid = NULL; crm_action_t *stonith_action = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* restore the original transition timeout */ stonith_op_active--; if(stonith_op_active == 0) { crm_info("Restoring transition timeout: %d", active_timeout); transition_graph->transition_timeout = active_timeout; } /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key( op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); } /* NOTE(review): the branch above only logs; processing still continues with get_action below. Confirm whether a goto bail was intended. */ stonith_action = get_action(stonith_id, TRUE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); goto bail; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = crm_meta_value(stonith_action->params, XML_ATTR_TE_ALLOWFAIL); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." 
" aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); bail: crm_free(uuid); return; } /* CIB callback for a fencing update: a failed update is logged, a successful one calls erase_status_tag with user_data and XML_CIB_TAG_LRM. user_data is freed in both cases. */ void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "Failed update"); } else { erase_status_tag(user_data, XML_CIB_TAG_LRM); } crm_free(user_data); } /* The two CIB callbacks below only log a failed update (rc < cib_ok) and do nothing on success. */ void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } /* Timer callback for a single action. Stops the timer, then ignores the timeout when the graph is complete, only logs it for timeout_action_warn, and otherwise records the action as LRM_OP_TIMEOUT through cib_action_update. Always returns FALSE. */ gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( LOG_WARNING,"Action missed its timeout: ", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT, EXECRA_UNKNOWN_ERROR); } return FALSE; } /* Count the graph actions that were executed but not yet confirmed and return that count. When send_updates is set, resource actions other than cancel and stop are additionally written to the CIB as LRM_OP_PENDING. */ static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; const char *key = NULL; const char *task = NULL; const char *node = NULL; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, 
lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm_info("Action %s %d unconfirmed from %s", key, action->id, node); if(action->type != action_type_rsc) { continue; } else if(send_updates == FALSE) { continue; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ continue; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ continue; } cib_action_update(action, LRM_OP_PENDING, EXECRA_STATUS_UNKNOWN); ); ); if(unconfirmed > 0) { crm_warn("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } /* Transition-wide timer callback. It only acts for timeout_abort while the FSA is in S_TRANSITION_ENGINE and the graph is incomplete: the graph is marked complete, the transition is aborted and any unconfirmed actions are written to the CIB. Always returns FALSE. */ gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); if(transition_graph == NULL) { crm_err("No current graph"); return FALSE; } crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); if(fsa_state != S_TRANSITION_ENGINE) { crm_err("Discarding transition timeout in state: %s", fsa_state2string(fsa_state)); } else if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." 
" marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); /* the first unconfirmed_actions call only counted; this second pass passes TRUE so the pending state is written to the CIB */ if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } diff --git a/crmd/te_events.c b/crmd/te_events.c index 660271169a..32fa7a168d 100644 --- a/crmd/te_events.c +++ b/crmd/te_events.c @@ -1,569 +1,468 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include char *failed_stop_offset = NULL; char *failed_start_offset = NULL; -gboolean need_abort(xmlNode *update); -gboolean process_graph_event(xmlNode *event, const char *event_node); int match_graph_event(int action_id, xmlNode *event, const char *event_node, int op_status, int op_rc, int target_rc); gboolean need_abort(xmlNode *update) { xmlNode *xml = NULL; if(update == NULL) { return FALSE; } xml_prop_iter(update, name, value, if(safe_str_eq(name, XML_ATTR_HAVE_QUORUM)) { goto do_abort; /* possibly not required */ } else if(safe_str_eq(name, XML_ATTR_GENERATION)) { goto do_abort; } else if(safe_str_eq(name, XML_ATTR_GENERATION_ADMIN)) { goto do_abort; } continue; do_abort: abort_transition(INFINITY, tg_restart, 
"Non-status change", NULL); crm_info("Aborting on change to %s", name); return TRUE; ); /* need_abort: none of the quorum or generation attributes was present on update, so abort only if it carries an XML_CIB_TAG_CONFIGURATION section; the function returns TRUE exactly when it aborted the transition */ xml = get_object_root(XML_CIB_TAG_CONFIGURATION, update); if(xml != NULL) { abort_transition(INFINITY, tg_restart, "Non-status change", xml); return TRUE; } return FALSE; } /* Mark every unconfirmed, non-pseudo action whose target UUID equals down_node as failed and update the graph for it. Returns TRUE, after aborting the transition, if at least one action was affected and FALSE otherwise. */ -static gboolean +gboolean fail_incompletable_actions(crm_graph_t *graph, const char *down_node) { const char *target = NULL; xmlNode *last_action = NULL; slist_iter( synapse, synapse_t, graph->synapses, lpc, if (synapse->confirmed) { continue; } /* NOTE(review): the inner iteration reuses the index name lpc of the outer one (other loops in this file use lpc2); confirm the macro scopes it per loop */ slist_iter( action, crm_action_t, synapse->actions, lpc, if(action->type == action_type_pseudo || action->confirmed) { continue; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); if(safe_str_eq(target, down_node)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Action %d (%s) is scheduled for %s (offline)", action->id, ID(action->xml), down_node); } ); ); if(last_action != NULL) { crm_warn("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; } -gboolean -extract_event(xmlNode *msg) -{ - int shutdown = 0; - const char *shutdown_s = NULL; - const char *event_node = NULL; - int have_aborted = 0; - -/* -[cib fragment] -... - - - - - -*/ - xml_child_iter_filter( - msg, node_state, XML_CIB_TAG_STATE, - - xmlNode *attrs = NULL; - xmlNode *resources = NULL; - const char *ha_state = NULL; - const char *ccm_state = NULL; - const char *crmd_state = NULL; - - /* Transient node attribute changes... 
*/ - event_node = crm_element_value(node_state, XML_ATTR_ID); - crm_debug_2("Processing state update from %s", event_node); - - attrs = find_xml_node(node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); - if(attrs != NULL) { - have_aborted++; - crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes for %s", event_node); - abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); - } - - resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); - resources = find_xml_node(resources, XML_LRM_TAG_RESOURCES, FALSE); - - /* LRM resource update... */ - xml_child_iter( - resources, rsc, - xml_child_iter( - rsc, rsc_op, - if(process_graph_event(rsc_op, event_node)) { - /* This is an lrm status refresh... - * The transition (if any) was already cancelled - */ - if(transition_graph == NULL || transition_graph->complete) { - crm_info("Detected LRM refresh update: Skipping any remaining resource events"); - return TRUE; - } - } - ); - ); -#if 0 - if(have_aborted && (transition_graph == NULL || transition_graph->complete)) { - /* Any shutdown event would never be expected */ - return TRUE; - } -#endif - ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); - ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); - crmd_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); - - /* - * node state update... 
possibly from a shutdown we requested - */ - if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) - || safe_str_eq(ha_state, DEADSTATUS) - || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { - crm_action_t *shutdown = NULL; - shutdown = match_down_event(0, event_node, NULL); - - if(shutdown != NULL) { - update_graph(transition_graph, shutdown); - trigger_graph(); - - } else { - crm_info("Stonith/shutdown of %s not matched", event_node); - abort_transition(INFINITY, tg_restart, "Node failure", node_state); - } - fail_incompletable_actions(transition_graph, event_node); - } - - shutdown_s = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); - if(shutdown_s) { - shutdown = crm_parse_int(shutdown_s, NULL); - } - if(shutdown_s && shutdown > 0) { - crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node); - abort_transition(INFINITY, tg_restart, "Shutdown request", node_state); - } - ); - - return TRUE; -} - /* Record a failure of the operation described by event in the fail-count and last-failure attributes of event_node. Returns FALSE without doing anything when rc is 99 or equals target_rc, and TRUE in every other case, including when the op key cannot be parsed or when nothing is written because the interval is 0 and force is not set. */ static gboolean update_failcount(xmlNode *event, const char *event_node, int rc, int target_rc, gboolean force) { int interval = 0; char *task = NULL; char *rsc_id = NULL; char *attr_name = NULL; const char *id = ID(event); const char *on_uuid = event_node; const char *value = NULL; if(rc == 99) { /* this is an internal code for "we're busy, try again" */ return FALSE; } else if(rc == target_rc) { return FALSE; } if(failed_stop_offset == NULL) { failed_stop_offset = crm_strdup(INFINITY_S); } if(failed_start_offset == NULL) { failed_start_offset = crm_strdup(INFINITY_S); } CRM_CHECK(on_uuid != NULL, return TRUE); CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event)); goto bail); CRM_CHECK(task != NULL, goto bail); CRM_CHECK(rsc_id != NULL, goto bail); if(safe_str_eq(task, CRMD_ACTION_START)) { interval = 1; value = failed_start_offset; } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { interval = 1; value = failed_stop_offset; } if(value == NULL || safe_str_neq(value, INFINITY_S)) { value = 
XML_NVPAIR_ATTR_VALUE"++"; } /* start and stop failures forced interval to 1 above, so they are always recorded; other tasks are recorded only with a non-zero interval or when force is set */ if(interval > 0 || force) { int call_id = 0; char *now = crm_itoa(time(NULL)); attr_name = crm_concat("fail-count", rsc_id, '-'); crm_warn("Updating failcount for %s on %s after failed %s:" " rc=%d (update=%s, time=%s)", rsc_id, on_uuid, task, rc, value, now); /* don't let notifications of these updates cause new transitions */ call_id = update_attr(fsa_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, value, FALSE); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, cib_failcount_updated); crm_free(attr_name); attr_name = crm_concat("last-failure", rsc_id, '-'); /* don't let notifications of these updates cause new transitions */ call_id = update_attr(fsa_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, now, FALSE); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, cib_failcount_updated); crm_free(attr_name); crm_free(now); } bail: crm_free(rsc_id); crm_free(task); return TRUE; } /* Map an operation result onto an LRM status: LRM_OP_DONE when rc equals target_rc and LRM_OP_ERROR otherwise, so orig_status only survives if it already was LRM_OP_DONE. A warning is logged for every non-DONE result unless rc is 99. */ static int status_from_rc(crm_action_t *action, int orig_status, int rc, int target_rc) { int status = orig_status; if(target_rc == rc) { crm_debug_2("Target rc: == %d", rc); if(status != LRM_OP_DONE) { crm_debug_2("Re-mapping op status to" " LRM_OP_DONE for rc=%d", rc); status = LRM_OP_DONE; } } else { status = LRM_OP_ERROR; } /* 99 is the code we use for direct nack's */ if(rc != 99 && status != LRM_OP_DONE) { const char *task, *uname; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); crm_warn("Action %d (%s) on %s failed (target: %d vs. 
rc: %d): %s", action->id, task, uname, target_rc, rc, op_status2text(status)); } return status; } /* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event(int action_id, xmlNode *event, const char *event_node, int op_status, int op_rc, int target_rc) { const char *target = NULL; const char *allow_fail = NULL; const char *this_event = NULL; crm_action_t *action = NULL; action = get_action(action_id, FALSE); if(action == NULL) { return -1; } op_status = status_from_rc(action, op_status, op_rc, target_rc); if(op_status != LRM_OP_DONE) { update_failcount(event, event_node, op_rc, target_rc, FALSE); } /* Process OP status */ switch(op_status) { case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return action->id; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: action->failed = TRUE; break; case LRM_OP_CANCELLED: /* do nothing?? 
*/ crm_err("Dont know what to do for cancelled ops yet"); break; default: action->failed = TRUE; crm_err("Unsupported action result: %d", op_status); } /* NOTE(review): op_status comes from status_from_rc, which as written returns only LRM_OP_DONE or LRM_OP_ERROR, so the PENDING, TIMEOUT, NOTSUPPORTED, CANCELLED and default arms above look unreachable, as does the -2 result mentioned in the header comment; confirm */ /* stop this event's timer if it had one */ stop_te_timer(action->timer); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); /* a failure is forgiven when the action carries XML_ATTR_TE_ALLOWFAIL set to true; note that the graph was already updated above with failed still set */ if(action->failed) { allow_fail = crm_meta_value(action->params, XML_ATTR_TE_ALLOWFAIL); if(crm_is_true(allow_fail)) { action->failed = FALSE; } } if(action->failed) { abort_transition(action->synapse->priority+1, tg_restart, "Event failed", event); } this_event = ID(event); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); te_log_action(LOG_INFO, "Action %s (%d) confirmed on %s (rc=%d)", crm_str(this_event), action->id, crm_str(target), op_status); return action->id; } /* Return the action of the current transition graph with the given id, or NULL if there is none. When confirmed is set, the action timer is stopped and the action is marked confirmed as a side effect of the lookup. */ crm_action_t * get_action(int id, gboolean confirmed) { slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->id == id) { if(confirmed) { stop_te_timer(action->timer); action->confirmed = TRUE; } return action; } ) ); return NULL; } /* Locate a graph action for target: the action whose id equals id when id > 0, otherwise the first action of type action_type_crm that is not CRM_OP_LRM_REFRESH, matches filter when one is given, and whose target UUID equals target. A match has its timer stopped and is marked confirmed; NULL is returned when nothing matches. */ crm_action_t * match_down_event(int id, const char *target, const char *filter) { const char *this_action = NULL; const char *this_node = NULL; crm_action_t *match = NULL; slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(id > 0 && action->id == id) { match = action; break; } this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); if(action->type != action_type_crm) { continue; } else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){ continue; } else if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); if(this_node == NULL) { crm_log_xml_err(action->xml, "No node uuid"); } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } 
match = action; break; ); if(match != NULL) { /* stop this event's timer if it had one */ break; } ); if(match != NULL) { /* stop this event's timer if it had one */ crm_debug("Match found for action %d: %s on %s", id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); stop_te_timer(match->timer); match->confirmed = TRUE; } else if(id > 0) { crm_err("No match for action %d", id); } else { crm_warn("No match for shutdown action on %s", target); } return match; } /* Match a resource operation (event) reported for event_node against the current graph using its transition magic. Returns FALSE when the event carries no magic or is still pending; otherwise returns stop_early, which is set only for events from a different transitioner or transition and is cleared again when update_failcount returns TRUE. Every mismatch aborts the transition. */ gboolean process_graph_event(xmlNode *event, const char *event_node) { int rc = -1; int status = -1; int action = -1; int target_rc = -1; int transition_num = -1; char *update_te_uuid = NULL; gboolean stop_early = FALSE; gboolean passed = FALSE; const char *id = NULL; const char *magic = NULL; CRM_ASSERT(event != NULL); id = ID(event); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); if(magic == NULL) { /* non-change */ return FALSE; } CRM_CHECK(decode_transition_magic( magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc), crm_err("Invalid event %s detected", id); abort_transition(INFINITY, tg_restart,"Bad event", event); ); /* NOTE(review): the failure action of the CRM_CHECK above aborts the transition but does not leave the function, so the checks below still run on whatever was decoded; confirm this is intended */ if(status == LRM_OP_PENDING) { goto bail; } if(transition_num == -1) { crm_err("Action %s (%s) initiated outside of a transition", id, magic); abort_transition(INFINITY, tg_restart,"Unexpected event",event); } else if(action < 0 || crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE) { crm_info("Action %s (%s) initiated by a different transitioner", id, magic); abort_transition(INFINITY, tg_restart,"Foreign event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if(transition_graph->id != transition_num) { crm_info("Detected action %s from a different transition:" " %d vs. 
%d", id, transition_num, transition_graph->id); abort_transition(INFINITY, tg_restart,"Old event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if(transition_graph->complete) { crm_info("Action %s arrived after a completed transition", id); abort_transition(INFINITY, tg_restart, "Inactive graph", event); } else if(match_graph_event( action, event, event_node, status, rc, target_rc) < 0) { crm_err("Unknown graph action %s", id); abort_transition(INFINITY, tg_restart, "Unknown event", event); } else { passed = TRUE; crm_debug_2("Processed update to %s: %s", id, magic); } if(passed == FALSE) { if(update_failcount(event, event_node, rc, target_rc, transition_num == -1)) { /* Turns out this wasn't an lrm status refresh update aferall */ stop_early = FALSE; } } bail: crm_free(update_te_uuid); return stop_early; } diff --git a/crmd/tengine.h b/crmd/tengine.h index f1b8c0dd39..50bf327221 100644 --- a/crmd/tengine.h +++ b/crmd/tengine.h @@ -1,72 +1,74 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TENGINE__H #define TENGINE__H #include #include #include extern void send_stonith_update(stonith_ops_t * op); /* tengine */ extern crm_action_t *match_down_event( int rc, const char *target, const char *filter); extern gboolean cib_action_update(crm_action_t *action, int status, int op_rc); +extern gboolean fail_incompletable_actions(crm_graph_t *graph, const char *down_node); +extern gboolean need_abort(xmlNode *update); +extern gboolean process_graph_event(xmlNode *event, const char *event_node); /* utils */ extern crm_action_t *get_action(int id, gboolean confirmed); extern gboolean start_global_timer(crm_action_timer_t *timer, int timeout); extern gboolean stop_te_timer(crm_action_timer_t *timer); extern const char *get_rsc_state(const char *task, op_status_t status); /* unpack */ -extern gboolean extract_event(xmlNode *msg); extern gboolean process_te_message(xmlNode * msg, xmlNode *xml_data); extern crm_graph_t *transition_graph; extern GTRIGSource *transition_trigger; extern char *te_uuid; extern void notify_crmd(crm_graph_t *graph); #include extern void trigger_graph_processing(const char *fn, int line); extern void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const char *fn, int line); #define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__) #define abort_transition(pri, action, text, reason) \ abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__); extern gboolean te_connect_stonith(gpointer user_data); extern GCHSource *stonith_src; extern GTRIGSource *transition_trigger; extern GTRIGSource *stonith_reconnect; extern crm_action_timer_t *transition_timer; extern char *failed_stop_offset; extern char *failed_start_offset; extern int 
active_timeout; extern int stonith_op_active; #endif