diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
index 2d345528a4..35bd229cce 100644
--- a/crmd/crmd_utils.h
+++ b/crmd/crmd_utils.h
@@ -1,123 +1,124 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef CRMD_UTILS__H
 #  define CRMD_UTILS__H
 
 #  include <crm/crm.h>
 #  include <crm/common/xml.h>
 #  include <crm/cib/internal.h> /* For CIB_OP_MODIFY */
 
 #  define CLIENT_EXIT_WAIT 30
 #  define FAKE_TE_ID	"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
 
 
 #  define fsa_cib_delete(section, data, options, call_id, user_name)	\
 	if(fsa_cib_conn != NULL) {					\
 	    call_id = cib_internal_op(                                  \
 		fsa_cib_conn, CIB_OP_DELETE, NULL, section, data,	\
 		NULL, options, user_name);				\
 									\
 	} else {							\
 		crm_err("No CIB connection available");			\
 	}
 
 #  define fsa_cib_update(section, data, options, call_id, user_name)	\
 	if(fsa_cib_conn != NULL) {					\
 	    call_id = cib_internal_op(                                  \
 		fsa_cib_conn, CIB_OP_MODIFY, NULL, section, data,	\
 		NULL, options, user_name);				\
 									\
 	} else {							\
 		crm_err("No CIB connection available");			\
 	}
 
 #  define fsa_cib_anon_update(section, data, options)			\
 	if(fsa_cib_conn != NULL) {					\
 	    fsa_cib_conn->cmds->modify(					\
 		fsa_cib_conn, section, data, options);			\
 									\
 	} else {							\
 		crm_err("No CIB connection available");			\
 	}
 
 extern gboolean fsa_has_quorum;
 extern int last_peer_update;
 extern int last_resource_update;
 
 /*
  * Flags describing which parts of a node's state an update should cover.
  * populate_cib_nodes() takes a value of this type. Every non-zero member
  * occupies its own bit, so members can presumably be OR-ed together
  * (NOTE(review): confirm against the callers).
  */
 enum node_update_flags {
     node_update_none = 0x0000,
     node_update_quick = 0x0001,
     node_update_cluster = 0x0010,
     node_update_peer = 0x0020,
     node_update_join = 0x0040,
     node_update_expected = 0x0100,
 };
 
 gboolean crm_timer_stop(fsa_timer_t * timer);
 gboolean crm_timer_start(fsa_timer_t * timer);
 gboolean crm_timer_popped(gpointer data);
 gboolean is_timer_started(fsa_timer_t * timer);
 
 xmlNode *create_node_state(const char *uname, const char *in_cluster,
                            const char *is_peer, const char *join_state,
                            const char *exp_state, gboolean clear_shutdown, const char *src);
 
 int crmd_exit(int rc);
 gboolean stop_subsystem(struct crm_subsystem_s *centry, gboolean force_quit);
 gboolean start_subsystem(struct crm_subsystem_s *centry);
 
 void fsa_dump_actions(long long action, const char *text);
 void fsa_dump_inputs(int log_level, const char *text, long long input_register);
 
 gboolean update_dc(xmlNode * msg);
 void erase_node_from_join(const char *node);
 xmlNode *do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *source);
 void populate_cib_nodes(enum node_update_flags flags, const char *source);
 void crm_update_quorum(gboolean quorum, gboolean force_update);
 void erase_status_tag(const char *uname, const char *tag, int options);
 void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
 
 int crmd_join_phase_count(enum crm_join_phase phase);
 void crmd_join_phase_log(int level);
 
 const char *get_timer_desc(fsa_timer_t * timer);
 gboolean too_many_st_failures(void);
+void reset_st_fail_count(const char * target);
 
 #  define fsa_register_cib_callback(id, flag, data, fn) do {              \
         fsa_cib_conn->cmds->register_callback(                          \
             fsa_cib_conn, id, 10 * (1 + crm_active_peers()),            \
             flag, data, #fn, fn);                                       \
     } while(0)
 
 #  define start_transition(state) do {					\
 	switch(state) {							\
 	    case S_TRANSITION_ENGINE:					\
 		register_fsa_action(A_TE_CANCEL);			\
 		break;							\
 	    case S_POLICY_ENGINE:					\
 	    case S_IDLE:						\
 		register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);	\
 		break;							\
 	    default:							\
 		crm_debug("NOT starting a new transition in state %s",	\
 			  fsa_state2string(fsa_state));			\
 		break;							\
 	}								\
     } while(0)
 
 #endif
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
index 4c80431a9f..d523dc3864 100644
--- a/crmd/te_callbacks.c
+++ b/crmd/te_callbacks.c
@@ -1,535 +1,544 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/stat.h>
 
 #include <crm/crm.h>
 #include <crm/common/xml.h>
 #include <crm/msg_xml.h>
 
 #include <tengine.h>
 #include <te_callbacks.h>
 #include <crmd_fsa.h>
 
 #include <crm/cluster.h>        /* For ONLINESTATUS etc */
 
 void te_update_confirm(const char *event, xmlNode * msg);
 
 extern char *te_uuid;
 gboolean shuttingdown = FALSE;
 crm_graph_t *transition_graph;
 crm_trigger_t *transition_trigger = NULL;
 
 /* #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
 #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
 
 /*
  * Return the ID of the element enclosing rsc_op whose TYPE() matches
  * XML_CIB_TAG_STATE.
  *
  * The search starts at rsc_op itself and follows ->parent links. If the
  * chain runs out before a match is found, CRM_CHECK fires and NULL is
  * returned.
  */
 static const char *
 get_node_id(xmlNode * rsc_op)
 {
     xmlNode *node = NULL;
 
     for (node = rsc_op; node != NULL; node = node->parent) {
         if (!safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
             /* Found the enclosing state element */
             break;
         }
     }
 
     CRM_CHECK(node != NULL, return NULL);
     return ID(node);
 }
 
 /*
  * Hand every entry of an XPath result set to process_graph_event(),
  * together with the ID returned by get_node_id() for that entry.
  *
  * The entries are expected to sit inside a structure such as:
  *
  *   <status>
  *      <node_state id="node1" state=CRMD_JOINSTATE_MEMBER exp_state="active">
  *         <lrm>
  *            <lrm_resources>
  *               <rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/>
  */
 static void
 process_resource_updates(xmlXPathObject * xpathObj)
 {
     const int total = numXpathResults(xpathObj);
     int idx = 0;
 
     while (idx < total) {
         xmlNode *op_xml = getXpathResult(xpathObj, idx);
         const char *node_id = get_node_id(op_xml);
 
         process_graph_event(op_xml, node_id);
         idx++;
     }
 }
 
 /*
  * React to a CIB diff notification on behalf of the transition engine.
  *
  * event: not used by this function.
  * msg:   the notification; F_CIB_RC, F_CIB_OPERATION and the diff stored
  *        under F_CIB_UPDATE_RESULT are read from it.
  *
  * The notification is ignored when there is no transition graph, when the
  * recorded rc is negative, or when the graph is complete and the FSA is in
  * none of S_IDLE, S_TRANSITION_ENGINE or S_POLICY_ENGINE.
  *
  * Otherwise the diff is examined in a fixed order; the first check that
  * matches aborts the transition and ends processing:
  *   1. configuration (non-status) change
  *   2. ticket section added/updated, then removed
  *   3. transient node attribute added/updated, then removed
  *   4. an apparent LRM refresh (more than one resource updated while no
  *      actions are pending)
  * If none match, added operation entries are passed to
  * process_resource_updates(), and finally removed operation entries are
  * checked to see whether they were truly deleted.
  *
  * Every exit after the first xpath_search() goes through "bail" so that the
  * current xpathObj is released exactly once.
  */
 void
 te_update_diff(const char *event, xmlNode * msg)
 {
     int lpc, max;
     int rc = -1;
     const char *op = NULL;
 
     xmlNode *diff = NULL;
     xmlXPathObject *xpathObj = NULL;
 
     int diff_add_updates = 0;
     int diff_add_epoch = 0;
     int diff_add_admin_epoch = 0;
 
     int diff_del_updates = 0;
     int diff_del_epoch = 0;
     int diff_del_admin_epoch = 0;
 
     CRM_CHECK(msg != NULL, return);
     crm_element_value_int(msg, F_CIB_RC, &rc);
 
     if (transition_graph == NULL) {
         crm_trace("No graph");
         return;
 
     } else if (rc < pcmk_ok) {
         crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
         return;
 
     } else if (transition_graph->complete == TRUE
                && fsa_state != S_IDLE
                && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) {
         crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
                   transition_graph->complete);
         return;
     }
 
     op = crm_element_value(msg, F_CIB_OPERATION);
     diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
 
     /* The version triples are only used for the debug message below */
     cib_diff_version_details(diff,
                              &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
                              &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
 
     crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op,
               diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
               diff_add_admin_epoch, diff_add_epoch, diff_add_updates, fsa_state2string(fsa_state));
     log_cib_diff(LOG_DEBUG_2, diff, __FUNCTION__);
 
     if (cib_config_changed(NULL, NULL, &diff)) {
         abort_transition(INFINITY, tg_restart, "Non-status change", diff);
         goto bail;              /* configuration changed */
     }
 
     /* Tickets Attributes - Added/Updated */
     xpathObj =
         xpath_search(diff,
                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
     if (numXpathResults(xpathObj) > 0) {
         xmlNode *aborted = getXpathResult(xpathObj, 0);
 
         abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted);
         goto bail;
 
     }
     freeXpathObject(xpathObj);
 
     /* Tickets Attributes - Removed */
     xpathObj =
         xpath_search(diff,
                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
     if (numXpathResults(xpathObj) > 0) {
         xmlNode *aborted = getXpathResult(xpathObj, 0);
 
         abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted);
         goto bail;
     }
     freeXpathObject(xpathObj);
 
     /* Transient Attributes - Added/Updated */
     xpathObj =
         xpath_search(diff,
                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//"
                      XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR);
     max = numXpathResults(xpathObj);
 
     for (lpc = 0; lpc < max; lpc++) {
         xmlNode *attr = getXpathResult(xpathObj, lpc);
         const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
         const char *value = NULL;
 
         /* value is only fetched for the CRM_OP_PROBED attribute; for every
          * other attribute it stays NULL.
          */
         if (safe_str_eq(CRM_OP_PROBED, name)) {
             value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
         }
 
         /* Only a CRM_OP_PROBED attribute with a true value is tolerated
          * (assumes crm_is_true(NULL) is FALSE - TODO confirm); anything
          * else aborts the transition.
          */
         if (crm_is_true(value) == FALSE) {
             abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr);
             crm_log_xml_trace(attr, "Abort");
             goto bail;
         }
     }
 
     freeXpathObject(xpathObj);
 
     /* Transient Attributes - Removed */
     xpathObj =
         xpath_search(diff,
                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
                      XML_TAG_TRANSIENT_NODEATTRS);
     if (numXpathResults(xpathObj) > 0) {
         xmlNode *aborted = getXpathResult(xpathObj, 0);
 
         abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted);
         goto bail;
 
     }
     freeXpathObject(xpathObj);
 
     /*
      * Check for and fast-track the processing of LRM refreshes
      * In large clusters this can result in _huge_ speedups
      *
      * Unfortunately we can only do so when there are no pending actions
      * Otherwise we could miss updates we're waiting for and stall
      *
      */
     xpathObj = NULL;
     if (transition_graph->pending == 0) {
         xpathObj =
             xpath_search(diff,
                          "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//"
                          XML_LRM_TAG_RESOURCE);
     }
 
     /* xpathObj is still NULL here when actions are pending; this relies on
      * numXpathResults() coping with NULL - TODO confirm.
      */
     max = numXpathResults(xpathObj);
     if (max > 1) {
         /* Updates by, or in response to, TE actions will never contain updates
          * for more than one resource at a time
          */
         crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", max);
         crm_log_xml_trace(diff, "lrm-refresh");
         abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL);
         goto bail;
     }
     freeXpathObject(xpathObj);
 
     /* Process operation updates */
     xpathObj =
         xpath_search(diff,
                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
     if (numXpathResults(xpathObj)) {
         process_resource_updates(xpathObj);
     }
     freeXpathObject(xpathObj);
 
     /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */
     xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
     max = numXpathResults(xpathObj);
     for (lpc = 0; lpc < max; lpc++) {
         int path_max = 0;
         const char *op_id = NULL;
         char *rsc_op_xpath = NULL;
         xmlXPathObject *op_match = NULL;
         xmlNode *match = getXpathResult(xpathObj, lpc);
 
         CRM_CHECK(match != NULL, continue);
 
         op_id = ID(match);
 
         /* The template length still counts its "%s", so the buffer is
          * slightly larger than the expanded expression needs.
          */
         path_max = strlen(rsc_op_template) + strlen(op_id) + 1;
         rsc_op_xpath = calloc(1, path_max);
         snprintf(rsc_op_xpath, path_max, rsc_op_template, op_id);
 
         /* An entry with the same id in the "added" part of the diff means the
          * operation was replaced rather than deleted.
          */
         op_match = xpath_search(diff, rsc_op_xpath);
         if (numXpathResults(op_match) == 0) {
             /* Prevent false positives by matching cancelations too */
             const char *node = get_node_id(match);
             crm_action_t *cancelled = get_cancel_action(op_id, node);
 
             if (cancelled == NULL) {
                 crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id,
                           node);
                 abort_transition(INFINITY, tg_restart, "Resource op removal", match);
                 freeXpathObject(op_match);
                 free(rsc_op_xpath);
                 goto bail;
 
             } else {
                 crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
                           op_id, node, cancelled->id);
             }
         }
 
         freeXpathObject(op_match);
         free(rsc_op_xpath);
     }
 
   bail:
     /* Releases whichever result set was current when processing stopped */
     freeXpathObject(xpathObj);
 }
 
 /*
  * Handle a message addressed to the transition engine.
  *
  * Returns FALSE when the message has a task but is not addressed to
  * CRM_SYSTEM_TENGINE, or when an LRM (N)ACK contains no operation entries.
  * Returns TRUE in every other case, including a message with no task and a
  * message carrying an unknown command (which is logged as an error).
  */
 gboolean
 process_te_message(xmlNode * msg, xmlNode * xml_data)
 {
     const char *origin = crm_element_value(msg, F_ORIG);
     const char *dest_sys = crm_element_value(msg, F_CRM_SYS_TO);
     const char *src_sys = crm_element_value(msg, F_CRM_SYS_FROM);
     const char *reference = crm_element_value(msg, F_CRM_REFERENCE);
     const char *op = crm_element_value(msg, F_CRM_TASK);
     const char *type = crm_element_value(msg, F_CRM_MSG_TYPE);
 
     crm_trace("Processing %s (%s) message", op, reference);
     crm_log_xml_trace(msg, "ipc");
 
     /* A message without a task is silently accepted */
     if (op != NULL) {
         if (dest_sys == NULL || strcasecmp(dest_sys, CRM_SYSTEM_TENGINE) != 0) {
             crm_trace("Bad sys-to %s", crm_str(dest_sys));
             return FALSE;
         }
 
         if (safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(src_sys, CRM_SYSTEM_LRMD)) {
             xmlXPathObject *ops = NULL;
 
             crm_log_xml_trace(msg, "Processing (N)ACK");
             crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), origin);
 
             ops = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
             if (numXpathResults(ops) == 0) {
                 crm_log_xml_err(msg, "Invalid (N)ACK");
                 freeXpathObject(ops);
                 return FALSE;
             }
 
             process_resource_updates(ops);
             freeXpathObject(ops);
 
         } else {
             crm_err("Unknown command: %s::%s from %s", type, op, src_sys);
         }
     }
 
     crm_trace("finished processing message");
 
     return TRUE;
 }
 
 GHashTable *stonith_failures = NULL;
 struct st_fail_rec {
     int count;
 };
 
 /*
  * Report whether any entry in stonith_failures has a failure count above 10.
  *
  * Returns FALSE when the table has not been created yet or when no entry
  * exceeds the limit. The first entry found over the limit is logged and ends
  * the scan.
  */
 gboolean
 too_many_st_failures(void)
 {
     gboolean exceeded = FALSE;
 
     if (stonith_failures != NULL) {
         GHashTableIter cursor;
         const char *target = NULL;
         struct st_fail_rec *rec = NULL;
 
         g_hash_table_iter_init(&cursor, stonith_failures);
         while (exceeded == FALSE
                && g_hash_table_iter_next(&cursor, (gpointer *) & target, (gpointer *) & rec)) {
             if (rec->count > 10) {
                 crm_notice("Too many failures to fence %s (%d), giving up", target, rec->count);
                 exceeded = TRUE;
             }
         }
     }
 
     return exceeded;
 }
 
+/*
+ * Clear the recorded fencing-failure count for a target.
+ *
+ * target: name of the fenced node; NULL is ignored.
+ *
+ * This is a no-op when no failures have been recorded yet. The table is only
+ * created by the first fencing callback, so it may still be NULL when this is
+ * called from a fencing notification.
+ */
+void
+reset_st_fail_count(const char *target)
+{
+    struct st_fail_rec *rec = NULL;
+
+    /* g_hash_table_lookup() must not be handed a NULL table or key */
+    if (stonith_failures != NULL && target != NULL) {
+        rec = g_hash_table_lookup(stonith_failures, target);
+    }
+
+    if (rec != NULL) {
+        rec->count = 0;
+    }
+}
+
+
 /*
  * Completion callback for a fencing operation requested by the transition
  * engine.
  *
  * stonith: the fencing connection (unused here).
  * data:    call id, result code and the transition key that was registered
  *          as user data for the request.
  *
  * The user data string is owned by this callback and is released on every
  * path that gets past the initial NULL check. Results are ignored unless we
  * are the DC and the key belongs to the current, still-running transition.
  *
  * On success the action is confirmed (sending a stonith update if one has
  * not gone out yet) and the target's failure count is cleared. On failure
  * the action is marked failed, the transition is aborted unless the action
  * is allowed to fail, and the target's failure count is incremented.
  */
 void
 tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
 {
     char *uuid = NULL;
     int target_rc = -1;
     int stonith_id = -1;
     int transition_id = -1;
     crm_action_t *action = NULL;
     const char *target = NULL;
     int call_id = data->call_id;
     int rc = data->rc;
     char *userdata = data->userdata;
 
     CRM_CHECK(userdata != NULL, return);
     crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
                pcmk_strerror(rc), rc);
 
     if (AM_I_DC == FALSE) {
         /* Nothing to process, but userdata is still ours to release */
         goto bail;
     }
 
     /* filter out old STONITH actions */
     CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, &target_rc),
               crm_err("Invalid event detected");
               goto bail;
         );
 
     if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
         || transition_graph->id != transition_id) {
         crm_info("Ignoring STONITH action initiated outside of the current transition");
         goto bail;
     }
 
     /* this will mark the event complete if a match is found */
     action = get_action(stonith_id, FALSE);
     if (action == NULL) {
         crm_err("Stonith action not matched");
         goto bail;
     }
 
     stop_te_timer(action->timer);
     if (stonith_failures == NULL) {
         stonith_failures =
             g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free);
     }
 
     target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
 
     if (rc == pcmk_ok) {
         const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
 
         crm_debug("Stonith operation %d for %s passed", call_id, target);
         if (action->confirmed == FALSE) {
             action->confirmed = TRUE;
             if (action->sent_update == FALSE) {
                 send_stonith_update(action, target, target_uuid);
             }
         }
 
         /* A NULL key must never reach the string hash function */
         if (target != NULL) {
             reset_st_fail_count(target);
         }
 
     } else {
         const char *allow_fail = crm_meta_value(action->params, XML_ATTR_TE_ALLOWFAIL);
 
         action->failed = TRUE;
         if (crm_is_true(allow_fail) == FALSE) {
             crm_notice("Stonith operation %d for %s failed (%s): aborting transition.", call_id,
                        target, pcmk_strerror(rc));
             abort_transition(INFINITY, tg_restart, "Stonith failed", NULL);
         }
 
         if (target != NULL) {
             struct st_fail_rec *rec = g_hash_table_lookup(stonith_failures, target);
 
             if (rec != NULL) {
                 rec->count++;
 
             } else {
                 char *key = strdup(target);
 
                 rec = calloc(1, sizeof(*rec));
                 if (key != NULL && rec != NULL) {
                     /* The table takes ownership of both key and record */
                     rec->count = 1;
                     g_hash_table_insert(stonith_failures, key, rec);
 
                 } else {
                     crm_err("Could not record fencing failure for %s: out of memory", target);
                     free(key);
                     free(rec);
                 }
             }
         }
     }
 
     update_graph(transition_graph, action);
     trigger_graph();
 
   bail:
     free(userdata);
     free(uuid);
     return;
 }
 
 /*
  * CIB callback for the update that records a fencing result.
  *
  * user_data is treated as a string naming what the update was for and is
  * freed here in all cases. A negative rc is logged together with the message
  * and aborts the transition with tg_shutdown.
  */
 void
 cib_fencing_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     if (rc >= pcmk_ok) {
         crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
 
     } else {
         crm_err("Fencing update %d for %s: failed - %s (%d)",
                 call_id, (char *)user_data, pcmk_strerror(rc), rc);
         crm_log_xml_warn(msg, "Failed update");
         abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
     }
 
     free(user_data);
 }
 
 /*
  * CIB callback for an action update: logs an error when rc is negative and
  * does nothing otherwise. msg, output and user_data are not used.
  */
 void
 cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     if (rc >= pcmk_ok) {
         return;
     }
 
     crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
 }
 
 /*
  * CIB callback for a fail-count update: logs an error when rc is negative
  * and does nothing otherwise. msg, output and user_data are not used.
  */
 void
 cib_failcount_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     if (rc >= pcmk_ok) {
         return;
     }
 
     crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
 }
 
 /*
  * Timer callback invoked when a transition action's timer expires.
  *
  * data: the crm_action_timer_t that fired.
  *
  * Always returns FALSE. The timer is stopped first. Nothing further happens
  * when the graph is already complete, and a timer whose reason is
  * timeout_action_warn only logs the action. Any other timer marks its action
  * failed and confirmed, aborts the transition, updates and re-triggers the
  * graph, and for resource actions other than "cancel" records a timeout
  * result in the CIB.
  */
 gboolean
 action_timer_callback(gpointer data)
 {
     crm_action_timer_t *timer = NULL;
 
     CRM_CHECK(data != NULL, return FALSE);
 
     timer = (crm_action_timer_t *) data;
     stop_te_timer(timer);
 
     crm_warn("Timer popped (timeout=%d, abort_level=%d, complete=%s)",
              timer->timeout,
              transition_graph->abort_priority, transition_graph->complete ? "true" : "false");
 
     CRM_CHECK(timer->action != NULL, return FALSE);
 
     if (transition_graph->complete) {
         crm_warn("Ignoring timeout while not in transition");
 
     } else if (timer->reason == timeout_action_warn) {
         print_action(LOG_WARNING, "Action missed its timeout: ", timer->action);
 
         /* Don't check the FSA state
          *
          * We might also be in S_INTEGRATION or some other state waiting for this
          * action so we can close the transition and continue
          */
 
     } else {
         /* fail the action */
         gboolean send_update = TRUE;
         const char *task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK);
 
         print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action);
 
         /* Mark the action failed and confirmed before updating the graph */
         timer->action->failed = TRUE;
         timer->action->confirmed = TRUE;
         abort_transition(INFINITY, tg_restart, "Action lost", NULL);
 
         update_graph(transition_graph, timer->action);
         trigger_graph();
 
         /* Only resource actions other than "cancel" get a CIB update */
         if (timer->action->type != action_type_rsc) {
             send_update = FALSE;
         } else if (safe_str_eq(task, "cancel")) {
             /* we don't need to update the CIB with these */
             send_update = FALSE;
         }
 
         if (send_update) {
             /* cib_action_update(timer->action, PCMK_LRM_OP_PENDING, PCMK_EXECRA_STATUS_UNKNOWN); */
             cib_action_update(timer->action, PCMK_LRM_OP_TIMEOUT, PCMK_EXECRA_UNKNOWN_ERROR);
         }
     }
 
     return FALSE;
 }
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index ede1eb0fb4..06b3dafb5a 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -1,405 +1,410 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 
 #include <crm/msg_xml.h>
 
 #include <crm/common/xml.h>
 #include <tengine.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crm/fencing/internal.h>
 
 crm_trigger_t *stonith_reconnect = NULL;
 GListPtr stonith_cleanup_list = NULL;
 
 /*
  * Mark every unconfirmed fencing action in the graph as failed.
  *
  * Confirmed synapses are skipped. Within the remaining synapses, each
  * unconfirmed action of type action_type_crm whose task is CRM_OP_FENCE is
  * flagged as failed and passed to update_graph().
  *
  * Returns TRUE, after aborting the transition, if at least one action was
  * failed. Returns FALSE otherwise, including when graph is NULL.
  */
 static gboolean
 fail_incompletable_stonith(crm_graph_t * graph)
 {
     GListPtr syn_iter = NULL;
     xmlNode *last_failed = NULL;
 
     if (graph == NULL) {
         return FALSE;
     }
 
     for (syn_iter = graph->synapses; syn_iter != NULL; syn_iter = syn_iter->next) {
         GListPtr act_iter = NULL;
         synapse_t *synapse = (synapse_t *) syn_iter->data;
 
         if (synapse->confirmed) {
             continue;
         }
 
         for (act_iter = synapse->actions; act_iter != NULL; act_iter = act_iter->next) {
             const char *task = NULL;
             crm_action_t *action = (crm_action_t *) act_iter->data;
 
             if (action->confirmed || action->type != action_type_crm) {
                 continue;
             }
 
             task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
             if (task == NULL || !safe_str_eq(task, CRM_OP_FENCE)) {
                 continue;
             }
 
             action->failed = TRUE;
             last_failed = action->xml;
             update_graph(graph, action);
             crm_notice("Failing action %d (%s): STONITHd terminated",
                        action->id, ID(action->xml));
         }
     }
 
     if (last_failed == NULL) {
         return FALSE;
     }
 
     crm_warn("STONITHd failure resulted in un-runnable actions");
     abort_transition(INFINITY, tg_restart, "Stonith failure", last_failed);
     return TRUE;
 }
 
 /*
  * Notification handler for loss of the fencing daemon connection.
  *
  * If fencing is required (R_ST_REQUIRED is set) the loss is logged as
  * critical and a reconnect is triggered; otherwise it is only noted. The API
  * object is then marked disconnected, and on the DC any fencing actions that
  * can no longer complete are failed and the graph is re-triggered.
  */
 static void
 tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
 {
     if (is_set(fsa_input_register, R_ST_REQUIRED) == FALSE) {
         crm_info("Fencing daemon disconnected");
 
     } else {
         crm_crit("Fencing daemon connection failed");
         mainloop_set_trigger(stonith_reconnect);
     }
 
     /* cbchan will be garbage at this point, arrange for it to be reset */
     stonith_api->state = stonith_disconnected;
 
     if (AM_I_DC == FALSE) {
         return;
     }
 
     fail_incompletable_stonith(transition_graph);
     trigger_graph();
 }
 
 #if SUPPORT_CMAN
 #  include <libfenced.h>
 #endif
 
 /*
  * Notification handler for fencing results.
  *
  * st:       the fencing connection (unused here).
  * st_event: details of the fencing operation; a NULL event is logged and
  *           ignored.
  *
  * A successful event naming this node as the target raises I_ERROR and stops
  * processing. For any other successful event:
  *   - when no DC is recorded, or the target was the DC, the executioner
  *     sends a stonith update and the target is appended to
  *     stonith_cleanup_list;
  *   - otherwise, when this node is the DC and the request came from a client
  *     other than this process, a stonith update is sent and the transition
  *     is aborted.
  */
 static void
 tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
 {
     /* "<crm_system_name>.<pid>", built once and kept for later calls; used
      * below to recognise requests that originated from this process.
      */
     static char *client_id = NULL;
     if(client_id == NULL) {
         client_id = g_strdup_printf("%s.%d", crm_system_name, getpid());
     }
 
     if (st_event == NULL) {
         crm_err("Notify data not found");
         return;
     }
 
     if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
         crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner,
                 st_event->origin);
         register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
         return;
     }
 
+    if (st_event->result == pcmk_ok &&
+        safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
+        reset_st_fail_count(st_event->target);
+    }
+
     crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
                st_event->target, st_event->result == pcmk_ok ? "" : " not",
                st_event->operation,
                st_event->executioner ? st_event->executioner : "<anyone>",
                st_event->origin, pcmk_strerror(st_event->result), st_event->id,
                st_event->client_origin ? st_event->client_origin : "<unknown>");
 
 #if SUPPORT_CMAN
     if (st_event->result == pcmk_ok && is_cman_cluster()) {
         int local_rc = 0;
         char *target_copy = strdup(st_event->target);
 
         /* In case fenced hasn't noticed yet
          *
          * Any fencing that has been initiated will be completed by way of the fence_pcmk redirect
          */
         local_rc = fenced_external(target_copy);
         if (local_rc != 0) {
             crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                     local_rc);
         } else {
             crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
         }
         free(target_copy);
     }
 #endif
 
     if (st_event->result == pcmk_ok) {
         gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
 
         crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
         if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
             crm_notice("Target %s our leader %s (recorded: %s)",
                        fsa_our_dc ? "was" : "may have been", st_event->target,
                        fsa_our_dc ? fsa_our_dc : "<unset>");
 
             /* Given the CIB resyncing that occurs around elections,
              * have one node update the CIB now and, if the new DC is different,
              * have them do so too after the election
              */
             if (we_are_executioner) {
                 const char *uuid = get_uuid(st_event->target);
 
                 send_stonith_update(NULL, st_event->target, uuid);
             }
             /* The list stores its own copy of the target name */
             stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));
 
         } else if (AM_I_DC &&
                    st_event->client_origin &&
                    safe_str_neq(st_event->client_origin, client_id)) {
             const char *uuid = get_uuid(st_event->target);
 
             /* If a remote process outside of pacemaker invoked stonith to
              * fence someone, report the fencing result to the cib
              * and abort the transition graph. */
             crm_info("External fencing operation from %s fenced %s", st_event->client_origin,
                      st_event->target);
             send_stonith_update(NULL, st_event->target, uuid);
             abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
         }
     }
 }
 
 /*
  * Connect to the fencing daemon and register the notification handlers.
  *
  * user_data: when non-NULL, a failed attempt re-arms the stonith_reconnect
  *            trigger and returns immediately; when NULL, up to 30 attempts
  *            are made from within this call.
  *
  * Always returns TRUE. The API object is created on first use, and nothing is
  * done if it is not in the stonith_disconnected state.
  */
 gboolean
 te_connect_stonith(gpointer user_data)
 {
     int lpc = 0;
     int rc = pcmk_ok;
 
     if (stonith_api == NULL) {
         stonith_api = stonith_api_new();
     }
 
     if (stonith_api->state != stonith_disconnected) {
         crm_trace("Still connected");
         return TRUE;
     }
 
     for (lpc = 0; lpc < 30; lpc++) {
         crm_debug("Attempting connection to fencing daemon...");
 
         /* One second before each attempt plus one second after a failure
          * gives the 2s pause mentioned in the error message below.
          */
         sleep(1);
         rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
 
         if (rc == pcmk_ok) {
             break;
         }
 
         if (user_data != NULL) {
             crm_err("Sign-in failed: triggered a retry");
             mainloop_set_trigger(stonith_reconnect);
             return TRUE;
         }
 
         crm_err("Sign-in failed: pausing and trying again in 2s...");
         sleep(1);
     }
 
     CRM_CHECK(rc == pcmk_ok, return TRUE);      /* If not, we failed 30 times... just get out */
     stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
                                              tengine_stonith_connection_destroy);
 
     stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
                                              tengine_stonith_notify);
 
     crm_trace("Connected");
     return TRUE;
 }
 
 /*
  * Stop a transition-engine timer.
  *
  * timer: the timer to stop; NULL is accepted.
  *
  * Returns TRUE if a running timer was stopped. Returns FALSE if timer is NULL
  * or it was not running (source_id == 0).
  *
  * Exactly one trace message is emitted per call. The timer is described as
  * the "global timer" when its reason is timeout_abort and as an "action
  * timer" otherwise.
  */
 gboolean
 stop_te_timer(crm_action_timer_t * timer)
 {
     const char *timer_desc = "action timer";
 
     if (timer == NULL) {
         return FALSE;
     }
     if (timer->reason == timeout_abort) {
         timer_desc = "global timer";
     }
 
     if (timer->source_id == 0) {
         crm_trace("%s was already stopped", timer_desc);
         return FALSE;
     }
 
     crm_trace("Stopping %s", timer_desc);
     g_source_remove(timer->source_id);
     timer->source_id = 0;
 
     return TRUE;
 }
 
 /*
  * Trigger callback that runs the current transition graph.
  *
  * Always returns TRUE. Nothing happens when there is no graph or when the FSA
  * is in a state in which graphs are not run. An incomplete graph is run once.
  * If it is still active or pending, the function returns and waits for the
  * next trigger. Otherwise the graph is marked complete and notify_crmd() is
  * called, with a warning first if the result was anything other than
  * transition_complete.
  */
 gboolean
 te_graph_trigger(gpointer user_data)
 {
     enum transition_status graph_rc = -1;
 
     if (transition_graph == NULL) {
         crm_debug("Nothing to do");
         return TRUE;
     }
 
     crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
 
     /* States in which the graph must not be run */
     switch (fsa_state) {
         case S_STARTING:
         case S_PENDING:
         case S_NOT_DC:
         case S_HALT:
         case S_ILLEGAL:
         case S_STOPPING:
         case S_TERMINATE:
             return TRUE;
         default:
             break;
     }
 
     if (transition_graph->complete == FALSE) {
         graph_rc = run_graph(transition_graph);
         print_graph(LOG_DEBUG_3, transition_graph);
 
         switch (graph_rc) {
             case transition_active:
                 crm_trace("Transition not yet complete");
                 return TRUE;
 
             case transition_pending:
                 crm_trace("Transition not yet complete - no actions fired");
                 return TRUE;
 
             case transition_complete:
                 break;
 
             default:
                 crm_warn("Transition failed: %s", transition_status(graph_rc));
                 print_graph(LOG_NOTICE, transition_graph);
                 break;
         }
     }
 
     crm_debug("Transition %d is now complete", transition_graph->id);
     transition_graph->complete = TRUE;
     notify_crmd(transition_graph);
 
     return TRUE;
 }
 
 /*
  * Set the transition_trigger mainloop trigger and trace who asked for it.
  *
  * fn and line identify the requesting call site; they are used only in the
  * trace message.
  */
 void
 trigger_graph_processing(const char *fn, int line)
 {
     mainloop_set_trigger(transition_trigger);
     crm_trace("%s:%d - Triggered graph processing", fn, line);
 }
 
 /*
  * Log a transition abort request and act on it.
  *
  * abort_priority, abort_action and abort_text are handed to
  * update_abort_priority() when the graph is still in progress; abort_text
  * is also included in the log message.
  * reason is an optional XML element describing the cause; when present its
  * tag, id and transition magic are logged, together with the CIB version
  * when a diff can be found beneath it.
  * fn and line identify the call site and are used for logging only.
  *
  * After logging:
  *  - in the FSA states enumerated in the switch below the abort is
  *    suppressed and nothing else happens;
  *  - otherwise fsa_pe_ref is freed and cleared;
  *  - if the graph is still in progress, the abort priority is updated and
  *    transition_trigger is set;
  *  - if the graph is already complete, either transition_timer is restarted
  *    (when it has a non-zero period) or I_PE_CALC is registered, the latter
  *    only while too_many_st_failures() reports FALSE.
  */
 void
 abort_transition_graph(int abort_priority, enum transition_action abort_action,
                        const char *abort_text, xmlNode * reason, const char *fn, int line)
 {
     const char *magic = NULL;
 
     CRM_CHECK(transition_graph != NULL, return);
 
     if (reason == NULL) {
         crm_info("%s:%d - Triggered transition abort (complete=%d) : %s",
                  fn, line, transition_graph->complete, abort_text);
 
     } else {
         int add_admin_epoch = 0;
         int add_epoch = 0;
         int add_updates = 0;
 
         int del_admin_epoch = 0;
         int del_epoch = 0;
         int del_updates = 0;
 
         const char *magic_text = NULL;
         xmlNode *cib_diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2);
 
         magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
         magic_text = (magic != NULL) ? magic : "NA";
 
         if (cib_diff == NULL) {
             /* No diff available: log without the CIB version */
             crm_info
                 ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s",
                  fn, line, transition_graph->complete, TYPE(reason), ID(reason),
                  magic_text, abort_text);
 
         } else {
             cib_diff_version_details(cib_diff,
                                      &add_admin_epoch, &add_epoch, &add_updates,
                                      &del_admin_epoch, &del_epoch, &del_updates);
 
             if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) {
                 /* nvpair elements additionally get their name and value logged */
                 crm_info
                     ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s",
                      fn, line, transition_graph->complete, TYPE(reason), ID(reason), NAME(reason),
                      VALUE(reason), magic_text, add_admin_epoch, add_epoch,
                      add_updates, abort_text);
 
             } else {
                 crm_info
                     ("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s",
                      fn, line, transition_graph->complete, TYPE(reason), ID(reason),
                      magic_text, add_admin_epoch, add_epoch, add_updates,
                      abort_text);
             }
         }
     }
 
     /* Aborts are only logged, never acted upon, in these FSA states */
     switch (fsa_state) {
         case S_STARTING:
         case S_PENDING:
         case S_NOT_DC:
         case S_HALT:
         case S_ILLEGAL:
         case S_STOPPING:
         case S_TERMINATE:
             crm_info("Abort suppressed: state=%s (complete=%d)",
                      fsa_state2string(fsa_state), transition_graph->complete);
             return;
         default:
             break;
     }
 
     /* With no transition magic to identify the cause, dump the XML itself */
     if (reason != NULL && magic == NULL) {
         crm_log_xml_debug(reason, "Cause");
     }
 
     /* Make sure any queued calculations are discarded ASAP */
     free(fsa_pe_ref);
     fsa_pe_ref = NULL;
 
     if (transition_graph->complete == FALSE) {
         /* Graph still in progress: record the abort and trigger processing */
         update_abort_priority(transition_graph, abort_priority, abort_action, abort_text);
         mainloop_set_trigger(transition_trigger);
         return;
     }
 
     /* Graph already complete */
     if (transition_timer->period_ms > 0) {
         crm_timer_stop(transition_timer);
         crm_timer_start(transition_timer);
 
     } else if (too_many_st_failures() == FALSE) {
         register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
     }
 }