diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
index 175eb181c3..0e7ff481f3 100644
--- a/crmd/crmd_lrm.h
+++ b/crmd/crmd_lrm.h
@@ -1,162 +1,163 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crmd_messages.h>
 
 /* Entry points defined elsewhere in the crmd; only their signatures are
  * visible from this header. */
 extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level);
 extern void lrm_clear_last_failure(const char *rsc_id, const char *node_name);
 /* Receives lrmd operation events. remote_lrmd_ra.c also passes it the results
  * it synthesizes for remote connection resource actions. */
 void lrm_op_callback(lrmd_event_data_t * op);
 
 /* History record kept for a single resource.
  * NOTE(review): the per-field notes below are inferred from names and types
  * only -- confirm against the code that fills this structure in. */
 typedef struct resource_history_s {
     char *id;
     uint32_t last_callid;
     lrmd_rsc_info_t rsc;
     lrmd_event_data_t *last;    /* presumably the most recent event -- TODO confirm */
     lrmd_event_data_t *failed;  /* presumably the most recent failed event -- TODO confirm */
     GList *recurring_op_list;
 
     /* Resources must be stopped using the same
      * parameters they were started with.  This hashtable
      * holds the parameters that should be used for the next stop
      * cmd on this resource. */
     GHashTable *stop_params;
 } rsc_history_t;
 
 /* Destructor-style callback taking an untyped pointer.
  * NOTE(review): presumably releases an rsc_history_t -- confirm at the
  * definition. */
 void history_free(gpointer data);
 
 /* TODO - Replace this with lrmd_event_data_t */
 /* Bookkeeping for one operation tracked by the crmd.
  * NOTE(review): field meanings are not established by this header; the notes
  * below are assumptions to confirm against the users of this struct. */
 struct recurring_op_s {
     int call_id;
     int interval;
     gboolean remove;
     gboolean cancelled;
     unsigned int start_time;
     char *rsc_id;
     char *op_type;
     char *op_key;
     char *user_data;
     GHashTable *params;         /* presumably the operation's parameters -- TODO confirm */
 };
 
 /* State kept for one lrmd connection, looked up by node name
  * (see lrm_state_find()). */
 typedef struct lrm_state_s {
     const char *node_name;
     /* reserved for lrm_state.c usage only */
     void *conn;
     /* reserved for remote_lrmd_ra.c usage only */
     /* (remote_lrmd_ra.c stores its remote_ra_data_t here) */
     void *remote_ra_data;
 
     GHashTable *resource_history;
     /* emptied by remote_lrmd_ra.c when a remote connection is stopped
      * without having been taken over by another node */
     GHashTable *pending_ops;
     GHashTable *deletion_ops;
     GHashTable *rsc_info_cache;
 
     /* NOTE(review): presumably counts failed lrmd registration attempts --
      * confirm where it is incremented */
     int num_lrm_register_fails;
 } lrm_state_t;
 
 /* Pairs a resource name with an input message.
  * NOTE(review): presumably an entry of lrm_state_t.deletion_ops describing a
  * resource deletion still in progress -- confirm against its users. */
 struct pending_deletion_op_s {
     char *rsc;
     ha_msg_input_t *input;
 };
 
 /*!
  * \brief Check whether this is the local IPC connection to the lrmd
  */
 gboolean
 lrm_state_is_local(lrm_state_t *lrm_state);
 
 /*!
  * \brief Clear all state information from a single state entry.
  * \note This does not close the lrmd connection
  */
 void lrm_state_reset_tables(lrm_state_t * lrm_state);
 /* NOTE(review): presumably returns the known lrm_state_t entries; ownership
  * of the returned list is not visible from this header -- confirm. */
 GList *lrm_state_get_list(void);
 
 /*!
  * \brief Initialize the internal state tables
  */
 gboolean lrm_state_init_local(void);
 
 /*!
  * \brief Destroy all state entries and internal state tables
  */
 void lrm_state_destroy_all(void);
 
 /*!
  * \brief Create lrmd connection entry.
  */
 lrm_state_t *lrm_state_create(const char *node_name);
 
 /*!
  * \brief Destroy the lrmd connection entry keyed off the node name
  */
 void lrm_state_destroy(const char *node_name);
 
 /*!
  * \brief Find lrm_state data by node name
  */
 lrm_state_t *lrm_state_find(const char *node_name);
 
 /*!
  * \brief Either find an existing entry or create a new one
  */
 lrm_state_t *lrm_state_find_or_create(const char *node_name);
 
 /*!
  * The functions below are wrappers for the lrmd api calls the crmd
  * uses.  These wrapper functions allow us to treat the crmd's remote
  * lrmd connection resources the same as regular resources.  Internally,
  * regular resources go to the lrmd, and remote connection resources are
  * handled locally in the crmd.
  */
 void lrm_state_disconnect(lrm_state_t * lrm_state);
 int lrm_state_ipc_connect(lrm_state_t * lrm_state);
 /* Starts an asynchronous connection attempt. remote_lrmd_ra.c passes the
  * timeout in ms and treats a return of 0 as "attempt started, wait for the
  * connect event". */
 int lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
                                    int timeout);
 /* remote_lrmd_ra.c compares the result against TRUE */
 int lrm_state_is_connected(lrm_state_t * lrm_state);
 /* remote_lrmd_ra.c treats 0 as "poke sent, wait for the async response" and
  * a negative value as failure */
 int lrm_state_poke_connection(lrm_state_t * lrm_state);
 
 int lrm_state_get_metadata(lrm_state_t * lrm_state,
                            const char *class,
                            const char *provider,
                            const char *agent, char **output, enum lrmd_call_options options);
 int lrm_state_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval);
 /* interval, timeout and start_delay are all given in milliseconds */
 int lrm_state_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
                    int timeout, /* ms */
                    int start_delay,     /* ms */
                    lrmd_key_value_t * params);
 lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state,
                                         const char *rsc_id, enum lrmd_call_options options);
 int lrm_state_register_rsc(lrm_state_t * lrm_state,
                            const char *rsc_id,
                            const char *class,
                            const char *provider, const char *agent, enum lrmd_call_options options);
 int lrm_state_unregister_rsc(lrm_state_t * lrm_state,
                              const char *rsc_id, enum lrmd_call_options options);
 
 /*! These functions are used to manage the remote lrmd connection resources */
 /* Handles events arriving on a remote lrmd connection */
 void remote_lrm_op_callback(lrmd_event_data_t * op);
 /* TRUE when agent/provider are "remote"/"pacemaker", or when id names a known
  * lrm_state entry other than the local node */
 gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id);
 /* Returns a newly allocated description of the connection resource, or NULL
  * when rsc_id has no lrm_state entry */
 lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id);
 int remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval);
 /* interval, timeout and start_delay are all given in milliseconds */
 int remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
                    int timeout, /* ms */
                    int start_delay,     /* ms */
                    lrmd_key_value_t * params);
 /* Frees the remote RA data attached to lrm_state, including queued commands */
 void remote_ra_cleanup(lrm_state_t * lrm_state);
+void remote_ra_fail(const char *node_name);
 
 /* NOTE(review): defined outside this header; presumably processes a completed
  * operation event for lrm_state, with `pending` being the matching tracked
  * operation if any -- confirm at the definition. */
 gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending);
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
index ce54e507a8..b4aebd3e7b 100644
--- a/crmd/remote_lrmd_ra.c
+++ b/crmd/remote_lrmd_ra.c
@@ -1,1027 +1,1048 @@
 /* 
  * Copyright (C) 2013 David Vossel <davidvossel@gmail.com>
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  * 
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <limits.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 #include <crm/lrmd.h>
 #include <crm/services.h>
 
 /* Agent name identifying a remote connection resource; also reported as the
  * resource type by remote_ra_get_rsc_info() */
 #define REMOTE_LRMD_RA "remote"
 
 /* The max start timeout before cmd retry */
 /* (upper bound, in ms, on the timeout handed to a single connection attempt) */
 #define MAX_START_TIMEOUT_MS 10000
 
 /* One queued or in-flight action against a remote connection resource */
 typedef struct remote_ra_cmd_s {
     /*! the local node the cmd is issued from */
     char *owner;
     /*! the remote node the cmd is executed on */
     char *rsc_id;
     /*! the action to execute */
     char *action;
     /*! some string the client wants us to give it back */
     char *userdata;
     /*! start delay in ms */
     int start_delay;
     /*! timer id used for start delay. */
     int delay_id;
     /*! timeout in ms for cmd */
     int timeout;
     /*! ms of timeout left, as last computed by update_remaining_timeout() */
     int remaining_timeout;
     /*! recurring interval in ms */
     int interval;
     /*! interval timer id */
     int interval_id;
     /*! set to 1 once a successful monitor result has been reported */
     int reported_success;
     /*! timer id of the poke response timeout; 0 when not armed */
     int monitor_timeout_id;
     /*! timer id of the takeover wait timeout; 0 when not armed */
     int takeover_timeout_id;
     /*! action parameters */
     lrmd_key_value_t *params;
     /*! executed rc */
     int rc;
     /*! operation status reported together with rc */
     int op_status;
     /*! call id copied into the reported event */
     int call_id;
     /*! time reported as the run time (t_run) of the event */
     time_t start_time;
     /*! when TRUE, a recurring monitor is not rescheduled */
     gboolean cancel;
 } remote_ra_cmd_t;
 
 /* Progress of a connection migration away from this node. The status is
  * reset to 0 when a start or migrate_from action is executed. */
 enum remote_migration_status {
     /* set once a migrate_to action has been handled */
     expect_takeover = 1,
     /* a new client connected while a takeover was expected */
     takeover_complete,
 };
 
 /* Per-connection bookkeeping, stored in lrm_state_t.remote_ra_data */
 typedef struct remote_ra_data_s {
     /* trigger that runs handle_remote_ra_exec() to process queued commands */
     crm_trigger_t *work;
     /* command currently waiting for an asynchronous event, or NULL */
     remote_ra_cmd_t *cur_cmd;
     /* commands waiting to be executed, in order */
     GList *cmds;
     /* recurring commands waiting for their interval timer to fire */
     GList *recurring_cmds;
 
     enum remote_migration_status migrate_status;
 
     /* TRUE after a successful connect, FALSE again once stopped */
     gboolean active;
 } remote_ra_data_t;
 
 /* Forward declarations of helpers that are used before their definitions */
 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
 static GList *fail_all_monitor_cmds(GList * list);
 
 /*!
  * \internal
  * \brief Release a remote RA command
  *
  * Removes every timer still registered for the command, then frees its
  * strings, its parameter list and the command itself. The untyped argument
  * lets the function be passed to g_list_free_full(). NULL is accepted.
  *
  * \param[in] user_data  Command (remote_ra_cmd_t *) to free
  */
 static void
 free_cmd(gpointer user_data)
 {
     remote_ra_cmd_t *doomed = user_data;
     size_t idx = 0;
 
     if (doomed == NULL) {
         return;
     }
 
     {
         /* An id of 0 means the corresponding timer is not armed */
         const int source_ids[] = {
             doomed->delay_id,
             doomed->interval_id,
             doomed->monitor_timeout_id,
             doomed->takeover_timeout_id,
         };
 
         for (idx = 0; idx < sizeof(source_ids) / sizeof(source_ids[0]); idx++) {
             if (source_ids[idx] != 0) {
                 g_source_remove(source_ids[idx]);
             }
         }
     }
 
     free(doomed->owner);
     free(doomed->rsc_id);
     free(doomed->action);
     free(doomed->userdata);
     lrmd_key_value_freeall(doomed->params);
     free(doomed);
 }
 
 /*!
  * \internal
  * \brief Produce the next call id
  *
  * Ids count up from 1 and start over at 1 after INT_MAX has been handed out,
  * so the result is always positive.
  *
  * \return Positive call id
  */
 static int
 generate_callid(void)
 {
     static int remote_ra_callid = 0;
 
     /* Wrap before incrementing: overflowing a signed int is undefined
      * behaviour, so the counter must never be incremented past INT_MAX. */
     if (remote_ra_callid >= INT_MAX) {
         remote_ra_callid = 0;
     }
     remote_ra_callid++;
 
     return remote_ra_callid;
 }
 
 /*!
  * \internal
  * \brief Timer callback that re-queues a recurring command
  *
  * Moves the command from the connection's recurring list to the end of its
  * execution queue and fires the work trigger. Nothing is queued when the
  * connection (or its remote RA data) no longer exists.
  *
  * \param[in] data  Command (remote_ra_cmd_t *) whose interval elapsed
  *
  * \return Always FALSE
  */
 static gboolean
 recurring_helper(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *connection_rsc = lrm_state_find(cmd->rsc_id);
     remote_ra_data_t *ra_data = NULL;
 
     /* This timer has fired, so its id must not be removed again later */
     cmd->interval_id = 0;
 
     if (connection_rsc != NULL) {
         ra_data = connection_rsc->remote_ra_data;
     }
     if (ra_data == NULL) {
         return FALSE;
     }
 
     ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
     ra_data->cmds = g_list_append(ra_data->cmds, cmd);
     mainloop_set_trigger(ra_data->work);
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Timer callback run when a command's start delay has elapsed
  *
  * Clears the delay timer id and fires the connection's work trigger so the
  * queue gets processed.
  *
  * \param[in] data  Command (remote_ra_cmd_t *) whose delay elapsed
  *
  * \return Always FALSE
  */
 static gboolean
 start_delay_helper(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *connection_rsc = lrm_state_find(cmd->rsc_id);
     remote_ra_data_t *ra_data = NULL;
 
     /* A zero delay_id tells handle_remote_ra_exec() the wait is over */
     cmd->delay_id = 0;
 
     if (connection_rsc != NULL) {
         ra_data = connection_rsc->remote_ra_data;
     }
     if (ra_data != NULL) {
         mainloop_set_trigger(ra_data->work);
     }
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Handle cluster communication related to pacemaker_remote node joining
  *
  * Clears the node's old status (operation history, transient attributes and
  * probed attribute), records it as a member in the remote peer cache, sends
  * a state message and writes a node state entry to the CIB.
  *
  * \param[in] node_name  Name of newly integrated pacemaker_remote node
  */
 static void
 remote_node_up(const char *node_name)
 {
     int call_opt, call_id = 0;
     xmlNode *update, *state;
     crm_node_t *node;
 
     CRM_CHECK(node_name != NULL, return);
     crm_info("Announcing pacemaker_remote node %s", node_name);
 
     /* Clear node's operation history and transient attributes */
     call_opt = crmd_cib_smart_opt();
     erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
     erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
 
     /* Clear node's probed attribute */
     update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
 
     /* Ensure node is in the remote peer cache with member status */
     node = crm_remote_peer_get(node_name);
     CRM_CHECK(node != NULL, return);
     crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
 
     /* pacemaker_remote nodes don't participate in the membership layer,
      * so cluster nodes don't automatically get notified when they come and go.
      * We send a cluster message to the DC, and update the CIB node state entry,
      * so the DC will get it sooner (via message) or later (via CIB refresh),
      * and any other interested parties can query the CIB.
      */
     send_remote_state_message(node_name, TRUE);
 
     update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
     state = do_update_node_cib(node, node_update_cluster, update, __FUNCTION__);
 
     /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
      * needs to be fenced, this flag will allow various actions to determine
      * whether the fencing has happened yet.
      */
     crm_xml_add(state, XML_NODE_IS_FENCED, "0");
 
     /* TODO: If the remote connection drops, and this (async) CIB update either
      * failed or has not yet completed, later actions could mistakenly think the
      * node has already been fenced (if the XML_NODE_IS_FENCED attribute was
      * previously set, because it won't have been cleared). This could prevent
      * actual fencing or allow recurring monitor failures to be cleared too
      * soon. Ideally, we wouldn't rely on the CIB for the fenced status.
      */
     /* NOTE(review): call_id is tested right after this call although it is
      * passed by name, so fsa_cib_update is presumably a macro that stores the
      * CIB call result into it -- confirm. */
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
     if (call_id < 0) {
         crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
     }
     free_xml(update);
 }
 
 /*!
  * \internal
  * \brief Handle cluster communication related to pacemaker_remote node leaving
  *
  * Clears the node's attributes, records it as lost in the remote peer cache,
  * sends a state message and writes the node state to the CIB.
  *
  * \param[in] node_name  Name of lost node
  */
 static void
 remote_node_down(const char *node_name)
 {
     xmlNode *update;
     int call_id = 0;
     int call_opt = crmd_cib_smart_opt();
     crm_node_t *node;
 
     /* Clear all node attributes */
     update_attrd_remote_node_removed(node_name, NULL);
 
     /* Ensure node is in the remote peer cache with lost state */
     node = crm_remote_peer_get(node_name);
     CRM_CHECK(node != NULL, return);
     crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
 
     /* Notify DC */
     send_remote_state_message(node_name, FALSE);
 
     /* Update CIB node state */
     update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
     do_update_node_cib(node, node_update_cluster, update, __FUNCTION__);
     /* NOTE(review): call_id is tested right after this call although it is
      * passed by name, so fsa_cib_update is presumably a macro that stores the
      * CIB call result into it -- confirm. */
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
     if (call_id < 0) {
         crm_perror(LOG_ERR, "%s CIB node state update", node_name);
     }
     free_xml(update);
 }
 
 /*!
  * \internal
  * \brief Apply the effect a finished remote RA command has on node state
  *
  * Only commands that ended with PCMK_OCF_OK are considered: "start" announces
  * the node, "migrate_from" marks it as a member in the local peer cache, and
  * "stop" either announces the node as down or, after a completed takeover on
  * a non-DC node, drops it from the peer cache.
  *
  * \param[in] cmd  Completed remote RA command
  */
 static void
 check_remote_node_state(remote_ra_cmd_t *cmd)
 {
     const char *action = cmd->action;
 
     /* Failed actions never change node state */
     if (cmd->rc != PCMK_OCF_OK) {
         return;
     }
 
     if (safe_str_eq(action, "start")) {
         remote_node_up(cmd->rsc_id);
         return;
     }
 
     if (safe_str_eq(action, "migrate_from")) {
         /* Make sure this host's remote peer cache knows the node */
         crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
 
         CRM_CHECK(node != NULL, return);
         crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
         return;
     }
 
     if (safe_str_eq(action, "stop")) {
         lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
         remote_ra_data_t *ra_data = NULL;
 
         if (lrm_state != NULL) {
             ra_data = lrm_state->remote_ra_data;
         }
         if (ra_data == NULL) {
             return;
         }
 
         if (ra_data->migrate_status != takeover_complete) {
             /* No successful migration elsewhere, so the node really is down */
             remote_node_down(cmd->rsc_id);
 
         } else if (AM_I_DC == FALSE) {
             /* Node state is tracked only by the connection host and the DC.
              * The connection moved away and we are not DC, so forget the
              * node rather than keep stale information.
              */
             crm_remote_peer_cache_remove(cmd->rsc_id);
         }
     }
 }
 
 /*!
  * \internal
  * \brief Report the result of a remote RA command as an lrmd event
  *
  * Applies the command's effect on node state, builds an exec-complete event
  * from the command and passes it to lrm_op_callback(). The event is
  * stack-allocated and borrows the command's strings; only the parameter
  * table is allocated here and destroyed again before returning.
  *
  * \param[in] cmd  Command whose rc and op_status are to be reported
  */
 static void
 report_remote_ra_result(remote_ra_cmd_t * cmd)
 {
     lrmd_event_data_t op = { 0, };
     lrmd_key_value_t *param = NULL;
 
     check_remote_node_state(cmd);
 
     op.type = lrmd_event_exec_complete;
     op.call_id = cmd->call_id;
     op.remote_nodename = cmd->owner;
     op.rsc_id = cmd->rsc_id;
     op.op_type = cmd->action;
     op.user_data = cmd->userdata;
     op.timeout = cmd->timeout;
     op.interval = cmd->interval;
     op.rc = cmd->rc;
     op.op_status = cmd->op_status;
     op.t_run = cmd->start_time;
     op.t_rcchange = cmd->start_time;
 
     if (cmd->reported_success && (cmd->rc != PCMK_OCF_OK)) {
         op.t_rcchange = time(NULL);
 
         /* A failure that follows an already reported success must not carry
          * the same timestamp as that success, otherwise it would not be
          * processed correctly. A connection failure can be detected at any
          * moment after start has completed, so both can fall into the same
          * second (unlikely as that is); nudge the change time forward to
          * keep the two results distinguishable.
          */
         if (op.t_rcchange == op.t_run) {
             op.t_rcchange++;
         }
     }
 
     /* Copy the parameter list into the hash table form the event uses */
     param = cmd->params;
     if (param != NULL) {
         op.params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                           g_hash_destroy_str, g_hash_destroy_str);
         while (param != NULL) {
             g_hash_table_insert(op.params, strdup(param->key), strdup(param->value));
             param = param->next;
         }
     }
 
     lrm_op_callback(&op);
 
     if (op.params != NULL) {
         g_hash_table_destroy(op.params);
     }
 }
 
 /*!
  * \internal
  * \brief Recompute how much of a command's timeout is left
  *
  * The result is stored in cmd->remaining_timeout in ms. It is computed with
  * whole-second resolution and becomes zero or negative once the timeout has
  * been used up.
  *
  * \param[in,out] cmd  Command to update
  */
 static void
 update_remaining_timeout(remote_ra_cmd_t * cmd)
 {
     time_t elapsed_s = time(NULL) - cmd->start_time;
 
     cmd->remaining_timeout = ((cmd->timeout / 1000) - elapsed_s) * 1000;
 }
 
 /*!
  * \internal
  * \brief Timer callback that retries a pending start or migrate_from
  *
  * Does nothing unless the connection's current command is a "start" or
  * "migrate_from". If timeout remains and a new connection attempt could be
  * started, the command stays current and waits for the connect event.
  * Otherwise the command is reported as failed and freed.
  *
  * \param[in] data  Connection (lrm_state_t *) to retry
  *
  * \return Always FALSE
  */
 static gboolean
 retry_start_cmd_cb(gpointer data)
 {
     lrm_state_t *lrm_state = data;
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
     remote_ra_cmd_t *cmd = NULL;
 
     if (ra_data != NULL) {
         cmd = ra_data->cur_cmd;
     }
     if (cmd == NULL) {
         return FALSE;
     }
     if (safe_str_neq(cmd->action, "start") && safe_str_neq(cmd->action, "migrate_from")) {
         return FALSE;
     }
 
     update_remaining_timeout(cmd);
     if ((cmd->remaining_timeout > 0)
         && (handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout) == 0)) {
         /* wait for connection event */
         return FALSE;
     }
 
     /* Out of time, or the attempt could not even be started */
     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
     cmd->op_status = PCMK_LRM_OP_ERROR;
     report_remote_ra_result(cmd);
 
     if (ra_data->cmds != NULL) {
         mainloop_set_trigger(ra_data->work);
     }
     ra_data->cur_cmd = NULL;
     free_cmd(cmd);
 
     return FALSE;
 }
 
 
 /*!
  * \internal
  * \brief Timer callback run when waiting for a takeover event timed out
  *
  * Carries out the stop that was held back while waiting for the takeover,
  * then frees the command.
  *
  * \param[in] data  Stop command (remote_ra_cmd_t *) that was waiting
  *
  * \return Always FALSE
  */
 static gboolean
 connection_takeover_timeout_cb(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *lrm_state = NULL;
 
     crm_info("takeover event timed out for node %s", cmd->rsc_id);
 
     /* This timer has fired, so free_cmd() must not try to remove it */
     cmd->takeover_timeout_id = 0;
 
     lrm_state = lrm_state_find(cmd->rsc_id);
     handle_remote_ra_stop(lrm_state, cmd);
     free_cmd(cmd);
 
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Timer callback run when a poke got no response in time
  *
  * Reports the monitor as timed out, detaches it from the connection if it is
  * the current command, frees it and finally disconnects the connection.
  *
  * \param[in] data  Monitor command (remote_ra_cmd_t *) that timed out
  *
  * \return Always FALSE
  */
 static gboolean
 monitor_timeout_cb(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
     remote_ra_data_t *ra_data = NULL;
 
     crm_info("Poke async response timed out for node %s (%p)", cmd->rsc_id, lrm_state);
 
     /* This timer has fired, so free_cmd() must not try to remove it */
     cmd->monitor_timeout_id = 0;
     cmd->op_status = PCMK_LRM_OP_TIMEOUT;
     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
 
     if (lrm_state != NULL) {
         ra_data = lrm_state->remote_ra_data;
     }
     if (ra_data != NULL) {
         if (ra_data->cur_cmd == cmd) {
             ra_data->cur_cmd = NULL;
         }
         if (ra_data->cmds != NULL) {
             mainloop_set_trigger(ra_data->work);
         }
     }
 
     report_remote_ra_result(cmd);
     free_cmd(cmd);
 
     if (lrm_state != NULL) {
         lrm_state_disconnect(lrm_state);
     }
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Handle an event received on a remote lrmd connection
  *
  * Events are handled in this order:
  * - new-client events complete an expected takeover or, when unexpected,
  *   cause a disconnect;
  * - exec-complete events are passed on to lrm_op_callback() unless the
  *   connection has been taken over by another node;
  * - a disconnect while the connection is active and no command is in flight
  *   fails all recurring monitors;
  * - anything else is matched against the command currently in flight
  *   (start/migrate_from against connect events, monitor against poke and
  *   disconnect events).
  *
  * \param[in] op  Event to handle; op->remote_nodename selects the connection
  */
 void
 remote_lrm_op_callback(lrmd_event_data_t * op)
 {
     gboolean cmd_handled = FALSE;
     lrm_state_t *lrm_state = NULL;
     remote_ra_data_t *ra_data = NULL;
     remote_ra_cmd_t *cmd = NULL;
 
     crm_debug("remote connection event - event_type:%s node:%s action:%s rc:%s op_status:%s",
               lrmd_event_type2str(op->type),
               op->remote_nodename,
               op->op_type ? op->op_type : "none",
               services_ocf_exitcode_str(op->rc), services_lrm_status_str(op->op_status));
 
     lrm_state = lrm_state_find(op->remote_nodename);
     if (!lrm_state || !lrm_state->remote_ra_data) {
         crm_debug("lrm_state info not found for remote lrmd connection event");
         return;
     }
     ra_data = lrm_state->remote_ra_data;
 
     /* Another client has connected to the remote daemon,
      * determine if this is expected. */
     if (op->type == lrmd_event_new_client) {
         /* great, we knew this was coming */
         if (ra_data->migrate_status == expect_takeover) {
             ra_data->migrate_status = takeover_complete;
         } else {
             crm_err("Unexpected pacemaker_remote client takeover. Disconnecting");
             lrm_state_disconnect(lrm_state);
         }
         return;
     }
 
     /* filter all EXEC events up */
     if (op->type == lrmd_event_exec_complete) {
         if (ra_data->migrate_status == takeover_complete) {
             crm_debug("ignoring event, this connection is taken over by another node");
         } else {
             lrm_op_callback(op);
         }
         return;
     }
 
     /* Disconnect with no command in flight on an active connection: nothing
      * is waiting to notice it, so fail the recurring monitors right away */
     if ((op->type == lrmd_event_disconnect) &&
         (ra_data->cur_cmd == NULL) &&
         (ra_data->active == TRUE)) {
 
         crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
         ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
         ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
         return;
     }
 
     if (!ra_data->cur_cmd) {
         crm_debug("no event to match");
         return;
     }
 
     cmd = ra_data->cur_cmd;
 
     /* Start actions and migrate from actions complete after connection
      * comes back to us. */
     if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") ||
                                            safe_str_eq(cmd->action, "migrate_from"))) {
 
         if (op->connection_rc < 0) {
             update_remaining_timeout(cmd);
             /* There isn't much of a reason to reschedule if the timeout is too small */
             if (cmd->remaining_timeout > 3000) {
                 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                 /* retry in one second; the command stays the current one */
                 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                 return;
             } else {
                 crm_trace("can't reschedule start, remaining timeout too small %d",
                           cmd->remaining_timeout);
             }
             cmd->op_status = PCMK_LRM_OP_TIMEOUT;
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
 
         } else {
             lrm_state_reset_tables(lrm_state);
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             ra_data->active = TRUE;
         }
 
         crm_debug("remote lrmd connect event matched %s action. ", cmd->action);
         report_remote_ra_result(cmd);
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {
 
         /* the response arrived, so the response timeout is no longer needed */
         if (cmd->monitor_timeout_id) {
             g_source_remove(cmd->monitor_timeout_id);
             cmd->monitor_timeout_id = 0;
         }
 
         /* Only report success the first time, after that only worry about failures.
          * For this function, if we get the poke back, it is always a success. Pokes
          * only fail if the send fails, or the response times out. */
         if (!cmd->reported_success) {
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
             cmd->reported_success = 1;
         }
 
         crm_debug("remote lrmd poke event matched %s action. ", cmd->action);
 
         /* success, keep rescheduling if interval is present. */
         if (cmd->interval && (cmd->cancel == FALSE)) {
             ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
             cmd->interval_id = g_timeout_add(cmd->interval, recurring_helper, cmd);
             cmd = NULL;         /* prevent free */
         }
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
         /* a cancelled monitor, or one on an inactive connection, is dropped
          * without reporting a failure */
         if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
             cmd->op_status = PCMK_LRM_OP_ERROR;
             report_remote_ra_result(cmd);
             crm_err("remote-node %s unexpectedly disconneced during monitor operation", lrm_state->node_name);
         }
         cmd_handled = TRUE;
 
     /* NOTE(review): every lrmd_event_new_client event returns early near the
      * top of this function, so this branch looks unreachable as written --
      * confirm whether a waiting stop was meant to complete on takeover. */
     } else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {
 
         handle_remote_ra_stop(lrm_state, cmd);
         cmd_handled = TRUE;
 
     } else {
         crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
     }
 
     if (cmd_handled) {
         ra_data->cur_cmd = NULL;
         if (ra_data->cmds) {
             mainloop_set_trigger(ra_data->work);
         }
         /* cmd is NULL here when it was rescheduled as a recurring monitor */
         free_cmd(cmd);
     }
 }
 
 /*!
  * \internal
  * \brief Stop a remote connection and report the stop as successful
  *
  * Disconnects, drops every queued and recurring command of the connection
  * and reports \p cmd with PCMK_OCF_OK. The command itself is not freed here.
  *
  * \param[in] lrm_state  Connection to stop (must not be NULL)
  * \param[in] cmd        Stop command being executed
  */
 static void
 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
 {
     remote_ra_data_t *ra_data = NULL;
 
     CRM_ASSERT(lrm_state);
     ra_data = lrm_state->remote_ra_data;
 
     if (ra_data->migrate_status == takeover_complete) {
         /* The connection migrated away, so its history is no longer ours */
         lrm_state_reset_tables(lrm_state);
     } else {
         /* An intentional stop of the connection discards its pending ops */
         g_hash_table_remove_all(lrm_state->pending_ops);
     }
 
     ra_data->active = FALSE;
     lrm_state_disconnect(lrm_state);
     cmd->rc = PCMK_OCF_OK;
     cmd->op_status = PCMK_LRM_OP_DONE;
 
     /* Nothing queued for this connection can be executed any more */
     if (ra_data->cmds != NULL) {
         g_list_free_full(ra_data->cmds, free_cmd);
         ra_data->cmds = NULL;
     }
     if (ra_data->recurring_cmds != NULL) {
         g_list_free_full(ra_data->recurring_cmds, free_cmd);
         ra_data->recurring_cmds = NULL;
     }
     ra_data->cur_cmd = NULL;
 
     report_remote_ra_result(cmd);
 }
 
 /*!
  * \internal
  * \brief Begin an asynchronous connection attempt to a remote node
  *
  * The server is taken from the command's "addr" or "server" parameter and
  * the port from its "port" parameter; when a key occurs more than once the
  * last occurrence wins. A single attempt never gets more than
  * MAX_START_TIMEOUT_MS.
  *
  * \param[in] lrm_state   Connection to establish
  * \param[in] cmd         Command supplying the connection parameters
  * \param[in] timeout_ms  Time available for the attempt, in ms
  *
  * \return Result of lrm_state_remote_connect_async()
  */
 static int
 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
 {
     const char *server = NULL;
     int port = 0;
     int timeout_used = timeout_ms;
     lrmd_key_value_t *param = cmd->params;
 
     if (timeout_used > MAX_START_TIMEOUT_MS) {
         timeout_used = MAX_START_TIMEOUT_MS;
     }
 
     while (param != NULL) {
         if (safe_str_eq(param->key, "addr") || safe_str_eq(param->key, "server")) {
             server = param->value;
         }
         if (safe_str_eq(param->key, "port")) {
             port = atoi(param->value);
         }
         param = param->next;
     }
 
     return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
 }
 
 /*!
  * \internal
  * \brief Work trigger callback: execute the connection's queued commands
  *
  * Commands are taken from the front of the queue one at a time. Actions that
  * complete immediately are reported and freed here. Actions that need an
  * asynchronous event (a started connect, a sent poke, a stop waiting for a
  * takeover) become the current command and processing stops until that
  * command has been dealt with elsewhere.
  *
  * \param[in] user_data  Connection (lrm_state_t *) whose queue is processed
  *
  * \return Always TRUE
  */
 static gboolean
 handle_remote_ra_exec(gpointer user_data)
 {
     int rc = 0;
     lrm_state_t *lrm_state = user_data;
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
     remote_ra_cmd_t *cmd;
     GList *first = NULL;
 
     if (ra_data->cur_cmd) {
         /* still waiting on previous cmd */
         return TRUE;
     }
 
     while (ra_data->cmds) {
         first = ra_data->cmds;
         cmd = first->data;
         if (cmd->delay_id) {
             /* still waiting for start delay timer to trip */
             return TRUE;
         }
 
         /* unlink the command from the queue; from here on it is either kept
          * as the current command or freed at the bottom of the loop */
         ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
         g_list_free_1(first);
 
         if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
             /* a new connection attempt starts without any migration state */
             ra_data->migrate_status = 0;
             rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
             if (rc == 0) {
                 /* take care of this later when we get async connection result */
                 crm_debug("began remote lrmd connect, waiting for connect event.");
                 ra_data->cur_cmd = cmd;
                 return TRUE;
             } else {
                 crm_debug("connect failed, not expecting to match any connection event later");
                 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                 cmd->op_status = PCMK_LRM_OP_ERROR;
             }
             report_remote_ra_result(cmd);
 
         } else if (!strcmp(cmd->action, "monitor")) {
 
             if (lrm_state_is_connected(lrm_state) == TRUE) {
                 rc = lrm_state_poke_connection(lrm_state);
                 if (rc < 0) {
                     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                     cmd->op_status = PCMK_LRM_OP_ERROR;
                 }
             } else {
                 /* not connected: report "not running" without poking */
                 rc = -1;
                 cmd->op_status = PCMK_LRM_OP_DONE;
                 cmd->rc = PCMK_OCF_NOT_RUNNING;
             }
 
             if (rc == 0) {
                 crm_debug("poked remote lrmd at node %s, waiting for async response.", cmd->rsc_id);
                 ra_data->cur_cmd = cmd;
                 /* give up on the response after the command's timeout */
                 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                 return TRUE;
             }
             report_remote_ra_result(cmd);
 
         } else if (!strcmp(cmd->action, "stop")) {
 
             if (ra_data->migrate_status == expect_takeover) {
                 /* briefly wait on stop for the takeover event to occur. If the
                  * takeover event does not occur during the wait period, that's fine.
                  * It just means that the remote-node's lrm_status section is going to get
                  * cleared which will require all the resources running in the remote-node
                  * to be explicitly re-detected via probe actions.  If the takeover does occur
                  * successfully, then we can leave the status section intact. */
                 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                 ra_data->cur_cmd = cmd;
                 return TRUE;
             }
 
             handle_remote_ra_stop(lrm_state, cmd);
 
         } else if (!strcmp(cmd->action, "migrate_to")) {
             /* nothing to do except remember that another node is expected to
              * take over the connection */
             ra_data->migrate_status = expect_takeover;
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
         } else if (!strcmp(cmd->action, "reload")) {
             /* reloads are a no-op right now, add logic here when they become important */
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
         }
         /* any other action falls through and is freed without a result
          * being reported */
 
         free_cmd(cmd);
     }
 
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Attach remote RA bookkeeping to a connection, unless already present
  *
  * Allocates zeroed data and registers handle_remote_ra_exec() as its work
  * trigger.
  *
  * \param[in,out] lrm_state  Connection to initialize
  *
  * \note NOTE(review): the allocation result is used without a NULL check.
  */
 static void
 remote_ra_data_init(lrm_state_t * lrm_state)
 {
     if (lrm_state->remote_ra_data == NULL) {
         remote_ra_data_t *fresh = calloc(1, sizeof(*fresh));
 
         fresh->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
         lrm_state->remote_ra_data = fresh;
     }
 }
 
 /*!
  * \internal
  * \brief Free the remote RA bookkeeping attached to a connection
  *
  * Frees all queued and recurring commands, destroys the work trigger and
  * resets lrm_state->remote_ra_data to NULL. Does nothing if no data is
  * attached.
  *
  * \param[in,out] lrm_state  Connection to clean up
  */
 void
 remote_ra_cleanup(lrm_state_t * lrm_state)
 {
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 
     if (ra_data == NULL) {
         return;
     }
 
     if (ra_data->cmds != NULL) {
         g_list_free_full(ra_data->cmds, free_cmd);
     }
     if (ra_data->recurring_cmds != NULL) {
         g_list_free_full(ra_data->recurring_cmds, free_cmd);
     }
 
     mainloop_destroy_trigger(ra_data->work);
     free(ra_data);
     lrm_state->remote_ra_data = NULL;
 }
 
 /*!
  * \internal
  * \brief Check whether a resource is a remote connection resource
  *
  * \param[in] agent     Agent name, may be NULL
  * \param[in] provider  Provider name, may be NULL
  * \param[in] id        Resource id, may be NULL
  *
  * \return TRUE if agent and provider are REMOTE_LRMD_RA and "pacemaker", or
  *         if \p id has an lrm_state entry and differs from the local node
  *         name; FALSE otherwise
  */
 gboolean
 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
 {
     if ((agent != NULL) && (provider != NULL)) {
         if ((strcmp(agent, REMOTE_LRMD_RA) == 0) && (strcmp(provider, "pacemaker") == 0)) {
             return TRUE;
         }
     }
 
     if (id == NULL) {
         return FALSE;
     }
     if (!lrm_state_find(id)) {
         return FALSE;
     }
     if (safe_str_neq(id, fsa_our_uname)) {
         return TRUE;
     }
 
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Build resource info for a remote connection resource
  *
  * \param[in] lrm_state  Not used by this function
  * \param[in] rsc_id     Id of the connection resource
  *
  * \return Newly allocated info (id copied from rsc_id, type REMOTE_LRMD_RA,
  *         class "ocf", provider "pacemaker") when an lrm_state exists for
  *         rsc_id, otherwise NULL
  */
 lrmd_rsc_info_t *
 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
 {
     lrmd_rsc_info_t *result = NULL;
 
     if (lrm_state_find(rsc_id) == NULL) {
         return NULL;
     }
 
     result = calloc(1, sizeof(lrmd_rsc_info_t));
     result->id = strdup(rsc_id);
     result->type = strdup(REMOTE_LRMD_RA);
     result->class = strdup("ocf");
     result->provider = strdup("pacemaker");
 
     return result;
 }
 
 /*!
  * \internal
  * \brief Check whether an action name is one the remote RA implements
  *
  * \param[in] action  Action name (may be NULL)
  *
  * \return TRUE for "start", "stop", "reload", "migrate_to", "migrate_from"
  *         and "monitor" (exact, case-sensitive match); FALSE otherwise,
  *         including for NULL
  */
 static gboolean
 is_remote_ra_supported_action(const char *action)
 {
     static const char *const supported[] = {
         "start",
         "stop",
         "reload",
         "migrate_to",
         "migrate_from",
         "monitor",
     };
     size_t idx = 0;
 
     if (action == NULL) {
         return FALSE;
     }
 
     for (idx = 0; idx < sizeof(supported) / sizeof(supported[0]); idx++) {
         if (strcmp(action, supported[idx]) == 0) {
             return TRUE;
         }
     }
 
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Report failure for, and drop, every recurring monitor in a list
  *
  * Matching commands (interval > 0 and action "monitor") are first collected,
  * then each one is marked as failed, reported, unlinked from the list and
  * freed.
  *
  * \param[in] list  Command list to scan
  *
  * \return Head of the list after the matching commands were removed
  */
 static GList *
 fail_all_monitor_cmds(GList * list)
 {
     GList *doomed = NULL;
     GListPtr iter = list;
 
     /* Pass 1: collect the matches without modifying the list being walked */
     while (iter != NULL) {
         remote_ra_cmd_t *candidate = iter->data;
 
         if (candidate->interval > 0 && safe_str_eq(candidate->action, "monitor")) {
             doomed = g_list_append(doomed, candidate);
         }
         iter = iter->next;
     }
 
     /* Pass 2: fail, report and discard each collected command */
     for (iter = doomed; iter != NULL; iter = iter->next) {
         remote_ra_cmd_t *victim = iter->data;
 
         victim->rc = PCMK_OCF_UNKNOWN_ERROR;
         victim->op_status = PCMK_LRM_OP_ERROR;
         crm_trace("Pre-emptively failing %s %s (interval=%d, %s)", victim->action, victim->rsc_id, victim->interval, victim->userdata);
         report_remote_ra_result(victim);
 
         list = g_list_remove(list, victim);
         free_cmd(victim);
     }
 
     /* releases only the temporary list nodes; the cmds were freed above */
     g_list_free(doomed);
     return list;
 }
 
 /*!
  * \internal
  * \brief Remove and free the first command matching an action and interval
  *
  * \param[in] list      Command list to search
  * \param[in] action    Action name to match
  * \param[in] interval  Interval to match
  *
  * \return Head of the list after removal (unchanged when nothing matched)
  */
 static GList *
 remove_cmd(GList * list, const char *action, int interval)
 {
     GListPtr iter = NULL;
 
     for (iter = list; iter != NULL; iter = iter->next) {
         remote_ra_cmd_t *candidate = iter->data;
 
         if (candidate->interval == interval && safe_str_eq(candidate->action, action)) {
             /* iter is invalid after the removal, so leave right away */
             list = g_list_remove(list, candidate);
             free_cmd(candidate);
             return list;
         }
     }
 
     return list;
 }
 
 /*!
  * \internal
  * \brief Cancel a queued, recurring or in-flight remote RA command
  *
  * The first matching command is removed from both the pending and the
  * recurring list.  A matching command that is currently executing cannot be
  * removed, so it is only flagged as cancelled.
  *
  * \param[in] lrm_state  Not used by this function
  * \param[in] rsc_id     Id of the connection resource
  * \param[in] action     Action name to cancel
  * \param[in] interval   Interval of the action to cancel
  *
  * \return 0 on success, -EINVAL if no connection with remote RA data exists
  *         for rsc_id
  */
 int
 remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval)
 {
     lrm_state_t *connection_rsc = lrm_state_find(rsc_id);
     remote_ra_data_t *ra_data = NULL;
     remote_ra_cmd_t *in_flight = NULL;
 
     if (connection_rsc == NULL || connection_rsc->remote_ra_data == NULL) {
         return -EINVAL;
     }
     ra_data = connection_rsc->remote_ra_data;
 
     ra_data->cmds = remove_cmd(ra_data->cmds, action, interval);
     ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval);
 
     in_flight = ra_data->cur_cmd;
     if (in_flight == NULL) {
         return 0;
     }
 
     if ((in_flight->interval == interval) && safe_str_eq(in_flight->action, action)) {
         in_flight->cancel = TRUE;
     }
     return 0;
 }
 
 /*!
  * \internal
  * \brief Merge a new recurring monitor request into an existing one, if any
  *
  * Looks for an already-known "monitor" command with the same interval in
  * cur_cmd (unless it is flagged as cancelled), then in recurring_cmds, then
  * in cmds.  When one is found, its userdata is replaced (if new userdata was
  * given), it gets a new call id and start time if success was already
  * reported for it, and a pending interval timer is cancelled so the command
  * is handed to recurring_helper() right away.
  *
  * \param[in,out] ra_data   Remote RA data of the connection
  * \param[in]     interval  Interval of the requested monitor; 0 never merges
  * \param[in]     userdata  New userdata for the merged command (may be NULL)
  *
  * \return The existing command the request was merged into, or NULL if the
  *         interval is 0 or no duplicate exists
  */
 static remote_ra_cmd_t *
 handle_dup_monitor(remote_ra_data_t *ra_data, int interval, const char *userdata)
 {
     GList *gIter = NULL;
     remote_ra_cmd_t *cmd = NULL;
 
     /* there are 3 places a potential duplicate monitor operation
      * could exist.
      * 1. recurring_cmds list. where the op is waiting for its next interval
      * 2. cmds list, where the op is queued to get executed immediately
      * 3. cur_cmd, which means the monitor op is in flight right now.
      */
     if (interval == 0) {
         return NULL;
     }
 
     /* an in-flight command only counts while it is not flagged as cancelled */
     if (ra_data->cur_cmd &&
         ra_data->cur_cmd->cancel == FALSE &&
         ra_data->cur_cmd->interval == interval &&
         safe_str_eq(ra_data->cur_cmd->action, "monitor")) {
 
         cmd = ra_data->cur_cmd;
         goto handle_dup;
     }
 
     for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
         if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
             goto handle_dup;
         }
     }
 
     for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
         if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
             goto handle_dup;
         }
     }
 
     /* no duplicate anywhere; cmd may still point at the last list entry,
      * so it must not be returned here */
     return NULL;
 
 handle_dup:
 
     crm_trace("merging duplicate monitor cmd %s_monitor_%d", cmd->rsc_id, interval);
 
     /* update the userdata */
     if (userdata) {
        free(cmd->userdata);
        cmd->userdata = strdup(userdata);
     }
 
     /* if we've already reported success, generate a new call id */
     if (cmd->reported_success) {
         cmd->start_time = time(NULL);
         cmd->call_id = generate_callid();
         cmd->reported_success = 0;
     }
 
     /* if we have an interval_id set, that means we are in the process of
      * waiting for this cmd's next interval. instead of waiting, cancel
      * the timer and execute the action immediately */
     if (cmd->interval_id) {
         g_source_remove(cmd->interval_id);
         cmd->interval_id = 0;
         recurring_helper(cmd);
     }
 
     return cmd;  
 }
 
 /*!
  * \internal
  * \brief Queue an action for execution on a remote connection resource
  *
  * This function takes ownership of params on every path: they are either
  * stored in the newly queued command or freed before returning.
  *
  * \param[in] lrm_state    State whose node_name is recorded as command owner
  * \param[in] rsc_id       Id of the connection resource
  * \param[in] action       Action to run; must be a supported remote RA action
  * \param[in] userdata     Opaque caller data copied into the command (may be NULL)
  * \param[in] interval     Action interval in ms
  * \param[in] timeout      Action timeout in ms
  * \param[in] start_delay  Delay before the action is started, in ms
  * \param[in] params       Action parameters (ownership is transferred)
  *
  * \return Call id of the queued command (or of the existing monitor the
  *         request was merged into); -EINVAL if the action is unsupported or
  *         no connection exists for rsc_id; -ENOMEM if the command could not
  *         be allocated
  */
 int
 remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval,     /* ms */
                int timeout,     /* ms */
                int start_delay, /* ms */
                lrmd_key_value_t * params)
 {
     int rc = 0;
     lrm_state_t *connection_rsc = NULL;
     remote_ra_cmd_t *cmd = NULL;
     remote_ra_data_t *ra_data = NULL;
 
     if (is_remote_ra_supported_action(action) == FALSE) {
         rc = -EINVAL;
         goto exec_done;
     }
 
     connection_rsc = lrm_state_find(rsc_id);
     if (!connection_rsc) {
         rc = -EINVAL;
         goto exec_done;
     }
 
     remote_ra_data_init(connection_rsc);
     ra_data = connection_rsc->remote_ra_data;
 
     cmd = handle_dup_monitor(ra_data, interval, userdata);
     if (cmd) {
         /* The request was merged into an existing command, which keeps its
          * own parameters; the new ones are not adopted by anyone, so they
          * must be released here rather than leaked. */
         rc = cmd->call_id;
         goto exec_done;
     }
 
     cmd = calloc(1, sizeof(remote_ra_cmd_t));
     if (cmd == NULL) {
         rc = -ENOMEM;
         goto exec_done;
     }
 
     cmd->owner = strdup(lrm_state->node_name);
     cmd->rsc_id = strdup(rsc_id);
     cmd->action = strdup(action);
     if (userdata != NULL) {
         /* strdup(NULL) is undefined; leave userdata NULL when none is given */
         cmd->userdata = strdup(userdata);
     }
     cmd->interval = interval;
     cmd->timeout = timeout;
     cmd->start_delay = start_delay;
     cmd->params = params;
     cmd->start_time = time(NULL);
 
     cmd->call_id = generate_callid();
 
     if (cmd->start_delay) {
         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
     }
 
     ra_data->cmds = g_list_append(ra_data->cmds, cmd);
     mainloop_set_trigger(ra_data->work);
 
     return cmd->call_id;
   exec_done:
 
     lrmd_key_value_freeall(params);
     return rc;
 }
+
+/*!
+ * \internal
+ * \brief Immediately fail all monitors of a remote node, if proxied here
+ *
+ * Nothing is done unless an lrm_state exists for the node, that state is
+ * connected, and it carries remote RA data.
+ *
+ * \param[in] node_name  Name of pacemaker_remote node
+ */
+void
+remote_ra_fail(const char *node_name)
+{
+    lrm_state_t *lrm_state = lrm_state_find(node_name);
+    remote_ra_data_t *ra_data = NULL;
+
+    if (lrm_state == NULL || !lrm_state_is_connected(lrm_state)) {
+        return;
+    }
+
+    /* Remote RA data is only created once a command has been executed for
+     * the connection, so do not assume it is present. */
+    ra_data = lrm_state->remote_ra_data;
+    if (ra_data == NULL) {
+        return;
+    }
+
+    crm_info("Failing monitors on pacemaker_remote node %s", node_name);
+    ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+    ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+}
+
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 3a9f491e4f..03e3b04f23 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -1,638 +1,646 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 
 #include <crm/msg_xml.h>
 
 #include <crm/common/xml.h>
 #include <tengine.h>
 #include <crmd_fsa.h>
+#include <crmd_lrm.h>
 #include <crmd_messages.h>
 #include <throttle.h>
 #include <crm/fencing/internal.h>
 
 crm_trigger_t *stonith_reconnect = NULL;
 
 /*
  * stonith cleanup list
  *
  * If the DC is shot, proper notifications might not go out.
  * The stonith cleanup list allows the cluster to (re-)send
  * notifications once a new DC is elected.
  */
 
 static GListPtr stonith_cleanup_list = NULL;
 
 /*!
  * \internal
  * \brief Append a copy of a node name to the stonith cleanup list
  *
  * \param[in] target  Name of node to add
  */
 void
 add_stonith_cleanup(const char *target)
 {
     char *target_copy = strdup(target);
 
     stonith_cleanup_list = g_list_append(stonith_cleanup_list, target_copy);
 }
 
 /*!
  * \internal
  * \brief Remove every entry for a node from the stonith cleanup list
  *
  * \param[in] target  Name of node to remove
  */
 void
 remove_stonith_cleanup(const char *target)
 {
     GListPtr node = NULL;
     GListPtr following = NULL;
 
     for (node = stonith_cleanup_list; node != NULL; node = following) {
         char *entry_name = node->data;
 
         /* remember the successor before the current link may be deleted */
         following = node->next;
 
         if (!safe_str_eq(target, entry_name)) {
             continue;
         }
 
         crm_trace("Removing %s from the cleanup list", entry_name);
         stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, node);
         free(entry_name);
     }
 }
 
 /*!
  * \internal
  * \brief Discard every entry of the stonith cleanup list
  *
  * Each stored name is logged and freed, then the list itself is released
  * and reset to empty.
  */
 void
 purge_stonith_cleanup()
 {
     GListPtr node = stonith_cleanup_list;
 
     if (node == NULL) {
         return;
     }
 
     while (node != NULL) {
         char *entry_name = node->data;
 
         crm_info("Purging %s from stonith cleanup list", entry_name);
         free(entry_name);
         node = node->next;
     }
 
     g_list_free(stonith_cleanup_list);
     stonith_cleanup_list = NULL;
 }
 
 /*!
  * \internal
  * \brief Send a stonith update for each cleanup list entry, then empty the list
  */
 void
 execute_stonith_cleanup()
 {
     GListPtr node = stonith_cleanup_list;
 
     while (node != NULL) {
         char *entry_name = node->data;
         crm_node_t *entry_peer = crm_get_peer(0, entry_name);
         const char *entry_uuid = crm_peer_uuid(entry_peer);
 
         crm_notice("Marking %s, target of a previous stonith action, as clean", entry_name);
         send_stonith_update(NULL, entry_name, entry_uuid);
         free(entry_name);
 
         node = node->next;
     }
 
     g_list_free(stonith_cleanup_list);
     stonith_cleanup_list = NULL;
 }
 
 /* end stonith cleanup list functions */
 
 /*!
  * \internal
  * \brief Fail every unconfirmed fencing action of a transition graph
  *
  * Walks all unconfirmed synapses and marks each unconfirmed crm-type action
  * whose task is CRM_OP_FENCE as failed.  If at least one action was failed,
  * the transition is aborted with restart.
  *
  * \param[in,out] graph  Transition graph to process (may be NULL)
  *
  * \return TRUE if any action was failed, FALSE otherwise
  */
 static gboolean
 fail_incompletable_stonith(crm_graph_t * graph)
 {
     GListPtr syn_iter = NULL;
     xmlNode *last_failed_xml = NULL;
 
     if (graph == NULL) {
         return FALSE;
     }
 
     for (syn_iter = graph->synapses; syn_iter != NULL; syn_iter = syn_iter->next) {
         GListPtr act_iter = NULL;
         synapse_t *synapse = (synapse_t *) syn_iter->data;
 
         if (synapse->confirmed) {
             continue;
         }
 
         for (act_iter = synapse->actions; act_iter != NULL; act_iter = act_iter->next) {
             const char *task = NULL;
             crm_action_t *action = (crm_action_t *) act_iter->data;
 
             if (action->confirmed || action->type != action_type_crm) {
                 continue;
             }
 
             task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
             if (task == NULL || !safe_str_eq(task, CRM_OP_FENCE)) {
                 continue;
             }
 
             action->failed = TRUE;
             last_failed_xml = action->xml;
             update_graph(graph, action);
             crm_notice("Failing action %d (%s): STONITHd terminated",
                        action->id, ID(action->xml));
         }
     }
 
     if (last_failed_xml == NULL) {
         return FALSE;
     }
 
     crm_warn("STONITHd failure resulted in un-runnable actions");
     abort_transition(INFINITY, tg_restart, "Stonith failure", last_failed_xml);
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Handle loss of the connection to the fencing daemon
  *
  * Triggers a reconnect when fencing is still required, marks the stonith API
  * object as disconnected, and, on the DC, fails fencing actions that can no
  * longer complete before re-triggering the graph.
  *
  * \param[in] st  Not used by this function
  * \param[in] e   Not used by this function
  */
 static void
 tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
 {
     if (!is_set(fsa_input_register, R_ST_REQUIRED)) {
         crm_info("Fencing daemon disconnected");
 
     } else {
         crm_crit("Fencing daemon connection failed");
         mainloop_set_trigger(stonith_reconnect);
     }
 
     /* cbchan will be garbage at this point, arrange for it to be reset */
     if (stonith_api != NULL) {
         stonith_api->state = stonith_disconnected;
     }
 
     if (AM_I_DC) {
         fail_incompletable_stonith(transition_graph);
         trigger_graph();
     }
 }
 
 #if SUPPORT_CMAN
 #  include <libfenced.h>
 #endif
 
 char *te_client_id = NULL;
 
 #ifdef HAVE_SYS_REBOOT_H
 #  include <unistd.h>
 #  include <sys/reboot.h>
 #endif
 
 /*!
  * \internal
  * \brief Process a fencing notification received from the fencing daemon
  *
  * Unfencing ("on") results are only logged.  A successful fencing of the
  * local node halts or exits the process.  For any other event the result is
  * logged and, on success, the cluster status is updated for the target, the
  * monitors of a fenced remote node are failed, and the peer is marked as
  * down.
  *
  * \param[in] st        Not used by this function
  * \param[in] st_event  Notification data (NULL is logged as an error)
  */
 static void
 tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
 {
     /* Lazily build the "<system name>.<pid>" client id that is compared
      * against the event's client_origin further below */
     if(te_client_id == NULL) {
         te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid());
     }
 
     if (st_event == NULL) {
         crm_err("Notify data not found");
         return;
     }
 
     crmd_notify_fencing_op(st_event);
 
     if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
         crm_notice("%s was successfully unfenced by %s (at the request of %s)",
                    st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
                 /* TODO: Hook up st_event->device */
         return;
 
     } else if (safe_str_eq("on", st_event->action)) {
         crm_err("Unfencing of %s by %s failed: %s (%d)",
                 st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
                 pcmk_strerror(st_event->result), st_event->result);
         return;
 
     } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
         crm_crit("We were allegedly just fenced by %s for %s!",
                  st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */
 
         qb_log_fini(); /* Try to get the above log message to disk - somehow */
 
         /* Get out ASAP and do not come back up.
          *
          * Triggering a reboot is also not the worst idea either since
          * the rest of the cluster thinks we're safely down
          */
 
 #ifdef RB_HALT_SYSTEM
         reboot(RB_HALT_SYSTEM);
 #endif
 
         /*
          * If reboot() fails or is not supported, coming back up will
          * probably lead to a situation where the other nodes set our
          * status to 'lost' because of the fencing callback and will
          * discard subsequent election votes with:
          *
          * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
          *
          * So just stay dead, something is seriously messed up anyway.
          *
          */
         exit(100); /* None of our wrappers since we already called qb_log_fini() */
         return;
     }
 
     if (st_event->result == pcmk_ok &&
         safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
         st_fail_count_reset(st_event->target);
     }
 
     crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
                st_event->target, st_event->result == pcmk_ok ? "" : " not",
                st_event->action,
                st_event->executioner ? st_event->executioner : "<anyone>",
                st_event->origin, pcmk_strerror(st_event->result), st_event->id,
                st_event->client_origin ? st_event->client_origin : "<unknown>");
 
 #if SUPPORT_CMAN
     if (st_event->result == pcmk_ok && is_cman_cluster()) {
         int local_rc = 0;
         int confirm = 0;
         char *target_copy = strdup(st_event->target);
 
         /* In case fenced hasn't noticed yet
          *
          * Any fencing that has been initiated will be completed by way of the fence_pcmk redirect
          */
         local_rc = fenced_external(target_copy);
         if (local_rc != 0) {
             crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                     local_rc);
         } else {
             crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
         }
 
         /* In case fenced is already trying to shoot it */
         confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
         if (confirm >= 0) {
             int ignore = 0;
             int len = strlen(target_copy);
 
             errno = 0;
             local_rc = write(confirm, target_copy, len);
             ignore = write(confirm, "\n", 1);
 
             if(ignore < 0 && errno == EBADF) {
                 crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);
 
             } else if (local_rc < len) {
                 crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);
 
             } else {
                 fsync(confirm);
                 crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
             }
             close(confirm);
         }
         free(target_copy);
     }
 #endif
 
     if (st_event->result == pcmk_ok) {
         crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
         const char *uuid = NULL;
         gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
 
         /* Without a peer entry for the target there is nothing to update */
         if (peer == NULL) {
             return;
         }
 
         uuid = crm_peer_uuid(peer);
 
         crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
         if(AM_I_DC) {
             /* The DC always sends updates */
             send_stonith_update(NULL, st_event->target, uuid);
 
             if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
 
                 /* Abort the current transition graph if it wasn't us
                  * that invoked stonith to fence someone
                  */
                 crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                 abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
             }
 
             /* Assume it was our leader if we don't currently have one */
         } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
             && !is_set(peer->flags, crm_remote_node)) {
 
             crm_notice("Target %s our leader %s (recorded: %s)",
                        fsa_our_dc ? "was" : "may have been", st_event->target,
                        fsa_our_dc ? fsa_our_dc : "<unset>");
 
             /* Given the CIB resyncing that occurs around elections,
              * have one node update the CIB now and, if the new DC is different,
              * have them do so too after the election
              */
             if (we_are_executioner) {
                 send_stonith_update(NULL, st_event->target, uuid);
             }
             add_stonith_cleanup(st_event->target);
         }
 
+        /* If the target is a remote node, and we host its connection,
+         * immediately fail all monitors so it can be recovered quickly.
+         */
+        if (is_set(peer->flags, crm_remote_node)) {
+            remote_ra_fail(st_event->target);
+        }
+
         crmd_peer_down(peer, TRUE);
      }
 }
 
 /*!
  * \internal
  * \brief Connect to the fencing daemon and register for its notifications
  *
  * Makes up to 30 connection attempts, sleeping before each one.  When
  * user_data is non-NULL, the first failed attempt ends the call instead,
  * re-arming the stonith_reconnect trigger if fencing is still required.
  *
  * \param[in] user_data  If non-NULL, give up after the first failed attempt
  *
  * \return Always TRUE
  */
 gboolean
 te_connect_stonith(gpointer user_data)
 {
     int attempts_left = 30;
     int rc = pcmk_ok;
 
     if (stonith_api == NULL) {
         stonith_api = stonith_api_new();
     }
 
     if (stonith_api->state != stonith_disconnected) {
         crm_trace("Still connected");
         return TRUE;
     }
 
     while (attempts_left-- > 0) {
         crm_debug("Attempting connection to fencing daemon...");
 
         sleep(1);
         rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
         if (rc == pcmk_ok) {
             break;
         }
 
         if (user_data == NULL) {
             crm_err("Sign-in failed: pausing and trying again in 2s...");
             sleep(1);
             continue;
         }
 
         /* Single-attempt mode: leave any retry to the reconnect trigger */
         if (is_set(fsa_input_register, R_ST_REQUIRED)) {
             crm_err("Sign-in failed: triggered a retry");
             mainloop_set_trigger(stonith_reconnect);
         } else {
             crm_info("Sign-in failed, but no longer required");
         }
         return TRUE;
     }
 
     CRM_CHECK(rc == pcmk_ok, return TRUE);      /* If not, we failed 30 times... just get out */
 
     stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
                                              tengine_stonith_connection_destroy);
     stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
                                              tengine_stonith_notify);
 
     crm_trace("Connected");
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Stop a transition engine timer if it is running
  *
  * \param[in,out] timer  Timer to stop (may be NULL)
  *
  * \return TRUE if a running timer was stopped; FALSE if timer is NULL or
  *         was not running
  */
 gboolean
 stop_te_timer(crm_action_timer_t * timer)
 {
     const char *timer_desc = "action timer";
 
     if (timer == NULL) {
         return FALSE;
     }
 
     /* Only pick the description here: the outcome is traced exactly once
      * below, so a stopped global timer is not reported as "Stopping" first. */
     if (timer->reason == timeout_abort) {
         timer_desc = "global timer";
     }
 
     if (timer->source_id == 0) {
         crm_trace("%s was already stopped", timer_desc);
         return FALSE;
     }
 
     crm_trace("Stopping %s", timer_desc);
     g_source_remove(timer->source_id);
     timer->source_id = 0;
 
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Mainloop trigger callback that runs the current transition graph
  *
  * Nothing is done without a graph or while the FSA is in one of the listed
  * states.  An incomplete graph is run with a throttled batch limit; once the
  * graph is no longer active or pending it is marked complete and the crmd
  * is notified.
  *
  * \param[in] user_data  Not used by this function
  *
  * \return Always TRUE
  */
 gboolean
 te_graph_trigger(gpointer user_data)
 {
     enum transition_status graph_rc = -1;
 
     if (transition_graph == NULL) {
         crm_debug("Nothing to do");
         return TRUE;
     }
 
     crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
 
     switch (fsa_state) {
         case S_STARTING:
         case S_PENDING:
         case S_NOT_DC:
         case S_HALT:
         case S_ILLEGAL:
         case S_STOPPING:
         case S_TERMINATE:
             return TRUE;
         default:
             break;
     }
 
     if (transition_graph->complete == FALSE) {
         const int configured_limit = transition_graph->batch_limit;
 
         transition_graph->batch_limit = throttle_get_total_job_limit(configured_limit);
         graph_rc = run_graph(transition_graph);
         transition_graph->batch_limit = configured_limit; /* Restore the configured value */
 
         switch (graph_rc) {
             case transition_active:
                 crm_trace("Transition not yet complete");
                 return TRUE;
 
             case transition_pending:
                 crm_trace("Transition not yet complete - no actions fired");
                 return TRUE;
 
             case transition_complete:
                 break;
 
             default:
                 crm_warn("Transition failed: %s", transition_status(graph_rc));
                 print_graph(LOG_NOTICE, transition_graph);
                 break;
         }
     }
 
     crm_debug("Transition %d is now complete", transition_graph->id);
     transition_graph->complete = TRUE;
     notify_crmd(transition_graph);
 
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Request processing of the transition graph via its mainloop trigger
  *
  * \param[in] fn    Name of the requesting function (used in the trace message)
  * \param[in] line  Source line of the request (used in the trace message)
  */
 void
 trigger_graph_processing(const char *fn, int line)
 {
     crm_trace("%s:%d - Triggered graph processing", fn, line);
     mainloop_set_trigger(transition_trigger);
 }
 
 /*!
  * \internal
  * \brief Abort the current transition graph and log why
  *
  * The abort is suppressed (and only logged) while the FSA is in one of the
  * listed states.  Otherwise any queued policy engine reference is dropped,
  * the abort priority of a still-running graph is raised, and a message
  * describing the reason is logged.  A graph that is already complete leads
  * to a restart of the transition timer or an I_PE_CALC input; a running
  * graph is re-triggered.
  *
  * \param[in] abort_priority  Priority of the abort
  * \param[in] abort_action    Action to take once the graph has stopped
  * \param[in] abort_text      Human-readable reason, used in log messages
  * \param[in] reason          XML that caused the abort (may be NULL)
  * \param[in] fn              Name of the calling function, for logging
  * \param[in] line            Source line of the call, for logging
  */
 void
 abort_transition_graph(int abort_priority, enum transition_action abort_action,
                        const char *abort_text, xmlNode * reason, const char *fn, int line)
 {
     int add[] = { 0, 0, 0 };
     int del[] = { 0, 0, 0 };
     int level = LOG_INFO;
     xmlNode *diff = NULL;
     xmlNode *change = NULL;
 
     CRM_CHECK(transition_graph != NULL, return);
 
     switch (fsa_state) {
         case S_STARTING:
         case S_PENDING:
         case S_NOT_DC:
         case S_HALT:
         case S_ILLEGAL:
         case S_STOPPING:
         case S_TERMINATE:
             crm_info("Abort %s suppressed: state=%s (complete=%d)",
                      abort_text, fsa_state2string(fsa_state), transition_graph->complete);
             return;
         default:
             break;
     }
 
     /* Make sure any queued calculations are discarded ASAP */
     free(fsa_pe_ref);
     fsa_pe_ref = NULL;
 
     if (transition_graph->complete == FALSE) {
         if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
             level = LOG_NOTICE;
         }
     }
 
     if(reason) {
         xmlNode *search = NULL;
 
         /* Find the enclosing diff element, if the reason is part of one */
         for(search = reason; search; search = search->parent) {
             if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
                 diff = search;
                 break;
             }
         }
 
         if(diff) {
             xml_patch_versions(diff, add, del);
 
             /* Find the enclosing change element within that diff */
             for(search = reason; search; search = search->parent) {
                 if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
                     change = search;
                     break;
                 }
             }
         }
     }
 
     if(reason == NULL) {
         do_crm_log(level, "Transition aborted: %s (source=%s:%d, %d)",
                    abort_text, fn, line, transition_graph->complete);
 
     } else if(change == NULL) {
         char *local_path = xml_get_path(reason);
 
         do_crm_log(level, "Transition aborted by %s.%s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                    TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, transition_graph->complete);
         free(local_path);
 
     } else {
         const char *kind = NULL;
         const char *op = crm_element_value(change, XML_DIFF_OP);
         const char *path = crm_element_value(change, XML_DIFF_PATH);
 
         /* op and path come from XML attributes that may be absent, so they
          * must be checked before being handed to strcmp()/strrchr() */
         if(change == reason) {
             if(op != NULL && strcmp(op, "create") == 0) {
                 reason = reason->children;
 
             } else if(op != NULL && strcmp(op, "modify") == 0) {
                 reason = first_named_child(reason, XML_DIFF_RESULT);
                 if(reason) {
                     reason = reason->children;
                 }
             }
         }
 
         kind = TYPE(reason);
         if(op != NULL && strcmp(op, "delete") == 0) {
             const char *shortpath = (path != NULL) ? strrchr(path, '/') : NULL;
 
             do_crm_log(level, "Transition aborted by deletion of %s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                        shortpath?shortpath+1:path, abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
 
         } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
             do_crm_log(level, "Transition aborted by %s, %s=%s: %s (%s cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                        crm_element_value(reason, XML_ATTR_ID),
                        crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
                        crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
                        abort_text, op, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
 
         } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
             const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
 
             do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (magic=%s, cib=%d.%d.%d, source=%s:%d, %d)",
                        crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
                        crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
                        magic, add[0], add[1], add[2], fn, line, transition_graph->complete);
 
         } else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
                    || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
             const char *uname = crm_peer_uname(ID(reason));
 
             do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (cib=%d.%d.%d, source=%s:%d, %d)",
                        kind, op, uname ? uname : ID(reason), abort_text,
                        add[0], add[1], add[2], fn, line, transition_graph->complete);
 
         } else {
             do_crm_log(level, "Transition aborted by %s.%s '%s': %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                        TYPE(reason), ID(reason), op?op:"change", abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
         }
     }
 
     if (transition_graph->complete) {
         /* Nothing is running: schedule a new calculation instead */
         if (transition_timer->period_ms > 0) {
             crm_timer_stop(transition_timer);
             crm_timer_start(transition_timer);
         } else {
             register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
         }
         return;
     }
 
     mainloop_set_trigger(transition_trigger);
 }