diff --git a/crmd/lrm.c b/crmd/lrm.c
index 74fede4368..52d54255e2 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -1,2390 +1,2388 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
 #include <crm/crm.h>
 #include <crm/services.h>
 
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 
 #define START_DELAY_THRESHOLD 5 * 60 * 1000
 #define MAX_LRM_REG_FAILS 30
 
 struct delete_event_s {
     int rc;
     const char *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
 static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                              const char *user_name);
 
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
                                        const char *rsc_id, const char *operation);
 static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
                           xmlNode * msg, xmlNode * request);
 
 void send_direct_ack(const char *to_host, const char *to_sys,
                      lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
 
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
                                          int log_level);
 static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
 
 static void
 lrm_connection_destroy(void)
 {
     if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
         crm_crit("LRM Connection failed");
         register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
         clear_bit(fsa_input_register, R_LRM_CONNECTED);
 
     } else {
         crm_info("LRM Connection disconnected");
     }
 
 }
 
 static char *
 make_stop_id(const char *rsc, int call_id)
 {
     char *op_id = NULL;
 
     op_id = calloc(1, strlen(rsc) + 34);
     if (op_id != NULL) {
         snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id);
     }
     return op_id;
 }
 
 static void
 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
 {
     if (strstr(key, CRM_META "_") == NULL) {
         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
     }
 }
 
 static void
 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
 {
     if (strstr(key, CRM_META "_") != NULL) {
         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
     }
 }
 
 static void
 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
     int target_rc = 0;
     rsc_history_t *entry = NULL;
 
     if (op->rsc_deleted) {
         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
         delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
         return;
     }
 
     if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
         return;
     }
 
     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 
     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
     if (entry == NULL && rsc) {
         entry = calloc(1, sizeof(rsc_history_t));
         entry->id = strdup(op->rsc_id);
         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 
         entry->rsc.id = entry->id;
         entry->rsc.type = strdup(rsc->type);
         entry->rsc.class = strdup(rsc->class);
         if (rsc->provider) {
             entry->rsc.provider = strdup(rsc->provider);
         } else {
             entry->rsc.provider = NULL;
         }
 
     } else if (entry == NULL) {
         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
         return;
     }
 
     entry->last_callid = op->call_id;
     target_rc = rsc_op_expected_rc(op);
     if (op->op_status == PCMK_LRM_OP_CANCELLED) {
         if (op->interval > 0) {
             GList *gIter, *gIterNext;
 
             crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type,
                       op->interval);
 
             for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIterNext) {
                 lrmd_event_data_t *existing = gIter->data;
 
                 gIterNext = gIter->next;
 
                 if (crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
                     && safe_str_eq(op->op_type, existing->op_type)
                     && op->interval == existing->interval) {
                     lrmd_free_event(existing);
                     entry->recurring_op_list = g_list_delete_link(entry->recurring_op_list, gIter);
                 }
             }
             return;
 
         } else {
             crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval,
                       op->rc, op->op_status);
         }
 
     } else if (did_rsc_op_fail(op, target_rc)) {
         /* We must store failed monitors here
          * - otherwise the block below will cause them to be forgetten them when a stop happens
          */
         if (entry->failed) {
             lrmd_free_event(entry->failed);
         }
         entry->failed = lrmd_copy_event(op);
 
     } else if (op->interval == 0) {
         if (entry->last) {
             lrmd_free_event(entry->last);
         }
         entry->last = lrmd_copy_event(op);
 
         if (op->params &&
             (safe_str_eq(CRMD_ACTION_START, op->op_type) ||
              safe_str_eq("reload", op->op_type) ||
              safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
 
             if (entry->stop_params) {
                 g_hash_table_destroy(entry->stop_params);
             }
             entry->stop_params = g_hash_table_new_full(crm_str_hash,
                                                        g_str_equal, g_hash_destroy_str,
                                                        g_hash_destroy_str);
 
             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
         }
     }
 
     if (op->interval > 0) {
         GListPtr iter = NULL;
 
         for(iter = entry->recurring_op_list; iter; iter = iter->next) {
             lrmd_event_data_t *o = iter->data;
 
             /* op->rsc_id is implied */
             if(op->interval == o->interval && strcmp(op->op_type, o->op_type) == 0) {
                 crm_trace("Removing existing recurring op entry: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
                 entry->recurring_op_list = g_list_remove(entry->recurring_op_list, o);
                 break;
             }
         }
 
         crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 
     } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
         GList *gIter = entry->recurring_op_list;
 
         crm_trace("Dropping %d recurring ops because of: %s_%s_%d",
                   g_list_length(gIter), op->rsc_id, op->op_type, op->interval);
         for (; gIter != NULL; gIter = gIter->next) {
             lrmd_free_event(gIter->data);
         }
         g_list_free(entry->recurring_op_list);
         entry->recurring_op_list = NULL;
     }
 }
 
 void
 lrm_op_callback(lrmd_event_data_t * op)
 {
     const char *nodename = NULL;
     lrm_state_t *lrm_state = NULL;
 
     CRM_CHECK(op != NULL, return);
 
     /* determine the node name for this connection. */
     nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname;
 
     if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) {
         /* if this is the local lrmd ipc connection, set the right bits in the
          * crmd when the connection goes down */
         lrm_connection_destroy();
         return;
     } else if (op->type != lrmd_event_exec_complete) {
         /* we only need to process execution results */
         return;
     }
 
     lrm_state = lrm_state_find(nodename);
     CRM_ASSERT(lrm_state != NULL);
 
     process_lrm_event(lrm_state, op, NULL);
 }
 
 /*	 A_LRM_CONNECT	*/
 void
 do_lrm_control(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     /* This only pertains to local lrmd connections.  Remote connections are handled as
      * resources within the pengine.  Connecting and disconnecting from remote lrmd instances
      * handled differently than the local. */
 
     lrm_state_t *lrm_state = NULL;
 
     if(fsa_our_uname == NULL) {
         return; /* Nothing to do */
     }
     lrm_state = lrm_state_find_or_create(fsa_our_uname);
     if (lrm_state == NULL) {
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
         return;
     }
 
     if (action & A_LRM_DISCONNECT) {
         if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
             if (action == A_LRM_DISCONNECT) {
                 crmd_fsa_stall(FALSE);
                 return;
             }
         }
 
         clear_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("Disconnecting from the LRM");
         lrm_state_disconnect(lrm_state);
         lrm_state_reset_tables(lrm_state);
         crm_notice("Disconnected from the LRM");
     }
 
     if (action & A_LRM_CONNECT) {
         int ret = pcmk_ok;
 
         crm_debug("Connecting to the LRM");
         ret = lrm_state_ipc_connect(lrm_state);
 
         if (ret != pcmk_ok) {
             if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
                 crm_warn("Failed to sign on to the LRM %d"
                          " (%d max) times", lrm_state->num_lrm_register_fails, MAX_LRM_REG_FAILS);
 
                 crm_timer_start(wait_timer);
                 crmd_fsa_stall(FALSE);
                 return;
             }
         }
 
         if (ret != pcmk_ok) {
             crm_err("Failed to sign on to the LRM %d" " (max) times",
                     lrm_state->num_lrm_register_fails);
             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
             return;
         }
 
         set_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("LRM connection established");
     }
 
     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
         crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
     }
 }
 
 static gboolean
 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
 {
     int counter = 0;
     gboolean rc = TRUE;
     const char *when = "lrm disconnect";
 
     GHashTableIter gIter;
     const char *key = NULL;
     rsc_history_t *entry = NULL;
     struct recurring_op_s *pending = NULL;
 
     crm_debug("Checking for active resources before exit");
 
     if (cur_state == S_TERMINATE) {
         log_level = LOG_ERR;
         when = "shutdown";
 
     } else if (is_set(fsa_input_register, R_SHUTDOWN)) {
         when = "shutdown... waiting";
     }
 
     if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
         guint removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_actions, lrm_state);
 
         crm_notice("Stopped %u recurring operations at %s (%u ops remaining)",
                    removed, when, g_hash_table_size(lrm_state->pending_ops));
     }
 
     if (lrm_state->pending_ops) {
         g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
             /* Ignore recurring actions in the shutdown calculations */
             if (pending->interval == 0) {
                 counter++;
             }
         }
     }
 
     if (counter > 0) {
         do_crm_log(log_level, "%d pending LRM operations at %s", counter, when);
 
         if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
             g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
             }
 
         } else {
             rc = FALSE;
         }
         return rc;
     }
 
     if (lrm_state->resource_history == NULL) {
         return rc;
     }
 
     if (cur_state == S_TERMINATE || is_set(fsa_input_register, R_SHUTDOWN)) {
         /* At this point we're not waiting, we're just shutting down */
         when = "shutdown";
     }
 
     counter = 0;
     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
             continue;
         }
 
         counter++;
         crm_trace("Found %s active", entry->id);
         if (lrm_state->pending_ops) {
             GHashTableIter hIter;
 
             g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
                 if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
                     crm_notice("%sction %s (%s) incomplete at %s",
                                pending->interval == 0 ? "A" : "Recurring a",
                                key, pending->op_key, when);
                 }
             }
         }
     }
 
     if (counter) {
         crm_err("%d resources were active at %s.", counter, when);
     }
 
     return rc;
 }
 
 GHashTable *metadata_hash = NULL;
 
 static char *
 get_rsc_metadata(const char *type, const char *rclass, const char *provider, bool force)
 {
     int rc = pcmk_ok;
     int len = 0;
     char *key = NULL;
     char *metadata = NULL;
 
     /* Always use a local connection for this operation */
     lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname);
 
     CRM_CHECK(type != NULL, return NULL);
     CRM_CHECK(rclass != NULL, return NULL);
     CRM_CHECK(lrm_state != NULL, return NULL);
 
     if (provider == NULL) {
         provider = "heartbeat";
     }
 
     if (metadata_hash == NULL) {
         metadata_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
     }
 
     len = strlen(type) + strlen(rclass) + strlen(provider) + 4;
     key = malloc(len);
     if(key == NULL) {
         return NULL;
     }
 
     snprintf(key, len, "%s::%s:%s", type, rclass, provider);
     if(force == FALSE) {
-        snprintf(key, len, "%s::%s:%s", type, rclass, provider);
-
         crm_trace("Retreiving cached metadata for %s", key);
         metadata = g_hash_table_lookup(metadata_hash, key);
     }
 
     if(metadata == NULL) {
         rc = lrm_state_get_metadata(lrm_state, rclass, provider, type, &metadata, 0);
         crm_trace("Retreived live metadata for %s: %s (%d)", key, pcmk_strerror(rc), rc);
         if(rc == pcmk_ok) {
             CRM_LOG_ASSERT(metadata != NULL);
             g_hash_table_insert(metadata_hash, key, metadata);
             key = NULL;
         } else {
             CRM_LOG_ASSERT(metadata == NULL);
             metadata = NULL;
         }
     }
 
     if (metadata == NULL) {
         crm_warn("No metadata found for %s: %s (%d)", key, pcmk_strerror(rc), rc);
     }
 
     free(key);
     return metadata;
 }
 
 static char *
 build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result,
                      const char *criteria, bool target, bool invert_for_xml)
 {
     int len = 0;
     int max = 0;
     char *list = NULL;
 
     xmlNode *param = NULL;
     xmlNode *params = NULL;
 
     const char *secure_terms[] = {
         "password",
         "passwd",
         "user",
     };
 
     if(safe_str_eq("private", criteria)) {
         /* It will take time for the agents to be updated
          * Check for some common terms
          */
         max = DIMOF(secure_terms);
     }
 
     params = find_xml_node(metadata, "parameters", TRUE);
     for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) {
         if (crm_str_eq((const char *)param->name, "parameter", TRUE)) {
             bool accept = FALSE;
             const char *name = crm_element_value(param, "name");
             const char *value = crm_element_value(param, criteria);
 
             if(max && value) {
                 /* Turn off the compatibility logic once an agent has been updated to know about 'private' */
                 max = 0;
             }
 
             if (name == NULL) {
                 crm_err("Invalid parameter in %s metadata", op->rsc_id);
 
             } else if(target == crm_is_true(value)) {
                 accept = TRUE;
 
             } else if(max) {
                 int lpc = 0;
                 bool found = FALSE;
 
                 for(lpc = 0; found == FALSE && lpc < max; lpc++) {
                     if(safe_str_eq(secure_terms[lpc], name)) {
                         found = TRUE;
                     }
                 }
 
                 if(found == target) {
                     accept = TRUE;
                 }
             }
 
             if(accept) {
                 int start = len;
 
                 crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria);
 
                 len += strlen(name) + 2;
                 list = realloc_safe(list, len + 1);
                 sprintf(list + start, " %s ", name);
 
             } else {
                 crm_trace("Rejecting %s for %s", name, criteria);
             }
 
             if(invert_for_xml) {
                 crm_trace("Inverting %s match for %s xml", name, criteria);
                 accept = !accept;
             }
 
             if(result && accept) {
                 value = g_hash_table_lookup(op->params, name);
                 if(value != NULL) {
                     crm_trace("Adding attr to the xml result", name, target?"":"not ", criteria);
                     crm_xml_add(result, name, value);
                 }
             }
         }
     }
 
     return list;
 }
 
 static bool
 resource_supports_action(xmlNode *metadata, const char *name) 
 {
     const char *value = NULL;
 
     xmlNode *action = NULL;
     xmlNode *actions = NULL;
 
     actions = find_xml_node(metadata, "actions", TRUE);
     for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) {
         if (crm_str_eq((const char *)action->name, "action", TRUE)) {
             value = crm_element_value(action, "name");
-            if (safe_str_eq("reload", value)) {
+            if (safe_str_eq(name, value)) {
                 return TRUE;
             }
         }
     }
 
     return FALSE;
 }
 
 static void
 append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version)
 {
     char *list = NULL;
     char *digest = NULL;
     xmlNode *restart = NULL;
 
     CRM_LOG_ASSERT(op->params != NULL);
 
     if (op->interval > 0) {
         /* monitors are not reloadable */
         return;
     }
 
     if(resource_supports_action(metadata, "reload")) {
         restart = create_xml_node(NULL, XML_TAG_PARAMS);
         list = build_parameter_list(op, metadata, restart, "unique", FALSE, FALSE);
     }
 
     if (list == NULL) {
         /* Resource does not support reloads */
         return;
     }
 
     digest = calculate_operation_digest(restart, version);
     crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list);
     crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
 
     crm_trace("%s: %s, %s", op->rsc_id, digest, list);
     crm_log_xml_trace(restart, "restart digest source");
 
     free_xml(restart);
     free(digest);
     free(list);
 }
 
 static void
 append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version)
 {
     char *list = NULL;
     char *digest = NULL;
     xmlNode *secure = NULL;
 
     CRM_LOG_ASSERT(op->params != NULL);
 
     /*
      * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
      * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
      * the insecure ones
      */
     secure = create_xml_node(NULL, XML_TAG_PARAMS);
     list = build_parameter_list(op, metadata, secure, "private", TRUE, TRUE);
 
     if (list != NULL) {
         digest = calculate_operation_digest(secure, version);
         crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
         crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
 
         crm_trace("%s: %s, %s", op->rsc_id, digest, list);
         crm_log_xml_trace(secure, "secure digest source");
     } else {
         crm_trace("%s: no secure parameters", op->rsc_id);
     }
 
     free_xml(secure);
     free(digest);
     free(list);
 }
 
 static gboolean
 build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
                        const char *src)
 {
     int target_rc = 0;
     xmlNode *xml_op = NULL;
     xmlNode *metadata = NULL;
     const char *m_string = NULL;
     const char *caller_version = NULL;
 
     if (op == NULL) {
         return FALSE;
     }
 
     target_rc = rsc_op_expected_rc(op);
 
     /* there is a small risk in formerly mixed clusters that it will
      * be sub-optimal.
      *
      * however with our upgrade policy, the update we send should
      * still be completely supported anyway
      */
     caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
     CRM_LOG_ASSERT(caller_version != NULL);
 
     if(caller_version == NULL) {
         caller_version = CRM_FEATURE_SET;
     }
 
     crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
     xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
     if (xml_op == NULL) {
         return TRUE;
     }
 
     if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) {
         /* Stopped resources don't need the digest logic */
         crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type);
         return TRUE;
     }
 
     m_string = get_rsc_metadata(rsc->type, rsc->class, rsc->provider, safe_str_eq(op->op_type, RSC_START));
     if(m_string == NULL) {
         crm_err("No metadata for %s::%s:%s", rsc->provider, rsc->class, rsc->type);
         return TRUE;
     }
 
     metadata = string2xml(m_string);
     if(metadata == NULL) {
         crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type);
         return TRUE;
     }
 
     crm_trace("Includind additional digests for %s::%s:%s", rsc->provider, rsc->class, rsc->type);
     append_restart_list(op, metadata, xml_op, caller_version);
     append_secure_list(op, metadata, xml_op, caller_version);
 
     free_xml(metadata);
     return TRUE;
 }
 
 static gboolean
 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
 {
     rsc_history_t *entry = NULL;
 
     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
     if (entry == NULL || entry->last == NULL) {
         return FALSE;
     }
 
     crm_trace("Processing %s: %s.%d=%d",
               rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc);
     if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
         return FALSE;
 
     } else if (entry->last->rc == PCMK_OCF_OK
                && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
         /* a stricter check is too complex...
          * leave that to the PE
          */
         return FALSE;
 
     } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
         return FALSE;
 
     } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) {
         /* Badly configured resources can't be reliably stopped */
         return FALSE;
     }
 
     return TRUE;
 }
 
 static gboolean
 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
 {
     GHashTableIter iter;
     rsc_history_t *entry = NULL;
 
     g_hash_table_iter_init(&iter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
 
         GList *gIter = NULL;
         xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
 
         crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
         crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
 
         if (entry->last && entry->last->params) {
             const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
             if (container) {
                 crm_trace("Resource %s is a part of container resource %s", entry->id, container);
                 crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
             }
         }
         build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__);
         build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__);
         for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
             build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__);
         }
     }
 
     return FALSE;
 }
 
 xmlNode *
 do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace)
 {
     xmlNode *xml_state = NULL;
     xmlNode *xml_data = NULL;
     xmlNode *rsc_list = NULL;
     const char *uuid = NULL;
 
     if (safe_str_eq(lrm_state->node_name, fsa_our_uname)) {
         crm_node_t *peer = crm_get_peer(0, lrm_state->node_name);
         xml_state = do_update_node_cib(peer, node_update_cluster|node_update_peer, NULL, __FUNCTION__);
         /* The next two lines shouldn't be necessary for newer DCs */
         crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
         crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
         uuid = fsa_our_uuid;
 
     } else {
         xml_state = create_xml_node(NULL, XML_CIB_TAG_STATE);
         crm_xml_add(xml_state, XML_NODE_IS_REMOTE, "true");
         crm_xml_add(xml_state, XML_ATTR_ID, lrm_state->node_name);
         crm_xml_add(xml_state, XML_ATTR_UNAME, lrm_state->node_name);
         uuid = lrm_state->node_name;
     }
 
     xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
     crm_xml_add(xml_data, XML_ATTR_ID, uuid);
     rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
 
     /* Build a list of active (not always running) resources */
     build_active_RAs(lrm_state, rsc_list);
 
     crm_log_xml_trace(xml_state, "Current state of the LRM");
 
     return xml_state;
 }
 
 xmlNode *
 do_lrm_query(gboolean is_replace, const char *node_name)
 {
     lrm_state_t *lrm_state = lrm_state_find(node_name);
 
     if (!lrm_state) {
         crm_err("Could not query lrm state for lrmd node %s", node_name);
         return NULL;
     }
     return do_lrm_query_internal(lrm_state, is_replace);
 }
 
 static void
 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
 {
     lrmd_event_data_t *op = NULL;
     const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
 
     crm_info("Notifying %s on %s that %s was%s deleted",
              from_sys, from_host, rsc_id, rc == pcmk_ok ? "" : " not");
 
     op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
     CRM_ASSERT(op != NULL);
 
     if (rc == pcmk_ok) {
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_OK;
     } else {
         op->op_status = PCMK_LRM_OP_ERROR;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
     }
 
     send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
     lrmd_free_event(op);
 
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
         /* this isn't expected - trigger a new transition */
         time_t now = time(NULL);
         char *now_s = crm_itoa(now);
 
         crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id);
 
         update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                              "last-lrm-refresh", now_s, FALSE, NULL, NULL);
 
         free(now_s);
     }
 }
 
 static gboolean
 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     struct delete_event_s *event = user_data;
     struct pending_deletion_op_s *op = value;
 
     if (crm_str_eq(event->rsc, op->rsc, TRUE)) {
         notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
         return TRUE;
     }
     return FALSE;
 }
 
 static gboolean
 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
 {
     const char *rsc = user_data;
     struct recurring_op_s *pending = value;
 
     if (crm_str_eq(rsc, pending->rsc_id, TRUE)) {
         crm_info("Removing op %s:%d for deleted resource %s",
                  pending->op_key, pending->call_id, rsc);
         return TRUE;
     }
     return FALSE;
 }
 
 /*
  * Remove the rsc from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
 
 static int
 delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                   const char *user_name)
 {
     char *rsc_xpath = NULL;
     int max = 0;
     int rc = pcmk_ok;
 
     CRM_CHECK(rsc_id != NULL, return -ENXIO);
 
     max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1;
     rsc_xpath = calloc(1, max);
     snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id);
 
     rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath,
                          NULL, NULL, call_options | cib_xpath, user_name);
 
     free(rsc_xpath);
     return rc;
 }
 
 static void
 delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
                  GHashTableIter * rsc_gIter, int rc, const char *user_name)
 {
     struct delete_event_s event;
 
     CRM_CHECK(rsc_id != NULL, return);
 
     if (rc == pcmk_ok) {
         char *rsc_id_copy = strdup(rsc_id);
 
         if (rsc_gIter)
             g_hash_table_iter_remove(rsc_gIter);
         else
             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
         crm_debug("sync: Sending delete op for %s", rsc_id_copy);
         delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);
 
         g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
         free(rsc_id_copy);
     }
 
     if (input) {
         notify_deleted(lrm_state, input, rsc_id, rc);
     }
 
     event.rc = rc;
     event.rsc = rsc_id;
     event.lrm_state = lrm_state;
     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
 }
 
 /*
  * Remove the op from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 
 #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']"
 #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']"
 
 static void
 delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id,
                 const char *key, int call_id)
 {
     xmlNode *xml_top = NULL;
 
     if (op != NULL) {
         xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
         crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
         crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
 
         if (op->interval > 0) {
             char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
 
             /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
             crm_xml_add(xml_top, XML_ATTR_ID, op_id);
             free(op_id);
         }
 
         crm_debug("async: Sending delete op for %s_%s_%d (call=%d)",
                   op->rsc_id, op->op_type, op->interval, op->call_id);
 
         fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override);
 
     } else if (rsc_id != NULL && key != NULL) {
         int max = 0;
         char *op_xpath = NULL;
 
         if (call_id > 0) {
             max =
                 strlen(op_call_template) + strlen(rsc_id) + strlen(lrm_state->node_name) +
                 strlen(key) + 10;
             op_xpath = calloc(1, max);
             snprintf(op_xpath, max, op_call_template, lrm_state->node_name, rsc_id, key, call_id);
 
         } else {
             max =
                 strlen(op_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) +
                 1;
             op_xpath = calloc(1, max);
             snprintf(op_xpath, max, op_template, lrm_state->node_name, rsc_id, key);
         }
 
         crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id);
         fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath);
 
         free(op_xpath);
 
     } else {
         crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key);
         return;
     }
 
     crm_log_xml_trace(xml_top, "op:cancel");
     free_xml(xml_top);
 }
 
 void
 lrm_clear_last_failure(const char *rsc_id, const char *node_name)
 {
     char *attr = NULL;
     GHashTableIter iter;
     GList *lrm_state_list = lrm_state_get_list();
     GList *state_entry;
     rsc_history_t *entry = NULL;
 
     attr = generate_op_key(rsc_id, "last_failure", 0);
 
     /* This clears last failure for every lrm state that has this rsc.*/
     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
         lrm_state_t *lrm_state = state_entry->data;
 
         if (node_name != NULL) {
             if (strcmp(node_name, lrm_state->node_name) != 0) {
                 /* filter by node_name if node_name is present */
                 continue;
             }
         }
 
         delete_op_entry(lrm_state, NULL, rsc_id, attr, 0);
 
         if (!lrm_state->resource_history) {
             continue;
         }
 
         g_hash_table_iter_init(&iter, lrm_state->resource_history);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
             if (crm_str_eq(rsc_id, entry->id, TRUE)) {
                 lrmd_free_event(entry->failed);
                 entry->failed = NULL;
             }
         }
     }
     free(attr);
     g_list_free(lrm_state_list);
 }
 
 /* Returns: gboolean - cancellation is in progress */
 static gboolean
 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
 {
     int rc = pcmk_ok;
     char *local_key = NULL;
     struct recurring_op_s *pending = NULL;
 
     CRM_CHECK(op != 0, return FALSE);
     CRM_CHECK(rsc_id != NULL, return FALSE);
     if (key == NULL) {
         local_key = make_stop_id(rsc_id, op);
         key = local_key;
     }
     pending = g_hash_table_lookup(lrm_state->pending_ops, key);
 
     if (pending) {
         if (remove && pending->remove == FALSE) {
             pending->remove = TRUE;
             crm_debug("Scheduling %s for removal", key);
         }
 
         if (pending->cancelled) {
             crm_debug("Operation %s already cancelled", key);
             free(local_key);
             return FALSE;
         }
 
         pending->cancelled = TRUE;
 
     } else {
         crm_info("No pending op found for %s", key);
         free(local_key);
         return FALSE;
     }
 
     crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
     rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval);
     if (rc == pcmk_ok) {
         crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
         free(local_key);
         return TRUE;
     }
 
     crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
     /* The caller needs to make sure the entry is
      * removed from the pending_ops list
      *
      * Usually by returning TRUE inside the worker function
      * supplied to g_hash_table_foreach_remove()
      *
      * Not removing the entry from pending_ops will block
      * the node from shutting down
      */
     free(local_key);
     return FALSE;
 }
 
 struct cancel_data {
     gboolean done;
     gboolean remove;
     const char *key;
     lrmd_rsc_info_t *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean
 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     struct cancel_data *data = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (crm_str_eq(op->op_key, data->key, TRUE)) {
         data->done = TRUE;
         remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
     }
     return remove;
 }
 
 static gboolean
 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
 {
     guint removed = 0;
     struct cancel_data data;
 
     CRM_CHECK(rsc != NULL, return FALSE);
     CRM_CHECK(key != NULL, return FALSE);
 
     data.key = key;
     data.rsc = rsc;
     data.done = FALSE;
     data.remove = remove;
     data.lrm_state = lrm_state;
 
     removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data);
     crm_trace("Removed %u op cache entries, new size: %u",
               removed, g_hash_table_size(lrm_state->pending_ops));
     return data.done;
 }
 
 static lrmd_rsc_info_t *
 get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create)
 {
     lrmd_rsc_info_t *rsc = NULL;
     const char *id = ID(resource);
     const char *type = crm_element_value(resource, XML_ATTR_TYPE);
     const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
     const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
     const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
 
     crm_trace("Retrieving %s from the LRM.", id);
     CRM_CHECK(id != NULL, return NULL);
 
     rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
     if (!rsc && long_id) {
         rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0);
     }
 
     if (!rsc && do_create) {
         CRM_CHECK(class != NULL, return NULL);
         CRM_CHECK(type != NULL, return NULL);
 
         crm_trace("Adding rsc %s before operation", id);
 
         lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring);
 
         rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
         if (!rsc) {
             fsa_data_t *msg_data = NULL;
 
             crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name);
             /* only register this as a internal error if this involves the local
              * lrmd. Otherwise we're likely dealing with an unresponsive remote-node
              * which is not a FSA failure. */
             if (lrm_state_is_local(lrm_state) == TRUE) {
                 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
             }
         }
     }
 
     return rsc;
 }
 
 static void
 delete_resource(lrm_state_t * lrm_state,
                 const char *id,
                 lrmd_rsc_info_t * rsc,
                 GHashTableIter * gIter,
                 const char *sys,
                 const char *host,
                 const char *user,
                 ha_msg_input_t * request,
                 gboolean unregister)
 {
     int rc = pcmk_ok;
 
     crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
 
     if (rsc && unregister) {
         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
     }
 
     if (rc == pcmk_ok) {
         crm_trace("Resource '%s' deleted", id);
     } else if (rc == -EINPROGRESS) {
         crm_info("Deletion of resource '%s' pending", id);
         if (request) {
             struct pending_deletion_op_s *op = NULL;
             char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
 
             op = calloc(1, sizeof(struct pending_deletion_op_s));
             op->rsc = strdup(rsc->id);
             op->input = copy_ha_msg_input(request);
             g_hash_table_insert(lrm_state->deletion_ops, ref, op);
         }
         return;
     } else {
         crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
                  id, sys, user ? user : "internal", host, rc);
     }
 
     delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
 }
 
 static int
 get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
 {
     int call_id = 999999999;
     rsc_history_t *entry = NULL;
 
     if(lrm_state) {
         entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
     }
 
     /* Make sure the call id is greater than the last successful operation,
      * otherwise the failure will not result in a possible recovery of the resource
      * as it could appear the failure occurred before the successful start */
     if (entry) {
         call_id = entry->last_callid + 1;
     }
 
     if (call_id < 0) {
         call_id = 1;
     }
     return call_id;
 }
 
 static void
 force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node)
 {
         GHashTableIter gIter;
         rsc_history_t *entry = NULL;
 
 
         crm_info("clearing resource history on node %s", lrm_state->node_name);
         g_hash_table_iter_init(&gIter, lrm_state->resource_history);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
             /* only unregister the resource during a reprobe if it is not a remote connection
              * resource. otherwise unregistering the connection will terminate remote-node
              * membership */
             gboolean unregister = TRUE;
 
             if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
                 lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
                 if (remote_lrm_state) {
                     /* when forcing a reprobe, make sure to clear remote node before
                      * clearing the remote node's connection resource */ 
                     force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE);
                 }
                 unregister = FALSE;
             }
 
             delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
                             user_name, NULL, unregister);
         }
 
         /* Now delete the copy in the CIB */
         erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
 
         /* And finally, _delete_ the value in attrd
          * Setting it to FALSE results in the PE sending us back here again
          */
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
 }
 
 static void
 synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) 
 {
     lrmd_event_data_t *op = NULL;
     const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
     const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
     xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
 
     if(xml_rsc == NULL) {
         /* Do something else?  driect_ack? */
         crm_info("Skipping %s=%d on %s (%p): no resource",
                  crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state);
         return;
 
     } else if(operation == NULL) {
         /* This probably came from crm_resource -C, nothing to do */
         crm_info("Skipping %s=%d on %s (%p): no operation",
                  crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state);
         return;
     }
 
     op = construct_op(lrm_state, action, ID(xml_rsc), operation);
     CRM_ASSERT(op != NULL);
 
     op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
     if(safe_str_eq(operation, RSC_NOTIFY)) {
         /* Notifications can't fail yet */
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_OK;
 
     } else {
         op->op_status = PCMK_LRM_OP_ERROR;
         op->rc = rc;
     }
     op->t_run = time(NULL);
     op->t_rcchange = op->t_run;
 
     crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state);
 
     if(lrm_state) {
         process_lrm_event(lrm_state, op, NULL);
 
     } else {
         lrmd_rsc_info_t rsc;
 
         rsc.id = strdup(op->rsc_id);
         rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE);
         rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS);
         rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER);
 
         do_update_resource(target_node, &rsc, op);
 
         free(rsc.id);
         free(rsc.type);
         free(rsc.class);
         free(rsc.provider);
     }
     lrmd_free_event(op);
 }
 
 
 /*	 A_LRM_INVOKE	*/
 void
 do_lrm_invoke(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     gboolean create_rsc = TRUE;
     lrm_state_t *lrm_state = NULL;
     const char *crm_op = NULL;
     const char *from_sys = NULL;
     const char *from_host = NULL;
     const char *operation = NULL;
     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
     const char *user_name = NULL;
     const char *target_node = NULL;
     gboolean is_remote_node = FALSE;
     gboolean crm_rsc_delete = FALSE;
 
     if (input->xml != NULL) {
         /* Remote node operations are routed here to their remote connections */
         target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
     }
     if (target_node == NULL) {
         target_node = fsa_our_uname;
     } else if (safe_str_neq(target_node, fsa_our_uname)) {
         is_remote_node = TRUE;
     }
 
     lrm_state = lrm_state_find(target_node);
 
     if (lrm_state == NULL && is_remote_node) {
         crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.",
             target_node, fsa_our_uname);
 
         /* The action must be recorded here and in the CIB as failed */
         synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
         return;
     }
 
     CRM_ASSERT(lrm_state != NULL);
 
 #if ENABLE_ACL
     user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
     crm_trace("LRM command from user '%s'", user_name);
 #endif
 
     crm_op = crm_element_value(input->msg, F_CRM_TASK);
     from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
         from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
     }
 
     crm_trace("LRM command from: %s", from_sys);
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
         /* remember this delete op came from crm_resource */
         crm_rsc_delete = TRUE;
         operation = CRMD_ACTION_DELETE;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         operation = CRM_OP_LRM_REFRESH;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
         lrmd_event_data_t *op = NULL;
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* The lrmd can not fail a resource, it does not understand the
          * concept of success or failure in relation to a resource, it simply
          * executes operations and reports the results. We determine what a failure is.
          * Becaues of this, if we want to fail a resource we have to fake what we
          * understand a failure to look like.
          *
          * To do this we create a fake lrmd operation event for the resource
          * we want to fail.  We then pass that event to the lrmd client callback
          * so it will be processed as if it actually came from the lrmd. */
         op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon");
         CRM_ASSERT(op != NULL);
 
         free((char *)op->user_data);
         op->user_data = NULL;
         op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
         op->interval = 0;
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
         op->t_run = time(NULL);
         op->t_rcchange = op->t_run;
 
 #if ENABLE_ACL
         if (user_name && is_privileged(user_name) == FALSE) {
             crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
             return;
         }
 #endif
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
         if (rsc) {
             crm_info("Failing resource %s...", rsc->id);
             process_lrm_event(lrm_state, op, NULL);
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
             lrmd_free_rsc_info(rsc);
         } else {
             crm_info("Cannot find/create resource in order to fail it...");
             crm_log_xml_warn(input->msg, "bad input");
         }
 
         send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
         lrmd_free_event(op);
         return;
 
     } else if (input->xml != NULL) {
         operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
     }
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         int rc = pcmk_ok;
         xmlNode *fragment = do_lrm_query_internal(lrm_state, TRUE);
 
         fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
         crm_info("Forced a local LRM refresh: call=%d", rc);
 
         if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, fragment,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
         free_xml(fragment);
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
         xmlNode *data = do_lrm_query_internal(lrm_state, FALSE);
         xmlNode *reply = create_reply(input->msg, data);
 
         if (relay_message(reply, TRUE) == FALSE) {
             crm_err("Unable to route reply");
             crm_log_xml_err(reply, "reply");
         }
         free_xml(reply);
         free_xml(data);
 
     } else if (safe_str_eq(operation, CRM_OP_PROBED)) {
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node);
 
     } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
         crm_notice("Forcing the status of all resources to be redetected");
 
         force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);
 
         if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0
            && strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, NULL,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
     } else if (operation != NULL) {
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *params = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* only the first 16 chars are used by the LRM */
         params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
 
         if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
             create_rsc = FALSE;
         }
 
         if(lrm_state_is_connected(lrm_state) == FALSE) {
             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED);
             return;
         }
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
         if (rsc == NULL && create_rsc) {
             crm_err("Invalid resource definition for %s", ID(xml_rsc));
             crm_log_xml_warn(input->msg, "bad input");
 
             /* if the operation couldn't complete because we can't register
              * the resource, return a generic error */
             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED);
 
         } else if (rsc == NULL) {
             lrmd_event_data_t *op = NULL;
 
             crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml));
             delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name);
 
             op = construct_op(lrm_state, input->xml, ID(xml_rsc), operation);
 
             /* Deleting something that does not exist is a success */
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
             CRM_ASSERT(op != NULL);
 
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
 
         } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
             char *op_key = NULL;
             char *meta_key = NULL;
             int call = 0;
             const char *call_id = NULL;
             const char *op_task = NULL;
             const char *op_interval = NULL;
             gboolean in_progress = FALSE;
 
             CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL);
             op_interval = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
             op_task = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
             call_id = crm_element_value(params, meta_key);
             free(meta_key);
 
             CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
             CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
 
             op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0"));
 
             crm_debug("PE requested op %s (call=%s) be cancelled",
                       op_key, call_id ? call_id : "NA");
             call = crm_parse_int(call_id, "0");
             if (call == 0) {
                 /* the normal case when the PE cancels a recurring op */
                 in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
 
             } else {
                 /* the normal case when the PE cancels an orphan op */
                 in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
             }
 
             if (in_progress == FALSE) {
                 lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc->id, op_task);
 
                 crm_info("Nothing known about operation %d for %s", call, op_key);
                 delete_op_entry(lrm_state, NULL, rsc->id, op_key, call);
 
                 CRM_ASSERT(op != NULL);
 
                 op->rc = PCMK_OCF_OK;
                 op->op_status = PCMK_LRM_OP_DONE;
                 send_direct_ack(from_host, from_sys, rsc, op, rsc->id);
                 lrmd_free_event(op);
 
                 /* needed?? surely not otherwise the cancel_op_(_key) wouldn't
                  * have failed in the first place
                  */
                 g_hash_table_remove(lrm_state->pending_ops, op_key);
             }
 
             free(op_key);
 
         } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) {
             gboolean unregister = TRUE;
 
 #if ENABLE_ACL
             int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name);
             if (cib_rc != pcmk_ok) {
                 lrmd_event_data_t *op = NULL;
 
                 crm_err
                     ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s",
                      rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc,
                      pcmk_strerror(cib_rc));
 
                 op = construct_op(lrm_state, input->xml, rsc->id, operation);
                 op->op_status = PCMK_LRM_OP_ERROR;
 
                 if (cib_rc == -EACCES) {
                     op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
                 } else {
                     op->rc = PCMK_OCF_UNKNOWN_ERROR;
                 }
                 send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
                 lrmd_free_event(op);
                 return;
             }
 #endif
             if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
                 unregister = FALSE;
             }
 
             delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister);
 
         } else if (rsc != NULL) {
             do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
         }
 
         lrmd_free_rsc_info(rsc);
 
     } else {
         crm_err("Operation was neither a lrm_query, nor a rsc op.  %s", crm_str(crm_op));
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     }
 }
 
 static lrmd_event_data_t *
 construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
 {
     lrmd_event_data_t *op = NULL;
     const char *op_delay = NULL;
     const char *op_timeout = NULL;
     const char *op_interval = NULL;
     GHashTable *params = NULL;
 
     const char *transition = NULL;
 
     CRM_ASSERT(rsc_id != NULL);
 
     op = calloc(1, sizeof(lrmd_event_data_t));
     op->type = lrmd_event_exec_complete;
     op->op_type = strdup(operation);
     op->op_status = PCMK_LRM_OP_PENDING;
     op->rc = -1;
     op->rsc_id = strdup(rsc_id);
     op->interval = 0;
     op->timeout = 0;
     op->start_delay = 0;
 
     if (rsc_op == NULL) {
         CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
         op->user_data = NULL;
         /* the stop_all_resources() case
          * by definition there is no DC (or they'd be shutting
          *   us down).
          * So we should put our version here.
          */
         op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
 
         g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
 
         crm_trace("Constructed %s op for %s", operation, rsc_id);
         return op;
     }
 
     params = xml2list(rsc_op);
     g_hash_table_remove(params, CRM_META "_op_target_rc");
 
     op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
     op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
     op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL);
 
     op->interval = crm_parse_int(op_interval, "0");
     op->timeout = crm_parse_int(op_timeout, "0");
     op->start_delay = crm_parse_int(op_delay, "0");
 
     if (safe_str_neq(operation, RSC_STOP)) {
         op->params = params;
 
     } else {
         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 
         /* If we do not have stop parameters cached, use
          * whatever we are given */
         if (!entry || !entry->stop_params) {
             op->params = params;
         } else {
             /* Copy the cached parameter list so that we stop the resource
              * with the old attributes, not the new ones */
             op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                                g_hash_destroy_str, g_hash_destroy_str);
 
             g_hash_table_foreach(params, copy_meta_keys, op->params);
             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
             g_hash_table_destroy(params);
             params = NULL;
         }
     }
 
     /* sanity */
     if (op->interval < 0) {
         op->interval = 0;
     }
     if (op->timeout <= 0) {
         op->timeout = op->interval;
     }
     if (op->start_delay < 0) {
         op->start_delay = 0;
     }
 
     transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
     CRM_CHECK(transition != NULL, return op);
 
     op->user_data = strdup(transition);
 
     if (op->interval != 0) {
         if (safe_str_eq(operation, CRMD_ACTION_START)
             || safe_str_eq(operation, CRMD_ACTION_STOP)) {
             crm_err("Start and Stop actions cannot have an interval: %d", op->interval);
             op->interval = 0;
         }
     }
 
     crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval);
 
     return op;
 }
 
 void
 send_direct_ack(const char *to_host, const char *to_sys,
                 lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
 {
     xmlNode *reply = NULL;
     xmlNode *update, *iter;
     crm_node_t *peer = NULL;
 
     CRM_CHECK(op != NULL, return);
     if (op->rsc_id == NULL) {
         CRM_ASSERT(rsc_id != NULL);
         op->rsc_id = strdup(rsc_id);
     }
     if (to_sys == NULL) {
         to_sys = CRM_SYSTEM_TENGINE;
     }
 
     peer = crm_get_peer(0, fsa_our_uname);
     update = do_update_node_cib(peer, node_update_none, NULL, __FUNCTION__);
 
     iter = create_xml_node(update, XML_CIB_TAG_LRM);
     crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
 
     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(iter, rsc, op, __FUNCTION__);
     reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
 
     crm_log_xml_trace(update, "ACK Update");
 
     crm_debug("ACK'ing resource op %s_%s_%d from %s: %s",
               op->rsc_id, op->op_type, op->interval, op->user_data,
               crm_element_value(reply, XML_ATTR_REFERENCE));
 
     if (relay_message(reply, TRUE) == FALSE) {
         crm_log_xml_err(reply, "Unable to route reply");
     }
 
     free_xml(update);
     free_xml(reply);
 }
 
 gboolean
 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
 {
     gboolean res = TRUE;
     GList *lrm_state_list = lrm_state_get_list();
     GList *state_entry;
 
     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
         lrm_state_t *lrm_state = state_entry->data;
 
         if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
             /* keep iterating through all even when false is returned */
             res = FALSE;
         }
     }
 
     set_bit(fsa_input_register, R_SENT_RSC_STOP);
     g_list_free(lrm_state_list); lrm_state_list = NULL;
     return res;
 }
 
 struct stop_recurring_action_s {
     lrmd_rsc_info_t *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean
 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     struct stop_recurring_action_s *event = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
         crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
         remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
     }
 
     return remove;
 }
 
 static gboolean
 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     lrm_state_t *lrm_state = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (op->interval != 0) {
         crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
         remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
     }
 
     return remove;
 }
 
 static void
 do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
               xmlNode * request)
 {
     int call_id = 0;
     char *op_id = NULL;
     lrmd_event_data_t *op = NULL;
     lrmd_key_value_t *params = NULL;
     fsa_data_t *msg_data = NULL;
     const char *transition = NULL;
     gboolean stop_recurring = FALSE;
 
     CRM_CHECK(rsc != NULL, return);
     CRM_CHECK(operation != NULL, return);
 
     if (msg != NULL) {
         transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
         if (transition == NULL) {
             crm_log_xml_err(msg, "Missing transition number");
         }
     }
 
     op = construct_op(lrm_state, msg, rsc->id, operation);
     CRM_CHECK(op != NULL, return);
 
     if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
         && op->interval == 0
         && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {
 
         /* pcmk remote connections are a special use case.
          * We never ever want to stop monitoring a connection resource until
          * the entire migration has completed. If the connection is ever unexpected
          * severed, even during a migration, this is an event we must detect.*/
         stop_recurring = FALSE;
 
     } else if (op->interval == 0
         && strcmp(operation, CRMD_ACTION_STATUS) != 0
         && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
 
         /* stop any previous monitor operations before changing the resource state */
         stop_recurring = TRUE;
     }
 
     if (stop_recurring == TRUE) {
         guint removed = 0;
         struct stop_recurring_action_s data;
 
         data.rsc = rsc;
         data.lrm_state = lrm_state;
         removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
 
         crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d",
                   removed, rsc->id, operation, op->interval);
     }
 
     /* now do the op */
     crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval);
 
     if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) {
         if (safe_str_neq(operation, "fail")
             && safe_str_neq(operation, CRMD_ACTION_STOP)) {
             crm_info("Discarding attempt to perform action %s on %s in state %s",
                      operation, rsc->id, fsa_state2string(fsa_state));
             op->rc = CRM_DIRECT_NACK_RC;
             op->op_status = PCMK_LRM_OP_ERROR;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
             lrmd_free_event(op);
             free(op_id);
             return;
         }
     }
 
     op_id = generate_op_key(rsc->id, op->op_type, op->interval);
 
     if (op->interval > 0) {
         /* cancel it so we can then restart it without conflict */
         cancel_op_key(lrm_state, rsc, op_id, FALSE);
     }
 
     if (op->params) {
         char *key = NULL;
         char *value = NULL;
         GHashTableIter iter;
 
         g_hash_table_iter_init(&iter, op->params);
         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
             params = lrmd_key_value_add(params, key, value);
         }
     }
 
     call_id = lrm_state_exec(lrm_state,
                              rsc->id,
                              op->op_type,
                              op->user_data, op->interval, op->timeout, op->start_delay, params);
 
     if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
         crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 
     } else if (call_id <= 0) {
 
         crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id);
         op->call_id = get_fake_call_id(lrm_state, rsc->id);
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
         op->t_run = time(NULL);
         op->t_rcchange = op->t_run;
         process_lrm_event(lrm_state, op, NULL);
 
     } else {
         /* record all operations so we can wait
          * for them to complete during shutdown
          */
         char *call_id_s = make_stop_id(rsc->id, call_id);
         struct recurring_op_s *pending = NULL;
 
         pending = calloc(1, sizeof(struct recurring_op_s));
         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
 
         pending->call_id = call_id;
         pending->interval = op->interval;
         pending->op_type = strdup(operation);
         pending->op_key = strdup(op_id);
         pending->rsc_id = strdup(rsc->id);
         pending->start_time = time(NULL);
         pending->user_data = strdup(op->user_data);
         g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
 
         if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) {
             char *uuid = NULL;
             int dummy = 0, target_rc = 0;
 
             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
 
             decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
             free(uuid);
 
             op->rc = target_rc;
             op->op_status = PCMK_LRM_OP_DONE;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
         }
 
         pending->params = op->params;
         op->params = NULL;
     }
 
     free(op_id);
     lrmd_free_event(op);
     return;
 }
 
 int last_resource_update = 0;
 
 static void
 cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     switch (rc) {
         case pcmk_ok:
         case -pcmk_err_diff_failed:
         case -pcmk_err_diff_resync:
             crm_trace("Resource update %d complete: rc=%d", call_id, rc);
             break;
         default:
             crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
     }
 
     if (call_id == last_resource_update) {
         last_resource_update = 0;
         trigger_fsa(fsa_source);
     }
 }
 
 static void
 remote_node_init_status(const char *node_name, int call_opt)
 {
     int call_id = 0;
     xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
     xmlNode *state;
 
     state = simple_remote_node_status(node_name, update,__FUNCTION__);
     crm_xml_add(state, XML_NODE_IS_FENCED, "0");
 
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
     if (call_id != pcmk_ok) {
         crm_debug("Failed to init status section for remote-node %s", node_name);
     }
     free_xml(update);
 }
 
 static void
 remote_node_clear_status(const char *node_name, int call_opt)
 {
     if (node_name == NULL) {
         return;
     }
     remote_node_init_status(node_name, call_opt);
     erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
     erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
 }
 
 static int
 do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
 /*
   <status>
   <nodes_status id=uname>
   <lrm>
   <lrm_resources>
   <lrm_resource id=...>
   </...>
 */
     int rc = pcmk_ok;
     xmlNode *update, *iter = NULL;
     int call_opt = cib_quorum_override;
     const char *uuid = NULL;
 
     CRM_CHECK(op != NULL, return 0);
 
     if (fsa_state == S_ELECTION || fsa_state == S_PENDING) {
         crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state));
         call_opt |= cib_scope_local;
     }
 
     iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
     update = iter;
     iter = create_xml_node(iter, XML_CIB_TAG_STATE);
 
     if (safe_str_eq(node_name, fsa_our_uname)) {
         uuid = fsa_our_uuid;
 
     } else {
         /* remote nodes uuid and uname are equal */
         uuid = node_name;
         crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
     }
 
     CRM_LOG_ASSERT(uuid != NULL);
     if(uuid == NULL) {
         rc = -EINVAL;
         goto done;
     }
 
     crm_xml_add(iter, XML_ATTR_UUID,  uuid);
     crm_xml_add(iter, XML_ATTR_UNAME, node_name);
     crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
 
     iter = create_xml_node(iter, XML_CIB_TAG_LRM);
     crm_xml_add(iter, XML_ATTR_ID, uuid);
 
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(iter, rsc, op, __FUNCTION__);
 
     if (rsc) {
         const char *container = NULL;
 
         crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
         crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
         crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
 
         if (op->params) {
             container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
         }
         if (container) {
             crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
             crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
         }
 
         CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id));
         CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id));
 
         /* check to see if we need to initialize remote-node related status sections */
         if (safe_str_eq(op->op_type, "start") && op->rc == 0 && op->op_status == PCMK_LRM_OP_DONE) {
             const char *remote_node = g_hash_table_lookup(op->params, CRM_META"_remote_node");
 
             if (remote_node) {
                 /* A container for a remote-node has started, initalize remote-node's status */
                 crm_info("Initalizing lrm status for container remote-node %s. Container successfully started.", remote_node);
                 remote_node_clear_status(remote_node, call_opt);
             } else if (container == FALSE && safe_str_eq(rsc->type, "remote") && safe_str_eq(rsc->provider, "pacemaker")) {
                 /* baremetal remote node connection resource has started, initalize remote-node's status */
                 crm_info("Initializing lrm status for baremetal remote-node %s", rsc->id);
                 remote_node_clear_status(rsc->id, call_opt);
             }
         }
 
     } else {
         crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id);
         send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
         goto cleanup;
     }
 
     crm_log_xml_trace(update, __FUNCTION__);
 
     /* make it an asyncronous call and be done with it
      *
      * Best case:
      *   the resource state will be discovered during
      *   the next signup or election.
      *
      * Bad case:
      *   we are shutting down and there is no DC at the time,
      *   but then why were we shutting down then anyway?
      *   (probably because of an internal error)
      *
      * Worst case:
      *   we get shot for having resources "running" when the really weren't
      *
      * the alternative however means blocking here for too long, which
      * isnt acceptable
      */
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
 
     if (rc > 0) {
         last_resource_update = rc;
     }
   done:
     /* the return code is a call number, not an error code */
     crm_trace("Sent resource state update message: %d for %s=%d on %s", rc,
               op->op_type, op->interval, op->rsc_id);
     fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
 
   cleanup:
     free_xml(update);
     return rc;
 }
 
 void
 do_lrm_event(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
 {
     CRM_CHECK(FALSE, return);
 }
 
 gboolean
 process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending)
 {
     char *op_id = NULL;
     char *op_key = NULL;
 
     int update_id = 0;
     gboolean remove = FALSE;
     gboolean removed = FALSE;
     lrmd_rsc_info_t *rsc = NULL;
 
     CRM_CHECK(op != NULL, return FALSE);
     CRM_CHECK(op->rsc_id != NULL, return FALSE);
 
     op_id = make_stop_id(op->rsc_id, op->call_id);
     op_key = generate_op_key(op->rsc_id, op->op_type, op->interval);
     rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
     if(pending == NULL) {
         remove = TRUE;
         pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
     }
 
     if (op->op_status == PCMK_LRM_OP_ERROR) {
         switch(op->rc) {
             case PCMK_OCF_NOT_RUNNING:
             case PCMK_OCF_RUNNING_MASTER:
             case PCMK_OCF_DEGRADED:
             case PCMK_OCF_DEGRADED_MASTER:
                 /* Leave it up to the TE/PE to decide if this is an error */
                 op->op_status = PCMK_LRM_OP_DONE;
                 break;
             default:
                 /* Nothing to do */
                 break;
         }
     }
 
     if (op->op_status != PCMK_LRM_OP_CANCELLED) {
         if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
             /* Keep notify ops out of the CIB */
             send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
         } else {
             update_id = do_update_resource(lrm_state->node_name, rsc, op);
         }
     } else if (op->interval == 0) {
         /* This will occur when "crm resource cleanup" is called while actions are in-flight */
         crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else if (pending == NULL) {
         /* We don't need to do anything for cancelled ops
          * that are not in our pending op list. There are no
          * transition actions waiting on these operations. */
 
     } else if (op->user_data == NULL) {
         /* At this point we have a pending entry, but no transition
          * key present in the user_data field. report this */
         crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
 
     } else if (pending->remove) {
         /* The tengine canceled this op, we have been waiting for the cancel to finish. */
         delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id);
 
     } else if (pending && op->rsc_deleted) {
         /* The tengine initiated this op, but it was cancelled outside of the
          * tengine's control during a resource cleanup/re-probe request. The tengine
          * must be alerted that this operation completed, otherwise the tengine
          * will continue waiting for this update to occur until it is timed out.
          * We don't want this update going to the cib though, so use a direct ack. */
         crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else {
         /* Before a stop is called, no need to direct ack */
         crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
     }
 
     if(remove == FALSE) {
         /* The caller will do this afterwards, but keep the logging consistent */
         removed = TRUE;
 
     } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) {
         removed = TRUE;
         crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
                   op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops));
 
     } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) {
         removed = TRUE;
         g_hash_table_remove(lrm_state->pending_ops, op_id);
     }
 
     switch (op->op_status) {
         case PCMK_LRM_OP_CANCELLED:
             crm_info("Operation %s: %s (node=%s, call=%d, confirmed=%s)",
                      op_key, services_lrm_status_str(op->op_status), lrm_state->node_name,
                      op->call_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_DONE:
             do_crm_log(op->interval?LOG_INFO:LOG_NOTICE,
                        "Operation %s: %s (node=%s, call=%d, rc=%d, cib-update=%d, confirmed=%s)",
                        op_key, services_ocf_exitcode_str(op->rc), lrm_state->node_name,
                        op->call_id, op->rc, update_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_TIMEOUT:
             crm_err("Operation %s: %s (node=%s, call=%d, timeout=%dms)",
                     op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, op->call_id, op->timeout);
             break;
 
         default:
             crm_err("Operation %s (node=%s, call=%d, status=%d, cib-update=%d, confirmed=%s) %s",
                     op_key, lrm_state->node_name, op->call_id, op->op_status, update_id, removed ? "true" : "false",
                     services_lrm_status_str(op->op_status));
     }
 
     if (op->output) {
         char *prefix =
             crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id);
 
         if (op->rc) {
             crm_log_output(LOG_NOTICE, prefix, op->output);
         } else {
             crm_log_output(LOG_DEBUG, prefix, op->output);
         }
         free(prefix);
     }
 
     if (op->rsc_deleted) {
         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
         delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
     }
 
     /* If a shutdown was escalated while operations were pending,
      * then the FSA will be stalled right now... allow it to continue
      */
     mainloop_set_trigger(fsa_source);
     update_history_cache(lrm_state, rsc, op);
 
     lrmd_free_rsc_info(rsc);
     free(op_key);
     free(op_id);
 
     return TRUE;
 }
diff --git a/cts/CTStests.py b/cts/CTStests.py
index 9b2f681772..cd30798426 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,3201 +1,3225 @@
 '''CTS: Cluster Testing System: Tests module
 
 There are a few things we want to do here:
 
  '''
 
 __copyright__ = '''
 Copyright (C) 2000, 2001 Alan Robertson <alanr@unix.sh>
 Licensed under the GNU GPL.
 
 Add RecourceRecover testcase Zhao Kai <zhaokai@cn.ibm.com>
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 #
 #        SPECIAL NOTE:
 #
 #        Tests may NOT implement any cluster-manager-specific code in them.
 #        EXTEND the ClusterManager object to provide the base capabilities
 #        the test needs if you need to do something that the current CM classes
 #        do not.  Otherwise you screw up the whole point of the object structure
 #        in CTS.
 #
 #                Thank you.
 #
 
 import time, os, re, string, tempfile
 from stat import *
 from cts import CTS
 from cts.CTSaudits import *
 from cts.CTSvars   import *
 from cts.patterns  import PatternSelector
 from cts.logging   import LogFactory
 from cts.remote    import RemoteFactory
 from cts.watcher   import LogWatcher
 from cts.environment import EnvFactory
 
 AllTestClasses = [ ]
 
 
 class CTSTest:
     '''
     A Cluster test.
     We implement the basic set of properties and behaviors for a generic
     cluster test.
 
     Cluster tests track their own statistics.
     We keep each of the kinds of counts we track as separate {name,value}
     pairs.
     '''
 
     def __init__(self, cm):
         #self.name="the unnamed test"
         self.Stats = {"calls":0
         ,        "success":0
         ,        "failure":0
         ,        "skipped":0
         ,        "auditfail":0}
 
 #        if not issubclass(cm.__class__, ClusterManager):
 #            raise ValueError("Must be a ClusterManager object")
         self.CM = cm
         self.Env = EnvFactory().getInstance()
         self.rsh = RemoteFactory().getInstance()
         self.logger = LogFactory()
         self.templates = PatternSelector(cm["Name"])
         self.Audits = []
         self.timeout = 120
         self.passed = 1
         self.is_loop = 0
         self.is_unsafe = 0
         self.is_docker_unsafe = 0
         self.is_experimental = 0
         self.is_container = 0
         self.is_valgrind = 0
         self.benchmark = 0  # which tests to benchmark
         self.timer = {}  # timers
 
     def log(self, args):
         self.logger.log(args)
 
     def debug(self, args):
         self.logger.debug(args)
 
     def has_key(self, key):
         return self.Stats.has_key(key)
 
     def __setitem__(self, key, value):
         self.Stats[key] = value
 
     def __getitem__(self, key):
         return self.Stats[key]
 
     def log_mark(self, msg):
         self.debug("MARK: test %s %s %d" % (self.name,msg,time.time()))
         return
 
     def get_timer(self,key = "test"):
         try: return self.timer[key]
         except: return 0
 
     def set_timer(self,key = "test"):
         self.timer[key] = time.time()
         return self.timer[key]
 
     def log_timer(self,key = "test"):
         elapsed = 0
         if key in self.timer:
             elapsed = time.time() - self.timer[key]
             s = key == "test" and self.name or "%s:%s" % (self.name,key)
             self.debug("%s runtime: %.2f" % (s, elapsed))
             del self.timer[key]
         return elapsed
 
     def incr(self, name):
         '''Increment (or initialize) the value associated with the given name'''
         if not self.Stats.has_key(name):
             self.Stats[name] = 0
         self.Stats[name] = self.Stats[name]+1
 
         # Reset the test passed boolean
         if name == "calls":
             self.passed = 1
 
     def failure(self, reason="none"):
         '''Increment the failure count'''
         self.passed = 0
         self.incr("failure")
         self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason)
         return None
 
     def success(self):
         '''Increment the success count'''
         self.incr("success")
         return 1
 
     def skipped(self):
         '''Increment the skipped count'''
         self.incr("skipped")
         return 1
 
     def __call__(self, node):
         '''Perform the given test'''
         raise ValueError("Abstract Class member (__call__)")
         self.incr("calls")
         return self.failure()
 
     def audit(self):
         passed = 1
         if len(self.Audits) > 0:
             for audit in self.Audits:
                 if not audit():
                     self.logger.log("Internal %s Audit %s FAILED." % (self.name, audit.name()))
                     self.incr("auditfail")
                     passed = 0
         return passed
 
     def setup(self, node):
         '''Setup the given test'''
         return self.success()
 
     def teardown(self, node):
         '''Tear down the given test'''
         return self.success()
 
     def create_watch(self, patterns, timeout, name=None):
         if not name:
             name = self.name
         return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"])
 
     def local_badnews(self, prefix, watch, local_ignore=[]):
         errcount = 0
         if not prefix:
             prefix = "LocalBadNews:"
 
         ignorelist = []
         ignorelist.append(" CTS: ")
         ignorelist.append(prefix)
         ignorelist.extend(local_ignore)
 
         while errcount < 100:
             match = watch.look(0)
             if match:
                add_err = 1
                for ignore in ignorelist:
                    if add_err == 1 and re.search(ignore, match):
                        add_err = 0
                if add_err == 1:
                    self.logger.log(prefix + " " + match)
                    errcount = errcount + 1
             else:
               break
         else:
             self.logger.log("Too many errors!")
 
         watch.end()
         return errcount
 
     def is_applicable(self):
         return self.is_applicable_common()
 
     def is_applicable_common(self):
         '''Return TRUE if we are applicable in the current test configuration'''
         #raise ValueError("Abstract Class member (is_applicable)")
 
         if self.is_loop and not self.Env["loop-tests"]:
             return 0
         elif self.is_unsafe and not self.Env["unsafe-tests"]:
             return 0
         elif self.is_valgrind and not self.Env["valgrind-tests"]:
             return 0
         elif self.is_experimental and not self.Env["experimental-tests"]:
             return 0
         elif self.is_docker_unsafe and self.Env["docker"]:
             return 0
         elif self.is_container and not self.Env["container-tests"]:
             return 0
         elif self.Env["benchmark"] and self.benchmark == 0:
             return 0
 
         return 1
 
     def find_ocfs2_resources(self, node):
         self.r_o2cb = None
         self.r_ocfs2 = []
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rtype == "o2cb" and r.parent != "NA":
                     self.debug("Found o2cb: %s" % self.r_o2cb)
                     self.r_o2cb = r.parent
             if re.search("^Constraint", line):
                 c = AuditConstraint(self.CM, line)
                 if c.type == "rsc_colocation" and c.target == self.r_o2cb:
                     self.r_ocfs2.append(c.rsc)
 
         self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2))
         return len(self.r_ocfs2)
 
     def canrunnow(self, node):
         '''Return TRUE if we can meaningfully run right now'''
         return 1
 
     def errorstoignore(self):
         '''Return list of errors which are 'normal' and should be ignored'''
         return []
 
 
 class StopTest(CTSTest):
     '''Stop (deactivate) the cluster manager on a node'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Stop"
 
     def __call__(self, node):
         '''Perform the 'stop' test. '''
         self.incr("calls")
         if self.CM.ShouldBeStatus[node] != "up":
             return self.skipped()
 
         patterns = []
         # Technically we should always be able to notice ourselves stopping
         patterns.append(self.templates["Pat:We_stopped"] % node)
 
         #if self.Env["use_logd"]:
         #    patterns.append(self.templates["Pat:Logd_stopped"] % node)
 
         # Any active node needs to notice this one left
         # NOTE: This wont work if we have multiple partitions
         for other in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[other] == "up" and other != node:
                 patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
                 #self.debug("Checking %s will notice %s left"%(other, node))
 
         watch = self.create_watch(patterns, self.Env["DeadTime"])
         watch.setwatch()
 
         if node == self.CM.OurNode:
             self.incr("us")
         else:
             if self.CM.upcount() <= 1:
                 self.incr("all")
             else:
                 self.incr("them")
 
         self.CM.StopaCM(node)
         watch_result = watch.lookforall()
 
         failreason = None
         UnmatchedList = "||"
         if watch.unmatched:
             (rc, output) = self.rsh(node, "/bin/ps axf", None)
             for line in output:
                 self.debug(line)
 
             (rc, output) = self.rsh(node, "/usr/sbin/dlm_tool dump", None)
             for line in output:
                 self.debug(line)
 
             for regex in watch.unmatched:
                 self.logger.log ("ERROR: Shutdown pattern not found: %s" % (regex))
                 UnmatchedList +=  regex + "||";
                 failreason = "Missing shutdown pattern"
 
         self.CM.cluster_stable(self.Env["DeadTime"])
 
         if not watch.unmatched or self.CM.upcount() == 0:
             return self.success()
 
         if len(watch.unmatched) >= self.CM.upcount():
             return self.failure("no match against (%s)" % UnmatchedList)
 
         if failreason == None:
             return self.success()
         else:
             return self.failure(failreason)
 #
 # We don't register StopTest because it's better when called by
 # another test...
 #
 
 
 class StartTest(CTSTest):
     '''Start (activate) the cluster manager on a node'''
     def __init__(self, cm, debug=None):
         CTSTest.__init__(self,cm)
         self.name = "start"
         self.debug = debug
 
     def __call__(self, node):
         '''Perform the 'start' test. '''
         self.incr("calls")
 
         if self.CM.upcount() == 0:
             self.incr("us")
         else:
             self.incr("them")
 
         if self.CM.ShouldBeStatus[node] != "down":
             return self.skipped()
         elif self.CM.StartaCM(node):
             return self.success()
         else:
             return self.failure("Startup %s on node %s failed"
                                 % (self.Env["Name"], node))
 
 #
 # We don't register StartTest because it's better when called by
 # another test...
 #
 
 
 class FlipTest(CTSTest):
     '''If it's running, stop it.  If it's stopped start it.
        Overthrow the status quo...
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Flip"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
 
     def __call__(self, node):
         '''Perform the 'Flip' test. '''
         self.incr("calls")
         if self.CM.ShouldBeStatus[node] == "up":
             self.incr("stopped")
             ret = self.stop(node)
             type = "up->down"
             # Give the cluster time to recognize it's gone...
             time.sleep(self.Env["StableTime"])
         elif self.CM.ShouldBeStatus[node] == "down":
             self.incr("started")
             ret = self.start(node)
             type = "down->up"
         else:
             return self.skipped()
 
         self.incr(type)
         if ret:
             return self.success()
         else:
             return self.failure("%s failure" % type)
 
 #        Register FlipTest as a good test to run
 AllTestClasses.append(FlipTest)
 
 
 class RestartTest(CTSTest):
     '''Stop and restart a node'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Restart"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
         self.benchmark = 1
 
     def __call__(self, node):
         '''Perform the 'restart' test. '''
         self.incr("calls")
 
         self.incr("node:" + node)
 
         ret1 = 1
         if self.CM.StataCM(node):
             self.incr("WasStopped")
             if not self.start(node):
                 return self.failure("start (setup) failure: "+node)
 
         self.set_timer()
         if not self.stop(node):
             return self.failure("stop failure: "+node)
         if not self.start(node):
             return self.failure("start failure: "+node)
         return self.success()
 
 #        Register RestartTest as a good test to run
 AllTestClasses.append(RestartTest)
 
 
 class StonithdTest(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Stonithd"
         self.startall = SimulStartLite(cm)
         self.benchmark = 1
 
     def __call__(self, node):
         self.incr("calls")
         if len(self.Env["nodes"]) < 2:
             return self.skipped()
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         is_dc = self.CM.is_node_dc(node)
 
         watchpats = []
         watchpats.append(self.templates["Pat:FenceOpOK"] % node)
         watchpats.append(self.templates["Pat:NodeFenced"] % node)
 
         if self.Env["at-boot"] == 0:
             self.debug("Expecting %s to stay down" % node)
             self.CM.ShouldBeStatus[node] = "down"
         else:
             self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"]))
             watchpats.append("%s.* S_STARTING -> S_PENDING" % node)
             watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node)
 
         watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
         watch.setwatch()
 
         origin = self.Env.RandomGen.choice(self.Env["nodes"])
 
         rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node)
 
         if rc == 194:
             # 194 - 256 = -62 = Timer expired
             #
             # Look for the patterns, usually this means the required
             # device was running on the node to be fenced - or that
             # the required devices were in the process of being loaded
             # and/or moved
             #
             # Effectively the node committed suicide so there will be
             # no confirmation, but pacemaker should be watching and
             # fence the node again
 
             self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node))
 
         elif origin != node and rc != 0:
             self.debug("Waiting for the cluster to recover")
             self.CM.cluster_stable()
 
             self.debug("Waiting STONITHd node to come back up")
             self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
             self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc))
 
         elif origin == node and rc != 255:
             # 255 == broken pipe, ie. the node was fenced as expected
             self.logger.log("Locally originated fencing returned %d" % rc)
 
         self.set_timer("fence")
         matched = watch.lookforall()
         self.log_timer("fence")
         self.set_timer("reform")
         if watch.unmatched:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         self.debug("Waiting STONITHd node to come back up")
         self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         is_stable = self.CM.cluster_stable(self.Env["StartTime"])
 
         if not matched:
             return self.failure("Didn't find all expected patterns")
         elif not is_stable:
             return self.failure("Cluster did not become stable")
 
         self.log_timer("reform")
         return self.success()
 
     def errorstoignore(self):
         return [
             self.templates["Pat:Fencing_start"] % ".*",
             self.templates["Pat:Fencing_ok"] % ".*",
             r"error.*: Resource .*stonith::.* is active on 2 nodes attempting recovery",
             r"error.*: Operation reboot of .*by .* for stonith_admin.*: Timer expired",
         ]
 
     def is_applicable(self):
         if not self.is_applicable_common():
             return 0
 
         if self.Env.has_key("DoFencing"):
             return self.Env["DoFencing"]
 
         return 1
 
 AllTestClasses.append(StonithdTest)
 
 
 class StartOnebyOne(CTSTest):
     '''Start all the nodes ~ one by one'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "StartOnebyOne"
         self.stopall = SimulStopLite(cm)
         self.start = StartTest(cm)
         self.ns = CTS.NodeStatus(cm.Env)
 
     def __call__(self, dummy):
         '''Perform the 'StartOnebyOne' test. '''
         self.incr("calls")
 
         #        We ignore the "node" parameter...
 
         #        Shut down all the nodes...
         ret = self.stopall(None)
         if not ret:
             return self.failure("Test setup failed")
 
         failed = []
         self.set_timer()
         for node in self.Env["nodes"]:
             if not self.start(node):
                 failed.append(node)
 
         if len(failed) > 0:
             return self.failure("Some node failed to start: " + repr(failed))
 
         return self.success()
 
 #        Register StartOnebyOne as a good test to run
 AllTestClasses.append(StartOnebyOne)
 
 
 class SimulStart(CTSTest):
     '''Start all the nodes ~ simultaneously'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStart"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
 
     def __call__(self, dummy):
         '''Perform the 'SimulStart' test. '''
         self.incr("calls")
 
         #        We ignore the "node" parameter...
 
         #        Shut down all the nodes...
         ret = self.stopall(None)
         if not ret:
             return self.failure("Setup failed")
 
         self.CM.clear_all_caches()
 
         if not self.startall(None):
             return self.failure("Startall failed")
 
         return self.success()
 
 #        Register SimulStart as a good test to run
 AllTestClasses.append(SimulStart)
 
 
 class SimulStop(CTSTest):
     '''Stop all the nodes ~ simultaneously'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStop"
         self.startall = SimulStartLite(cm)
         self.stopall = SimulStopLite(cm)
 
     def __call__(self, dummy):
         '''Perform the 'SimulStop' test. '''
         self.incr("calls")
 
         #     We ignore the "node" parameter...
 
         #     Start up all the nodes...
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         if not self.stopall(None):
             return self.failure("Stopall failed")
 
         return self.success()
 
 #     Register SimulStop as a good test to run
 AllTestClasses.append(SimulStop)
 
 
 class StopOnebyOne(CTSTest):
     '''Stop all the nodes in order'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "StopOnebyOne"
         self.startall = SimulStartLite(cm)
         self.stop = StopTest(cm)
 
     def __call__(self, dummy):
         '''Perform the 'StopOnebyOne' test. '''
         self.incr("calls")
 
         #     We ignore the "node" parameter...
 
         #     Start up all the nodes...
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         failed = []
         self.set_timer()
         for node in self.Env["nodes"]:
             if not self.stop(node):
                 failed.append(node)
 
         if len(failed) > 0:
             return self.failure("Some node failed to stop: " + repr(failed))
 
         self.CM.clear_all_caches()
         return self.success()
 
 #     Register StopOnebyOne as a good test to run
 AllTestClasses.append(StopOnebyOne)
 
 
 class RestartOnebyOne(CTSTest):
     '''Restart all the nodes in order'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RestartOnebyOne"
         self.startall = SimulStartLite(cm)
 
     def __call__(self, dummy):
         '''Perform the 'RestartOnebyOne' test. '''
         self.incr("calls")
 
         #     We ignore the "node" parameter...
 
         #     Start up all the nodes...
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         did_fail = []
         self.set_timer()
         self.restart = RestartTest(self.CM)
         for node in self.Env["nodes"]:
             if not self.restart(node):
                 did_fail.append(node)
 
         if did_fail:
             return self.failure("Could not restart %d nodes: %s"
                                 % (len(did_fail), repr(did_fail)))
         return self.success()
 
 #     Register StopOnebyOne as a good test to run
 AllTestClasses.append(RestartOnebyOne)
 
 
 class PartialStart(CTSTest):
     '''Start a node - but tell it to stop before it finishes starting up'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "PartialStart"
         self.startall = SimulStartLite(cm)
         self.stopall = SimulStopLite(cm)
         self.stop = StopTest(cm)
         #self.is_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'PartialStart' test. '''
         self.incr("calls")
 
         ret = self.stopall(None)
         if not ret:
             return self.failure("Setup failed")
 
 #   FIXME!  This should use the CM class to get the pattern
 #       then it would be applicable in general
         watchpats = []
         watchpats.append("crmd.*Connecting to cluster infrastructure")
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
         watch.setwatch()
 
         self.CM.StartaCMnoBlock(node)
         ret = watch.lookforall()
         if not ret:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
             return self.failure("Setup of %s failed" % node)
 
         ret = self.stop(node)
         if not ret:
             return self.failure("%s did not stop in time" % node)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
 
         # We might do some fencing in the 2-node case if we make it up far enough
         return [
             """Executing reboot fencing operation""",
         ]
 
 #     Register StopOnebyOne as a good test to run
 AllTestClasses.append(PartialStart)
 
 
 class StandbyTest(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Standby"
         self.benchmark = 1
 
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
 
     # make sure the node is active
     # set the node to standby mode
     # check resources, none resource should be running on the node
     # set the node to active mode
     # check resouces, resources should have been migrated back (SHOULD THEY?)
 
     def __call__(self, node):
 
         self.incr("calls")
         ret = self.startall(None)
         if not ret:
             return self.failure("Start all nodes failed")
 
         self.debug("Make sure node %s is active" % node)
         if self.CM.StandbyStatus(node) != "off":
             if not self.CM.SetStandbyMode(node, "off"):
                 return self.failure("can't set node %s to active mode" % node)
 
         self.CM.cluster_stable()
 
         status = self.CM.StandbyStatus(node)
         if status != "off":
             return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
 
         self.debug("Getting resources running on node %s" % node)
         rsc_on_node = self.CM.active_resources(node)
 
         watchpats = []
         watchpats.append(r"State transition .* -> S_POLICY_ENGINE")
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
         watch.setwatch()
 
         self.debug("Setting node %s to standby mode" % node)
         if not self.CM.SetStandbyMode(node, "on"):
             return self.failure("can't set node %s to standby mode" % node)
 
         self.set_timer("on")
 
         ret = watch.lookforall()
         if not ret:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
             self.CM.SetStandbyMode(node, "off")
             return self.failure("cluster didn't react to standby change on %s" % node)
 
         self.CM.cluster_stable()
 
         status = self.CM.StandbyStatus(node)
         if status != "on":
             return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status))
         self.log_timer("on")
 
         self.debug("Checking resources")
         bad_run = self.CM.active_resources(node)
         if len(bad_run) > 0:
             rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run)))
             self.debug("Setting node %s to active mode" % node)
             self.CM.SetStandbyMode(node, "off")
             return rc
 
         self.debug("Setting node %s to active mode" % node)
         if not self.CM.SetStandbyMode(node, "off"):
             return self.failure("can't set node %s to active mode" % node)
 
         self.set_timer("off")
         self.CM.cluster_stable()
 
         status = self.CM.StandbyStatus(node)
         if status != "off":
             return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
         self.log_timer("off")
 
         return self.success()
 
 AllTestClasses.append(StandbyTest)
 
 
 class ValgrindTest(CTSTest):
     '''Check for memory leaks'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Valgrind"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
         self.is_valgrind = 1
         self.is_loop = 1
 
     def setup(self, node):
         self.incr("calls")
 
         ret = self.stopall(None)
         if not ret:
             return self.failure("Stop all nodes failed")
 
         # Enable valgrind
         self.logger.logPat = "/tmp/%s-*.valgrind" % self.name
 
         self.Env["valgrind-prefix"] = self.name
 
         self.rsh(node, "rm -f %s" % self.logger.logPat, None)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Start all nodes failed")
 
         for node in self.Env["nodes"]:
             (rc, output) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None)
             for line in output:
                 self.debug(line)
 
         return self.success()
 
     def teardown(self, node):
         # Disable valgrind
         self.Env["valgrind-prefix"] = None
 
         # Return all nodes to normal
         ret = self.stopall(None)
         if not ret:
             return self.failure("Stop all nodes failed")
 
         return self.success()
 
     def find_leaks(self):
         # Check for leaks
         leaked = []
         self.stop = StopTest(self.CM)
 
         for node in self.Env["nodes"]:
             (rc, ps_out) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None)
             rc = self.stop(node)
             if not rc:
                 self.failure("Couldn't shut down %s" % node)
 
             rc = self.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logger.logPat, 0)
             if rc != 1:
                 leaked.append(node)
                 self.failure("Valgrind errors detected on %s" % node)
                 for line in ps_out:
                     self.logger.log(line)
                 (rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None)
                 for line in output:
                     self.logger.log(line)
                 (rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None)
                 for line in output:
                     self.debug(line)
 
         self.rsh(node, "rm -f %s" % self.logger.logPat, None)
         return leaked
 
     def __call__(self, node):
         leaked = self.find_leaks()
         if len(leaked) > 0:
             return self.failure("Nodes %s leaked" % repr(leaked))
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"cib.*: \*\*\*\*\*\*\*\*\*\*\*\*\*",
             r"cib.*: .* avoid confusing Valgrind",
             r"HA_VALGRIND_ENABLED",
         ]
 
 
 class StandbyLoopTest(ValgrindTest):
     '''Check for memory leaks by putting a node in and out of standby for an hour'''
     def __init__(self, cm):
         ValgrindTest.__init__(self,cm)
         self.name = "StandbyLoop"
 
     def __call__(self, node):
 
         lpc = 0
         delay = 2
         failed = 0
         done = time.time() + self.Env["loop-minutes"] * 60
         while time.time() <= done and not failed:
             lpc = lpc + 1
 
             time.sleep(delay)
             if not self.CM.SetStandbyMode(node, "on"):
                 self.failure("can't set node %s to standby mode" % node)
                 failed = lpc
 
             time.sleep(delay)
             if not self.CM.SetStandbyMode(node, "off"):
                 self.failure("can't set node %s to active mode" % node)
                 failed = lpc
 
         leaked = self.find_leaks()
         if failed:
             return self.failure("Iteration %d failed" % failed)
         elif len(leaked) > 0:
             return self.failure("Nodes %s leaked" % repr(leaked))
 
         return self.success()
 
 AllTestClasses.append(StandbyLoopTest)
 
 
 class BandwidthTest(CTSTest):
 #        Tests should not be cluster-manager-specific
 #        If you need to find out cluster manager configuration to do this, then
 #        it should be added to the generic cluster manager API.
     '''Test the bandwidth which heartbeat uses'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Bandwidth"
         self.start = StartTest(cm)
         self.__setitem__("min",0)
         self.__setitem__("max",0)
         self.__setitem__("totalbandwidth",0)
         self.tempfile = tempfile.mktemp(".cts")
         self.startall = SimulStartLite(cm)
 
     def __call__(self, node):
         '''Perform the Bandwidth test'''
         self.incr("calls")
 
         if self.CM.upcount() < 1:
             return self.skipped()
 
         Path = self.CM.InternalCommConfig()
         if "ip" not in Path["mediatype"]:
              return self.skipped()
 
         port = Path["port"][0]
         port = int(port)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Test setup failed")
         time.sleep(5)  # We get extra messages right after startup.
 
         fstmpfile = "/var/run/band_estimate"
         dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \
         %                (port, fstmpfile)
 
         rc = self.rsh(node, dumpcmd)
         if rc == 0:
             farfile = "root@%s:%s" % (node, fstmpfile)
             self.rsh.cp(farfile, self.tempfile)
             Bandwidth = self.countbandwidth(self.tempfile)
             if not Bandwidth:
                 self.logger.log("Could not compute bandwidth.")
                 return self.success()
             intband = int(Bandwidth + 0.5)
             self.logger.log("...bandwidth: %d bits/sec" % intband)
             self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth
             if self.Stats["min"] == 0:
                 self.Stats["min"] = Bandwidth
             if Bandwidth > self.Stats["max"]:
                 self.Stats["max"] = Bandwidth
             if Bandwidth < self.Stats["min"]:
                 self.Stats["min"] = Bandwidth
             self.rsh(node, "rm -f %s" % fstmpfile)
             os.unlink(self.tempfile)
             return self.success()
         else:
             return self.failure("no response from tcpdump command [%d]!" % rc)
 
     def countbandwidth(self, file):
         fp = open(file, "r")
         fp.seek(0)
         count = 0
         sum = 0
         while 1:
             line = fp.readline()
             if not line:
                 return None
             if re.search("udp",line) or re.search("UDP,", line):
                 count = count + 1
                 linesplit = string.split(line," ")
                 for j in range(len(linesplit)-1):
                     if linesplit[j] == "udp": break
                     if linesplit[j] == "length:": break
 
                 try:
                     sum = sum + int(linesplit[j+1])
                 except ValueError:
                     self.logger.log("Invalid tcpdump line: %s" % line)
                     return None
                 T1 = linesplit[0]
                 timesplit = string.split(T1,":")
                 time2split = string.split(timesplit[2],".")
                 time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001
                 break
 
         while count < 100:
             line = fp.readline()
             if not line:
                 return None
             if re.search("udp",line) or re.search("UDP,", line):
                 count = count+1
                 linessplit = string.split(line," ")
                 for j in range(len(linessplit)-1):
                     if linessplit[j] == "udp": break
                     if linesplit[j] == "length:": break
                 try:
                     sum = int(linessplit[j+1]) + sum
                 except ValueError:
                     self.logger.log("Invalid tcpdump line: %s" % line)
                     return None
 
         T2 = linessplit[0]
         timesplit = string.split(T2,":")
         time2split = string.split(timesplit[2],".")
         time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001
         time = time2-time1
         if (time <= 0):
             return 0
         return (sum*8)/time
 
     def is_applicable(self):
         '''BandwidthTest never applicable'''
         return 0
 
 AllTestClasses.append(BandwidthTest)
 
 
 ###################################################################
 class MaintenanceMode(CTSTest):
 ###################################################################
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "MaintenanceMode"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.max = 30
         #self.is_unsafe = 1
         self.benchmark = 1
         self.action = "asyncmon"
         self.interval = 0
         self.rid = "maintenanceDummy"
 
     def toggleMaintenanceMode(self, node, action):
         pats = []
         pats.append(self.templates["Pat:DC_IDLE"])
 
         # fail the resource right after turning Maintenance mode on
         # verify it is not recovered until maintenance mode is turned off
         if action == "On":
             pats.append("pengine.*: warning: Processing failed op %s for %s on" % (self.action, self.rid))
         else:
             pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0"))
             pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0"))
 
         watch = self.create_watch(pats, 60)
         watch.setwatch()
 
         self.debug("Turning maintenance mode %s" % action)
         self.rsh(node, self.templates["MaintenanceMode%s" % (action)])
         if (action == "On"):
             self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
 
         self.set_timer("recover%s" % (action))
         watch.lookforall()
         self.log_timer("recover%s" % (action))
         if watch.unmatched:
             self.debug("Failed to find patterns when turning maintenance mode %s" % action)
             return repr(watch.unmatched)
 
         return ""
 
     def insertMaintenanceDummy(self, node):
         pats = []
         pats.append(("%s.*" % node) + (self.templates["Pat:RscOpOK"] % (self.rid, "start_0")))
 
         watch = self.create_watch(pats, 60)
         watch.setwatch()
 
         self.CM.AddDummyRsc(node, self.rid)
 
         self.set_timer("addDummy")
         watch.lookforall()
         self.log_timer("addDummy")
 
         if watch.unmatched:
             self.debug("Failed to find patterns when adding maintenance dummy resource")
             return repr(watch.unmatched)
         return ""
 
     def removeMaintenanceDummy(self, node):
         pats = []
         pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0"))
 
         watch = self.create_watch(pats, 60)
         watch.setwatch()
         self.CM.RemoveDummyRsc(node, self.rid)
 
         self.set_timer("removeDummy")
         watch.lookforall()
         self.log_timer("removeDummy")
 
         if watch.unmatched:
             self.debug("Failed to find patterns when removing maintenance dummy resource")
             return repr(watch.unmatched)
         return ""
 
     def managedRscList(self, node):
         rscList = []
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 tmp = AuditResource(self.CM, line)
                 if tmp.managed():
                     rscList.append(tmp.id)
 
         return rscList
 
     def verifyResources(self, node, rscList, managed):
         managedList = list(rscList)
         managed_str = "managed"
         if not managed:
             managed_str = "unmanaged"
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 tmp = AuditResource(self.CM, line)
                 if managed and not tmp.managed():
                     continue
                 elif not managed and tmp.managed():
                     continue
                 elif managedList.count(tmp.id):
                     managedList.remove(tmp.id)
 
         if len(managedList) == 0:
             self.debug("Found all %s resources on %s" % (managed_str, node))
             return True
 
         self.logger.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList))
         return False
 
     def __call__(self, node):
         '''Perform the 'MaintenanceMode' test. '''
         self.incr("calls")
         verify_managed = False
         verify_unmanaged = False
         failPat = ""
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         # get a list of all the managed resources. We use this list
         # after enabling maintenance mode to verify all managed resources
         # become un-managed.  After maintenance mode is turned off, we use
         # this list to verify all the resources become managed again.
         managedResources = self.managedRscList(node)
         if len(managedResources) == 0:
             self.logger.log("No managed resources on %s" % node)
             return self.skipped()
 
         # insert a fake resource we can fail during maintenance mode
         # so we can verify recovery does not take place until after maintenance
         # mode is disabled.
         failPat = failPat + self.insertMaintenanceDummy(node)
 
         # toggle maintenance mode ON, then fail dummy resource.
         failPat = failPat + self.toggleMaintenanceMode(node, "On")
 
         # verify all the resources are now unmanaged
         if self.verifyResources(node, managedResources, False):
             verify_unmanaged = True
 
         # Toggle maintenance mode  OFF, verify dummy is recovered.
         failPat = failPat + self.toggleMaintenanceMode(node, "Off")
 
         # verify all the resources are now managed again
         if self.verifyResources(node, managedResources, True):
             verify_managed = True
 
         # Remove our maintenance dummy resource.
         failPat = failPat + self.removeMaintenanceDummy(node)
 
         self.CM.cluster_stable()
 
         if failPat != "":
             return self.failure("Unmatched patterns: %s" % (failPat))
         elif verify_unmanaged is False:
             return self.failure("Failed to verify resources became unmanaged during maintenance mode")
         elif verify_managed is False:
             return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode")
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"Updating failcount for %s" % self.rid,
             r"pengine.*: Recover %s\s*\(.*\)" % self.rid,
             r"Unknown operation: fail",
             r"(ERROR|error): sending stonithRA op to stonithd failed.",
             self.templates["Pat:RscOpOK"] % (self.rid, ("%s_%d" % (self.action, self.interval))),
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
         ]
 
 AllTestClasses.append(MaintenanceMode)
 
 
 class ResourceRecover(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "ResourceRecover"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.max = 30
         self.rid = None
         self.rid_alt = None
         #self.is_unsafe = 1
         self.benchmark = 1
 
         # these are the values used for the new LRM API call
         self.action = "asyncmon"
         self.interval = 0
 
     def __call__(self, node):
         '''Perform the 'ResourceRecover' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         resourcelist = self.CM.active_resources(node)
         # if there are no resourcelist, return directly
         if len(resourcelist) == 0:
             self.logger.log("No active resources on %s" % node)
             return self.skipped()
 
         self.rid = self.Env.RandomGen.choice(resourcelist)
         self.rid_alt = self.rid
 
         rsc = None
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 tmp = AuditResource(self.CM, line)
                 if tmp.id == self.rid:
                     rsc = tmp
                     # Handle anonymous clones that get renamed
                     self.rid = rsc.clone_id
                     break
 
         if not rsc:
             return self.failure("Could not find %s in the resource list" % self.rid)
 
         self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id))
 
         pats = []
         pats.append(r"pengine.*: warning: Processing failed op %s for (%s|%s) on" % (self.action,
             rsc.id, rsc.clone_id))
 
         if rsc.managed():
             pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0"))
             if rsc.unique():
                 pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0"))
             else:
                 # Anonymous clones may get restarted with a different clone number
                 pats.append(self.templates["Pat:RscOpOK"] % (".*", "start_0"))
 
         watch = self.create_watch(pats, 60)
         watch.setwatch()
 
         self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
 
         self.set_timer("recover")
         watch.lookforall()
         self.log_timer("recover")
 
         self.CM.cluster_stable()
         recovered = self.CM.ResourceLocation(self.rid)
 
         if watch.unmatched:
             return self.failure("Patterns not found: %s" % repr(watch.unmatched))
 
         elif rsc.unique() and len(recovered) > 1:
             return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered)))
 
         elif len(recovered) > 0:
             self.debug("%s is running on: %s" % (self.rid, repr(recovered)))
 
         elif rsc.managed():
             return self.failure("%s was not recovered and is inactive" % self.rid)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"Updating failcount for %s" % self.rid,
             r"pengine.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt),
             r"Unknown operation: fail",
             r"(ERROR|error): sending stonithRA op to stonithd failed.",
             self.templates["Pat:RscOpOK"] % (self.rid, ("%s_%d" % (self.action, self.interval))),
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
         ]
 
 AllTestClasses.append(ResourceRecover)
 
 
 class ComponentFail(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "ComponentFail"
         # TODO make this work correctly in docker.
         self.is_docker_unsafe = 1
         self.startall = SimulStartLite(cm)
         self.complist = cm.Components()
         self.patterns = []
         self.okerrpatterns = []
         self.is_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'ComponentFail' test. '''
         self.incr("calls")
         self.patterns = []
         self.okerrpatterns = []
 
         # start all nodes
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         if not self.CM.cluster_stable(self.Env["StableTime"]):
             return self.failure("Setup failed - unstable")
 
         node_is_dc = self.CM.is_node_dc(node, None)
 
         # select a component to kill
         chosen = self.Env.RandomGen.choice(self.complist)
         while chosen.dc_only == 1 and node_is_dc == 0:
             chosen = self.Env.RandomGen.choice(self.complist)
 
         self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot))
         self.incr(chosen.name)
 
         if chosen.name != "aisexec" and chosen.name != "corosync":
             if self.Env["Name"] != "crm-lha" or chosen.name != "pengine":
                 self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
                 self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
 
         self.patterns.extend(chosen.pats)
         if node_is_dc:
           self.patterns.extend(chosen.dc_pats)
 
         # In an ideal world, this next stuff should be in the "chosen" object as a member function
         if self.Env["Name"] == "crm-lha" and chosen.triggersreboot:
             # Make sure the node goes down and then comes back up if it should reboot...
             for other in self.Env["nodes"]:
                 if other != node:
                     self.patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
             self.patterns.append(self.templates["Pat:Slave_started"] % node)
             self.patterns.append(self.templates["Pat:Local_started"] % node)
 
             if chosen.dc_only:
                 # Sometimes these will be in the log, and sometimes they won't...
                 self.okerrpatterns.append("%s .*Process %s:.* exited" % (node, chosen.name))
                 self.okerrpatterns.append("%s .*I_ERROR.*crmdManagedChildDied" % node)
                 self.okerrpatterns.append("%s .*The %s subsystem terminated unexpectedly" % (node, chosen.name))
                 self.okerrpatterns.append("(ERROR|error): Client .* exited with return code")
             else:
                 # Sometimes this won't be in the log...
                 self.okerrpatterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
                 self.okerrpatterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
                 self.okerrpatterns.append(self.templates["Pat:ChildExit"])
 
         if chosen.name == "stonith":
             # Ignore actions for STONITH resources
             (rc, lines) = self.rsh(node, "crm_resource -c", None)
             for line in lines:
                 if re.search("^Resource", line):
                     r = AuditResource(self.CM, line)
                     if r.rclass == "stonith":
                         self.okerrpatterns.append(self.templates["Pat:Fencing_recover"] % r.id)
 
         # supply a copy so self.patterns doesnt end up empty
         tmpPats = []
         tmpPats.extend(self.patterns)
         self.patterns.extend(chosen.badnews_ignore)
 
         # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status
         stonithPats = []
         stonithPats.append(self.templates["Pat:Fencing_ok"] % node)
         stonith = self.create_watch(stonithPats, 0)
         stonith.setwatch()
 
         # set the watch for stable
         watch = self.create_watch(
             tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
         watch.setwatch()
 
         # kill the component
         chosen.kill(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         self.debug("Waiting for any STONITHd node to come back up")
         self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         self.CM.cluster_stable(self.Env["StartTime"])
 
         self.debug("Checking if %s was shot" % node)
         shot = stonith.look(60)
         if shot:
             self.debug("Found: " + repr(shot))
             self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node)
 
             if self.Env["at-boot"] == 0:
                 self.CM.ShouldBeStatus[node] = "down"
 
             # If fencing occurred, chances are many (if not all) the expected logs
             # will not be sent - or will be lost when the node reboots
             return self.success()
 
         # check for logs indicating a graceful recovery
         matched = watch.lookforall(allow_multiple_matches=1)
         if watch.unmatched:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         is_stable = self.CM.cluster_stable(self.Env["StartTime"])
 
         if not matched:
             return self.failure("Didn't find all expected %s patterns" % chosen.name)
         elif not is_stable:
             return self.failure("Cluster did not become stable after killing %s" % chosen.name)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
     # Note that okerrpatterns refers to the last time we ran this test
     # The good news is that this works fine for us...
         self.okerrpatterns.extend(self.patterns)
         return self.okerrpatterns
 
 AllTestClasses.append(ComponentFail)
 
 
 class SplitBrainTest(CTSTest):
     '''It is used to test split-brain. when the path between the two nodes break
        check the two nodes both take over the resource'''
     def __init__(self,cm):
         CTSTest.__init__(self,cm)
         self.name = "SplitBrain"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.is_experimental = 1
 
     def isolate_partition(self, partition):
         other_nodes = []
         other_nodes.extend(self.Env["nodes"])
 
         for node in partition:
             try:
                 other_nodes.remove(node)
             except ValueError:
                 self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]) + " from " +repr(partition))
 
         if len(other_nodes) == 0:
             return 1
 
         self.debug("Creating partition: " + repr(partition))
         self.debug("Everyone else: " + repr(other_nodes))
 
         for node in partition:
             if not self.CM.isolate_node(node, other_nodes):
                 self.logger.log("Could not isolate %s" % node)
                 return 0
 
         return 1
 
     def heal_partition(self, partition):
         other_nodes = []
         other_nodes.extend(self.Env["nodes"])
 
         for node in partition:
             try:
                 other_nodes.remove(node)
             except ValueError:
                 self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]))
 
         if len(other_nodes) == 0:
             return 1
 
         self.debug("Healing partition: " + repr(partition))
         self.debug("Everyone else: " + repr(other_nodes))
 
         for node in partition:
             self.CM.unisolate_node(node, other_nodes)
 
     def __call__(self, node):
         '''Perform split-brain test'''
         self.incr("calls")
         self.passed = 1
         partitions = {}
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         while 1:
             # Retry until we get multiple partitions
             partitions = {}
             p_max = len(self.Env["nodes"])
             for node in self.Env["nodes"]:
                 p = self.Env.RandomGen.randint(1, p_max)
                 if not partitions.has_key(p):
                     partitions[p] = []
                 partitions[p].append(node)
             p_max = len(partitions.keys())
             if p_max > 1:
                 break
             # else, try again
 
         self.debug("Created %d partitions" % p_max)
         for key in partitions.keys():
             self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key]))
 
         # Disabling STONITH to reduce test complexity for now
         self.rsh(node, "crm_attribute -V -n stonith-enabled -v false")
 
         for key in partitions.keys():
             self.isolate_partition(partitions[key])
 
         count = 30
         while count > 0:
             if len(self.CM.find_partitions()) != p_max:
                 time.sleep(10)
             else:
                 break
         else:
             self.failure("Expected partitions were not created")
 
         # Target number of partitions formed - wait for stability
         if not self.CM.cluster_stable():
             self.failure("Partitioned cluster not stable")
 
         # Now audit the cluster state
         self.CM.partitions_expected = p_max
         if not self.audit():
             self.failure("Audits failed")
         self.CM.partitions_expected = 1
 
         # And heal them again
         for key in partitions.keys():
             self.heal_partition(partitions[key])
 
         # Wait for a single partition to form
         count = 30
         while count > 0:
             if len(self.CM.find_partitions()) != 1:
                 time.sleep(10)
                 count -= 1
             else:
                 break
         else:
             self.failure("Cluster did not reform")
 
         # Wait for it to have the right number of members
         count = 30
         while count > 0:
             members = []
 
             partitions = self.CM.find_partitions()
             if len(partitions) > 0:
                 members = partitions[0].split()
 
             if len(members) != len(self.Env["nodes"]):
                 time.sleep(10)
                 count -= 1
             else:
                 break
         else:
             self.failure("Cluster did not completely reform")
 
         # Wait up to 20 minutes - the delay is more preferable than
         # trying to continue with in a messed up state
         if not self.CM.cluster_stable(1200):
             self.failure("Reformed cluster not stable")
             answer = raw_input('Continue? [nY]')
             if answer and answer == "n":
                 raise ValueError("Reformed cluster not stable")
 
         # Turn fencing back on
         if self.Env["DoFencing"]:
             self.rsh(node, "crm_attribute -V -D -n stonith-enabled")
 
         self.CM.cluster_stable()
 
         if self.passed:
             return self.success()
         return self.failure("See previous errors")
 
     def errorstoignore(self):
         '''Return list of errors which are 'normal' and should be ignored'''
         return [
             r"Another DC detected:",
             r"(ERROR|error).*: .*Application of an update diff failed",
             r"crmd.*:.*not in our membership list",
             r"CRIT:.*node.*returning after partition",
         ]
 
     def is_applicable(self):
         if not self.is_applicable_common():
             return 0
         return len(self.Env["nodes"]) > 2
 
 AllTestClasses.append(SplitBrainTest)
 
 
 class Reattach(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Reattach"
         self.startall = SimulStartLite(cm)
         self.restart1 = RestartTest(cm)
         self.stopall = SimulStopLite(cm)
         self.is_unsafe = 0 # Handled by canrunnow()
 
     def setup(self, node):
         attempt = 0
         if not self.startall(None):
             return None
 
         # Make sure we are really _really_ stable and that all
         # resources, including those that depend on transient node
         # attributes, are started
         while not self.CM.cluster_stable(double_check=True):
             if attempt < 5:
                 attempt += 1
                 self.debug("Not stable yet, re-testing")
             else:
                 self.logger.log("Cluster is not stable")
                 return None
 
         return 1
 
     def teardown(self, node):
 
         # Make sure 'node' is up
         start = StartTest(self.CM)
         start(node)
 
         is_managed = self.rsh(node, "crm_attribute -Q -G -t crm_config -n is-managed-default -d true", 1)
         is_managed = is_managed[:-1] # Strip off the newline
         if is_managed != "true":
             self.logger.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed))
             managed = self.create_watch(["is-managed-default"], 60)
             managed.setwatch()
 
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
 
             if not managed.lookforall():
                 self.logger.log("Patterns not found: " + repr(managed.unmatched))
                 self.logger.log("Could not re-enable resource management")
                 return 0
 
         return 1
 
     def canrunnow(self, node):
         '''Return TRUE if we can meaningfully run right now'''
         if self.find_ocfs2_resources(node):
             self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present")
             return 0
         return 1
 
     def __call__(self, node):
         self.incr("calls")
 
         pats = []
         managed = self.create_watch(["is-managed-default"], 60)
         managed.setwatch()
 
         self.debug("Disable resource management")
         self.rsh(node, "crm_attribute -V -n is-managed-default -v false")
 
         if not managed.lookforall():
             self.logger.log("Patterns not found: " + repr(managed.unmatched))
             return self.failure("Resource management not disabled")
 
         pats = []
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "start"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "stop"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "promote"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "demote"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "migrate"))
 
         watch = self.create_watch(pats, 60, "ShutdownActivity")
         watch.setwatch()
 
         self.debug("Shutting down the cluster")
         ret = self.stopall(None)
         if not ret:
             self.debug("Re-enable resource management")
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
             return self.failure("Couldn't shut down the cluster")
 
         self.debug("Bringing the cluster back up")
         ret = self.startall(None)
         time.sleep(5) # allow ping to update the CIB
         if not ret:
             self.debug("Re-enable resource management")
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
             return self.failure("Couldn't restart the cluster")
 
         if self.local_badnews("ResourceActivity:", watch):
             self.debug("Re-enable resource management")
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
             return self.failure("Resources stopped or started during cluster restart")
 
         watch = self.create_watch(pats, 60, "StartupActivity")
         watch.setwatch()
 
         managed = self.create_watch(["is-managed-default"], 60)
         managed.setwatch()
 
         self.debug("Re-enable resource management")
         self.rsh(node, "crm_attribute -V -D -n is-managed-default")
 
         if not managed.lookforall():
             self.logger.log("Patterns not found: " + repr(managed.unmatched))
             return self.failure("Resource management not enabled")
 
         self.CM.cluster_stable()
 
         # Ignore actions for STONITH resources
         ignore = []
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rclass == "stonith":
 
                     self.debug("Ignoring start actions for %s" % r.id)
                     ignore.append(self.templates["Pat:RscOpOK"] % (r.id, "start_0"))
 
         if self.local_badnews("ResourceActivity:", watch, ignore):
             return self.failure("Resources stopped or started after resource management was re-enabled")
 
         return ret
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"resources were active at shutdown",
         ]
 
     def is_applicable(self):
         if self.Env["Name"] == "crm-lha":
             return None
         return 1
 
 AllTestClasses.append(Reattach)
 
 
 class SpecialTest1(CTSTest):
     '''Set up a custom test to cause quorum failure issues for Andrew'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SpecialTest1"
         self.startall = SimulStartLite(cm)
         self.restart1 = RestartTest(cm)
         self.stopall = SimulStopLite(cm)
 
     def __call__(self, node):
         '''Perform the 'SpecialTest1' test for Andrew. '''
         self.incr("calls")
 
         #        Shut down all the nodes...
         ret = self.stopall(None)
         if not ret:
             return self.failure("Could not stop all nodes")
 
         # Test config recovery when the other nodes come up
         self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*")
 
         #        Start the selected node
         ret = self.restart1(node)
         if not ret:
             return self.failure("Could not start "+node)
 
         #        Start all remaining nodes
         ret = self.startall(None)
         if not ret:
             return self.failure("Could not start the remaining nodes")
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         # Errors that occur as a result of the CIB being wiped
         return [
             r"error.*: v1 patchset error, patch failed to apply: Application of an update diff failed",
             r"error.*: Resource start-up disabled since no STONITH resources have been defined",
             r"error.*: Either configure some or disable STONITH with the stonith-enabled option",
             r"error.*: NOTE: Clusters with shared data need STONITH to ensure data integrity",
         ]
 
 AllTestClasses.append(SpecialTest1)
 
 
 class HAETest(CTSTest):
     '''Set up a custom test to cause quorum failure issues for Andrew'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "HAETest"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
         self.is_loop = 1
 
     def setup(self, node):
         #  Start all remaining nodes
         ret = self.startall(None)
         if not ret:
             return self.failure("Couldn't start all nodes")
         return self.success()
 
     def teardown(self, node):
         # Stop everything
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
         return self.success()
 
     def wait_on_state(self, node, resource, expected_clones, attempts=240):
         while attempts > 0:
             active = 0
             (rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None)
 
             # Hack until crm_resource does the right thing
             if rc == 0 and lines:
                 active = len(lines)
 
             if len(lines) == expected_clones:
                 return 1
 
             elif rc == 1:
                 self.debug("Resource %s is still inactive" % resource)
 
             elif rc == 234:
                 self.logger.log("Unknown resource %s" % resource)
                 return 0
 
             elif rc == 246:
                 self.logger.log("Cluster is inactive")
                 return 0
 
             elif rc != 0:
                 self.logger.log("Call to crm_resource failed, rc=%d" % rc)
                 return 0
 
             else:
                 self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones))
 
             attempts -= 1
             time.sleep(1)
 
         return 0
 
     def find_dlm(self, node):
         self.r_dlm = None
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rtype == "controld" and r.parent != "NA":
                     self.debug("Found dlm: %s" % self.r_dlm)
                     self.r_dlm = r.parent
                     return 1
         return 0
 
     def find_hae_resources(self, node):
         self.r_dlm = None
         self.r_o2cb = None
         self.r_ocfs2 = []
 
         if self.find_dlm(node):
             self.find_ocfs2_resources(node)
 
     def is_applicable(self):
         if not self.is_applicable_common():
             return 0
         if self.Env["Schema"] == "hae":
             return 1
         return None
 
 
 class HAERoleTest(HAETest):
     def __init__(self, cm):
         '''Lars' mount/unmount test for the HA extension. '''
         HAETest.__init__(self,cm)
         self.name = "HAERoleTest"
 
     def change_state(self, node, resource, target):
         rc = self.rsh(node, "crm_resource -V -r %s -p target-role -v %s  --meta" % (resource, target))
         return rc
 
     def __call__(self, node):
         self.incr("calls")
         lpc = 0
         failed = 0
         delay = 2
         done = time.time() + self.Env["loop-minutes"]*60
         self.find_hae_resources(node)
 
         clone_max = len(self.Env["nodes"])
         while time.time() <= done and not failed:
             lpc = lpc + 1
 
             self.change_state(node, self.r_dlm, "Stopped")
             if not self.wait_on_state(node, self.r_dlm, 0):
                 self.failure("%s did not go down correctly" % self.r_dlm)
                 failed = lpc
 
             self.change_state(node, self.r_dlm, "Started")
             if not self.wait_on_state(node, self.r_dlm, clone_max):
                 self.failure("%s did not come up correctly" % self.r_dlm)
                 failed = lpc
 
             if not self.wait_on_state(node, self.r_o2cb, clone_max):
                 self.failure("%s did not come up correctly" % self.r_o2cb)
                 failed = lpc
 
             for fs in self.r_ocfs2:
                 if not self.wait_on_state(node, fs, clone_max):
                     self.failure("%s did not come up correctly" % fs)
                     failed = lpc
 
         if failed:
             return self.failure("iteration %d failed" % failed)
         return self.success()
 
 AllTestClasses.append(HAERoleTest)
 
 
 class HAEStandbyTest(HAETest):
     '''Set up a custom test to cause quorum failure issues for Andrew'''
     def __init__(self, cm):
         HAETest.__init__(self,cm)
         self.name = "HAEStandbyTest"
 
     def change_state(self, node, resource, target):
         rc = self.rsh(node, "crm_standby -V -l reboot -v %s" % (target))
         return rc
 
     def __call__(self, node):
         self.incr("calls")
 
         lpc = 0
         failed = 0
         done = time.time() + self.Env["loop-minutes"]*60
         self.find_hae_resources(node)
 
         clone_max = len(self.Env["nodes"])
         while time.time() <= done and not failed:
             lpc = lpc + 1
 
             self.change_state(node, self.r_dlm, "true")
             if not self.wait_on_state(node, self.r_dlm, clone_max-1):
                 self.failure("%s did not go down correctly" % self.r_dlm)
                 failed = lpc
 
             self.change_state(node, self.r_dlm, "false")
             if not self.wait_on_state(node, self.r_dlm, clone_max):
                 self.failure("%s did not come up correctly" % self.r_dlm)
                 failed = lpc
 
             if not self.wait_on_state(node, self.r_o2cb, clone_max):
                 self.failure("%s did not come up correctly" % self.r_o2cb)
                 failed = lpc
 
             for fs in self.r_ocfs2:
                 if not self.wait_on_state(node, fs, clone_max):
                     self.failure("%s did not come up correctly" % fs)
                     failed = lpc
 
         if failed:
             return self.failure("iteration %d failed" % failed)
         return self.success()
 
 AllTestClasses.append(HAEStandbyTest)
 
 
 class NearQuorumPointTest(CTSTest):
     '''
     This test brings larger clusters near the quorum point (50%).
     In addition, it will test doing starts and stops at the same time.
 
     Here is how I think it should work:
     - loop over the nodes and decide randomly which will be up and which
       will be down  Use a 50% probability for each of up/down.
     - figure out what to do to get into that state from the current state
     - in parallel, bring up those going up  and bring those going down.
     '''
 
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "NearQuorumPoint"
 
     def __call__(self, dummy):
         '''Perform the 'NearQuorumPoint' test. '''
         self.incr("calls")
         startset = []
         stopset = []
 
         stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint")
         #decide what to do with each node
         for node in self.Env["nodes"]:
             action = self.Env.RandomGen.choice(["start","stop"])
             #action = self.Env.RandomGen.choice(["start","stop","no change"])
             if action == "start" :
                 startset.append(node)
             elif action == "stop" :
                 stopset.append(node)
 
         self.debug("start nodes:" + repr(startset))
         self.debug("stop nodes:" + repr(stopset))
 
         #add search patterns
         watchpats = [ ]
         for node in stopset:
             if self.CM.ShouldBeStatus[node] == "up":
                 watchpats.append(self.templates["Pat:We_stopped"] % node)
 
         for node in startset:
             if self.CM.ShouldBeStatus[node] == "down":
                 #watchpats.append(self.templates["Pat:Slave_started"] % node)
                 watchpats.append(self.templates["Pat:Local_started"] % node)
             else:
                 for stopping in stopset:
                     if self.CM.ShouldBeStatus[stopping] == "up":
                         watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping)))
 
         if len(watchpats) == 0:
             return self.skipped()
 
         if len(startset) != 0:
             watchpats.append(self.templates["Pat:DC_IDLE"])
 
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
 
         watch.setwatch()
 
         #begin actions
         for node in stopset:
             if self.CM.ShouldBeStatus[node] == "up":
                 self.CM.StopaCMnoBlock(node)
 
         for node in startset:
             if self.CM.ShouldBeStatus[node] == "down":
                 self.CM.StartaCMnoBlock(node)
 
         #get the result
         if watch.lookforall():
             self.CM.cluster_stable()
             self.CM.fencing_cleanup("NearQuorumPoint", stonith)
             return self.success()
 
         self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched))
 
         #get the "bad" nodes
         upnodes = []
         for node in stopset:
             if self.CM.StataCM(node) == 1:
                 upnodes.append(node)
 
         downnodes = []
         for node in startset:
             if self.CM.StataCM(node) == 0:
                 downnodes.append(node)
 
         self.CM.fencing_cleanup("NearQuorumPoint", stonith)
         if upnodes == [] and downnodes == []:
             self.CM.cluster_stable()
 
             # Make sure they're completely down with no residule
             for node in stopset:
                 self.rsh(node, self.templates["StopCmd"])
 
             return self.success()
 
         if len(upnodes) > 0:
             self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes))
 
         if len(downnodes) > 0:
             self.logger.log("Warn: Unstartable nodes: " + repr(downnodes))
 
         return self.failure()
 
     def is_applicable(self):
         if self.Env["Name"] == "crm-cman":
             return None
         return 1
 
 AllTestClasses.append(NearQuorumPointTest)
 
 
 class RollingUpgradeTest(CTSTest):
     '''Perform a rolling upgrade of the cluster'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RollingUpgrade"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
 
     def setup(self, node):
         #  Start all remaining nodes
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
 
         for node in self.Env["nodes"]:
             if not self.downgrade(node, None):
                 return self.failure("Couldn't downgrade %s" % node)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Couldn't start all nodes")
         return self.success()
 
     def teardown(self, node):
         # Stop everything
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
 
         for node in self.Env["nodes"]:
             if not self.upgrade(node, None):
                 return self.failure("Couldn't upgrade %s" % node)
 
         return self.success()
 
     def install(self, node, version, start=1, flags="--force"):
 
         target_dir = "/tmp/rpm-%s" % version
         src_dir = "%s/%s" % (self.Env["rpm-dir"], version)
 
         self.logger.log("Installing %s on %s with %s" % (version, node, flags))
         if not self.stop(node):
             return self.failure("stop failure: "+node)
 
         rc = self.rsh(node, "mkdir -p %s" % target_dir)
         rc = self.rsh(node, "rm -f %s/*.rpm" % target_dir)
         (rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None)
         for line in lines:
             line = line[:-1]
             rc = self.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir))
         rc = self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir))
 
         if start and not self.start(node):
             return self.failure("start failure: "+node)
 
         return self.success()
 
     def upgrade(self, node, start=1):
         return self.install(node, self.Env["current-version"], start)
 
     def downgrade(self, node, start=1):
         return self.install(node, self.Env["previous-version"], start, "--force --nodeps")
 
     def __call__(self, node):
         '''Perform the 'Rolling Upgrade' test. '''
         self.incr("calls")
 
         for node in self.Env["nodes"]:
             if self.upgrade(node):
                 return self.failure("Couldn't upgrade %s" % node)
 
             self.CM.cluster_stable()
 
         return self.success()
 
     def is_applicable(self):
         if not self.is_applicable_common():
             return None
 
         if not self.Env.has_key("rpm-dir"):
             return None
         if not self.Env.has_key("current-version"):
             return None
         if not self.Env.has_key("previous-version"):
             return None
 
         return 1
 
 #        Register RestartTest as a good test to run
 AllTestClasses.append(RollingUpgradeTest)
 
 
 class BSC_AddResource(CTSTest):
     '''Add a resource to the cluster'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "AddResource"
         self.resource_offset = 0
         self.cib_cmd = """cibadmin -C -o %s -X '%s' """
 
     def __call__(self, node):
         self.incr("calls")
         self.resource_offset =         self.resource_offset  + 1
 
         r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset)
         start_pat = "crmd.*%s_start_0.*confirmed.*ok"
 
         patterns = []
         patterns.append(start_pat % r_id)
 
         watch = self.create_watch(patterns, self.Env["DeadTime"])
         watch.setwatch()
 
         ip = self.NextIP()
         if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip):
             return self.failure("Make resource %s failed" % r_id)
 
         failed = 0
         watch_result = watch.lookforall()
         if watch.unmatched:
             for regex in watch.unmatched:
                 self.logger.log ("Warn: Pattern not found: %s" % (regex))
                 failed = 1
 
         if failed:
             return self.failure("Resource pattern(s) not found")
 
         if not self.CM.cluster_stable(self.Env["DeadTime"]):
             return self.failure("Unstable cluster")
 
         return self.success()
 
     def NextIP(self):
         ip = self.Env["IPBase"]
         if ":" in ip:
             fields = ip.rpartition(":")
             fields[2] = str(hex(int(fields[2], 16)+1))
             print str(hex(int(f[2], 16)+1))
         else:
             fields = ip.rpartition('.')
             fields[2] = str(int(fields[2])+1)
 
         ip = fields[0] + fields[1] + fields[3];
         self.Env["IPBase"] = ip
         return ip.strip()
 
     def make_ip_resource(self, node, id, rclass, type, ip):
         self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node))
         rsc_xml="""
 <primitive id="%s" class="%s" type="%s"  provider="heartbeat">
     <instance_attributes id="%s"><attributes>
         <nvpair id="%s" name="ip" value="%s"/>
     </attributes></instance_attributes>
 </primitive>""" % (id, rclass, type, id, id, ip)
 
         node_constraint = """
       <rsc_location id="run_%s" rsc="%s">
         <rule id="pref_run_%s" score="100">
           <expression id="%s_loc_expr" attribute="#uname" operation="eq" value="%s"/>
         </rule>
       </rsc_location>""" % (id, id, id, id, node)
 
         rc = 0
         (rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None)
         if rc != 0:
             self.logger.log("Constraint creation failed: %d" % rc)
             return None
 
         (rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None)
         if rc != 0:
             self.logger.log("Resource creation failed: %d" % rc)
             return None
 
         return 1
 
     def is_applicable(self):
         if self.Env["DoBSC"]:
             return 1
         return None
 
 AllTestClasses.append(BSC_AddResource)
 
 
 class SimulStopLite(CTSTest):
     '''Stop any active nodes ~ simultaneously'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStopLite"
 
     def __call__(self, dummy):
         '''Perform the 'SimulStopLite' setup work. '''
         self.incr("calls")
 
         self.debug("Setup: " + self.name)
 
         #     We ignore the "node" parameter...
         watchpats = [ ]
 
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == "up":
                 self.incr("WasStarted")
                 watchpats.append(self.templates["Pat:We_stopped"] % node)
                 #if self.Env["use_logd"]:
                 #    watchpats.append(self.templates["Pat:Logd_stopped"] % node)
 
         if len(watchpats) == 0:
             self.CM.clear_all_caches()
             return self.success()
 
         #     Stop all the nodes - at about the same time...
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
 
         watch.setwatch()
         self.set_timer()
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == "up":
                 self.CM.StopaCMnoBlock(node)
         if watch.lookforall():
             self.CM.clear_all_caches()
 
             # Make sure they're completely down with no residule
             for node in self.Env["nodes"]:
                 self.rsh(node, self.templates["StopCmd"])
 
             return self.success()
 
         did_fail = 0
         up_nodes = []
         for node in self.Env["nodes"]:
             if self.CM.StataCM(node) == 1:
                 did_fail = 1
                 up_nodes.append(node)
 
         if did_fail:
             return self.failure("Active nodes exist: " + repr(up_nodes))
 
         self.logger.log("Warn: All nodes stopped but CTS didnt detect: "
                     + repr(watch.unmatched))
 
         self.CM.clear_all_caches()
         return self.failure("Missing log message: "+repr(watch.unmatched))
 
     def is_applicable(self):
         '''SimulStopLite is a setup test and never applicable'''
         return 0
 
 
 class SimulStartLite(CTSTest):
     '''Start any stopped nodes ~ simultaneously'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStartLite"
 
     def __call__(self, dummy):
         '''Perform the 'SimulStartList' setup work. '''
         self.incr("calls")
         self.debug("Setup: " + self.name)
 
         #        We ignore the "node" parameter...
         node_list = []
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == "down":
                 self.incr("WasStopped")
                 node_list.append(node)
 
         self.set_timer()
         while len(node_list) > 0:
             # Repeat until all nodes come up
             watchpats = [ ]
 
             uppat = self.templates["Pat:Slave_started"]
             if self.CM.upcount() == 0:
                 uppat = self.templates["Pat:Local_started"]
 
             watchpats.append(self.templates["Pat:DC_IDLE"])
             for node in node_list:
                 watchpats.append(uppat % node)
                 watchpats.append(self.templates["Pat:InfraUp"] % node)
                 watchpats.append(self.templates["Pat:PacemakerUp"] % node)
 
             #   Start all the nodes - at about the same time...
             watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
             watch.setwatch()
 
             stonith = self.CM.prepare_fencing_watcher(self.name)
 
             for node in node_list:
                 self.CM.StartaCMnoBlock(node)
 
             watch.lookforall()
 
             node_list = self.CM.fencing_cleanup(self.name, stonith)
 
             # Remove node_list messages from watch.unmatched
             for node in node_list:
                 self.logger.debug("Dealing with stonith operations for %s" % repr(node_list))
                 if watch.unmatched:
                     try:
                         watch.unmatched.remove(uppat % node)
                     except:
                         self.debug("Already matched: %s" % (uppat % node))
                     try:                        
                         watch.unmatched.remove(self.templates["Pat:InfraUp"] % node)
                     except:
                         self.debug("Already matched: %s" % (self.templates["Pat:InfraUp"] % node))
                     try:
                         watch.unmatched.remove(self.templates["Pat:PacemakerUp"] % node)
                     except:
                         self.debug("Already matched: %s" % (self.templates["Pat:PacemakerUp"] % node))
 
             if watch.unmatched:
                 for regex in watch.unmatched:
                     self.logger.log ("Warn: Startup pattern not found: %s" %(regex))
 
             if not self.CM.cluster_stable():
                 return self.failure("Cluster did not stabilize")
 
         did_fail = 0
         unstable = []
         for node in self.Env["nodes"]:
             if self.CM.StataCM(node) == 0:
                 did_fail = 1
                 unstable.append(node)
 
         if did_fail:
             return self.failure("Unstarted nodes exist: " + repr(unstable))
 
         unstable = []
         for node in self.Env["nodes"]:
             if not self.CM.node_stable(node):
                 did_fail = 1
                 unstable.append(node)
 
         if did_fail:
             return self.failure("Unstable cluster nodes exist: " + repr(unstable))
 
         return self.success()
 
     def is_applicable(self):
         '''SimulStartLite is a setup test and never applicable'''
         return 0
 
 
 def TestList(cm, audits):
     result = []
     for testclass in AllTestClasses:
         bound_test = testclass(cm)
         if bound_test.is_applicable():
             bound_test.Audits = audits
             result.append(bound_test)
     return result
 
 
 class RemoteLXC(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteLXC"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.num_containers = 2
         self.is_container = 1
         self.is_docker_unsafe = 1
         self.failed = 0
         self.fail_string = ""
 
     def start_lxc_simple(self, node):
 
         # restore any artifacts laying around from a previous test.
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
 
         # generate the containers, put them in the config, add some resources to them
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc1", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc2", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "promote_0"))
 
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers)
         self.set_timer("remoteSimpleInit")
         watch.lookforall()
         self.log_timer("remoteSimpleInit")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
     def cleanup_lxc_simple(self, node):
 
         pats = [ ]
         # if the test failed, attempt to clean up the cib and libvirt environment
         # as best as possible 
         if self.failed == 1:
             # restore libvirt and cib
             self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
             self.rsh(node, "crm_resource -C -r container1 &>/dev/null")
             self.rsh(node, "crm_resource -C -r container2 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc1 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc2 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc-ms &>/dev/null")
             time.sleep(20)
             return
 
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         pats.append(self.templates["Pat:RscOpOK"] % ("container1", "stop_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("container2", "stop_0"))
 
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null")
         self.set_timer("remoteSimpleCleanup")
         watch.lookforall()
         self.log_timer("remoteSimpleCleanup")
 
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
         # cleanup libvirt
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
 
     def __call__(self, node):
         '''Perform the 'RemoteLXC' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null")
         if rc == 1:
             self.log("Environment test for lxc support failed.")
             return self.skipped()
 
         self.start_lxc_simple(node)
         self.cleanup_lxc_simple(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if self.failed == 1:
             return self.failure(self.fail_string)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"Updating failcount for ping",
             r"pengine.*: Recover (ping|lxc-ms|container)\s*\(.*\)",
             # The orphaned lxc-ms resource causes an expected transition error
             # that is a result of the pengine not having knowledge that the 
             # ms resource used to be a clone.  As a result it looks like that 
             # resource is running in multiple locations when it shouldn't... But in
             # this instance we know why this error is occurring and that it is expected.
             r"Calculated Transition .* /var/lib/pacemaker/pengine/pe-error",
             r"Resource lxc-ms .* is active on 2 nodes attempting recovery",
             r"Unknown operation: fail",
             r"(ERROR|error): sending stonithRA op to stonithd failed.",
         ]
 
 AllTestClasses.append(RemoteLXC)
 
 
 ###################################################################
 class RemoteDriver(CTSTest):
 ###################################################################
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteDriver"
         self.is_docker_unsafe = 1
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.stop = StopTest(cm)
         self.pcmk_started = 0
         self.failed = 0
         self.fail_string = ""
         self.remote_node_added = 0
         self.remote_rsc_added = 0
         self.remote_rsc = "remote-rsc"
+        self.remote_use_reconnect_interval = self.Env.RandomGen.choice(["true","false"])
         self.cib_cmd = """cibadmin -C -o %s -X '%s' """
 
-    def del_rsc(self, node, rsc):
-
+    def get_othernode(self, node):
         for othernode in self.Env["nodes"]:
             if othernode == node:
                 # we don't want to try and use the cib that we just shutdown.
                 # find a cluster node that is not our soon to be remote-node.
                 continue
-            rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc))
-            if rc != 0:
-                self.fail_string = ("Removal of resource '%s' failed" % (rsc))
-                self.failed = 1
-            return
+            else:
+                return othernode
+
+    def del_rsc(self, node, rsc):
+        othernode = self.get_othernode(node)
+        rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc))
+        if rc != 0:
+            self.fail_string = ("Removal of resource '%s' failed" % (rsc))
+            self.failed = 1
 
     def add_rsc(self, node, rsc_xml):
-        for othernode in self.CM.Env["nodes"]:
-            if othernode == node:
-                # we don't want to try and use the cib that we just shutdown.
-                # find a cluster node that is not our soon to be remote-node.
-                continue
-            rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml))
-            if rc != 0:
-                self.fail_string = "resource creation failed"
-                self.failed = 1
-            return
+        othernode = self.get_othernode(node)
+        rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml))
+        if rc != 0:
+            self.fail_string = "resource creation failed"
+            self.failed = 1
 
     def add_primitive_rsc(self, node):
         rsc_xml = """
 <primitive class="ocf" id="%s" provider="heartbeat" type="Dummy">
     <operations>
       <op id="remote-rsc-monitor-interval-10s" interval="10s" name="monitor"/>
     </operations>
     <meta_attributes id="remote-meta_attributes"/>
 </primitive>""" % (self.remote_rsc)
         self.add_rsc(node, rsc_xml)
         if self.failed == 0:
             self.remote_rsc_added = 1
 
     def add_connection_rsc(self, node):
-        rsc_xml = """
+        if self.remote_use_reconnect_interval == "true":
+            # use reconnect interval and make sure to set cluster-recheck-interval as well.
+            rsc_xml = """
+<primitive class="ocf" id="%s" provider="pacemaker" type="remote">
+    <instance_attributes id="remote-instance_attributes"/>
+        <instance_attributes id="remote-instance_attributes">
+          <nvpair id="remote-instance_attributes-server" name="server" value="%s"/>
+          <nvpair id="remote-instance_attributes-reconnect_interval" name="reconnect_interval" value="60s"/>
+        </instance_attributes>
+    <operations>
+      <op id="remote-monitor-interval-60s" interval="60s" name="monitor"/>
+      <op id="remote-name-start-interval-0-timeout-120" interval="0" name="start" timeout="60"/>
+    </operations>
+</primitive>""" % (self.remote_node, node)
+            self.rsh(self.get_othernode(node), self.templates["SetCheckInterval"] % ("45s"))
+        else:
+            # not using reconnect interval
+            rsc_xml = """
 <primitive class="ocf" id="%s" provider="pacemaker" type="remote">
     <instance_attributes id="remote-instance_attributes"/>
         <instance_attributes id="remote-instance_attributes">
           <nvpair id="remote-instance_attributes-server" name="server" value="%s"/>
         </instance_attributes>
     <operations>
       <op id="remote-monitor-interval-60s" interval="60s" name="monitor"/>
       <op id="remote-name-start-interval-0-timeout-120" interval="0" name="start" timeout="120"/>
     </operations>
 </primitive>""" % (self.remote_node, node)
+
         self.add_rsc(node, rsc_xml)
         if self.failed == 0:
             self.remote_node_added = 1
 
     def stop_pcmk_remote(self, node):
         # disable pcmk remote
         for i in range(10):
             rc = self.rsh(node, "service pacemaker_remote stop")
             if rc != 0:
                 time.sleep(6)
             else:
                 break
 
     def start_pcmk_remote(self, node):
         for i in range(10):
             rc = self.rsh(node, "service pacemaker_remote start")
             if rc != 0:
                 time.sleep(6)
             else:
                 self.pcmk_started = 1
                 break
 
     def start_metal(self, node):
         pcmk_started = 0
 
         # make sure the resource doesn't already exist for some reason
         self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc))
         self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node))
 
         if not self.stop(node):
             self.failed = 1
             self.fail_string = "Failed to shutdown cluster node %s" % (node)
             return
 
         self.start_pcmk_remote(node)
 
         if self.pcmk_started == 0:
             self.failed = 1
             self.fail_string = "Failed to start pacemaker_remote on node %s" % (node)
             return
 
         # convert node to baremetal node now that it has shutdow the cluster stack
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start"))
         pats.append(self.templates["Pat:DC_IDLE"])
 
         self.add_connection_rsc(node)
 
         self.set_timer("remoteMetalInit")
         watch.lookforall()
         self.log_timer("remoteMetalInit")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
     def migrate_connection(self, node):
         if self.failed == 1:
             return
 
         pats = [ ]
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_to"))
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_from"))
         pats.append(self.templates["Pat:DC_IDLE"])
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         (rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None)
         if rc != 0:
             self.fail_string = "failed to move remote node connection resource"
             self.logger.log(self.fail_string)
             self.failed = 1
             return
 
         self.set_timer("remoteMetalMigrate")
         watch.lookforall()
         self.log_timer("remoteMetalMigrate")
 
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.logger.log(self.fail_string)
             self.failed = 1
             return
 
     def fail_rsc(self, node):
         if self.failed == 1:
             return
 
         watchpats = [ ]
         watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "stop", self.remote_node))
         watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node))
         watchpats.append(self.templates["Pat:DC_IDLE"])
 
         watch = self.create_watch(watchpats, 120)
         watch.setwatch()
 
         self.debug("causing dummy rsc to fail.")
 
         rc = self.rsh(node, "rm -f /var/run/resource-agents/Dummy*")
 
         self.set_timer("remoteRscFail")
         watch.lookforall()
         self.log_timer("remoteRscFail")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns during rsc fail: %s" % (repr(watch.unmatched))
             self.logger.log(self.fail_string)
             self.failed = 1
 
     def fail_connection(self, node):
         if self.failed == 1:
             return
 
         watchpats = [ ]
         watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node)
         watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node)
 
         watch = self.create_watch(watchpats, 120)
         watch.setwatch()
 
         # force stop the pcmk remote daemon. this will result in fencing
         self.debug("Force stopped active remote node")
         self.stop_pcmk_remote(node)
 
         self.debug("Waiting for remote node to be fenced.")
         self.set_timer("remoteMetalFence")
         watch.lookforall()
         self.log_timer("remoteMetalFence")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.logger.log(self.fail_string)
             self.failed = 1
             return
 
         self.debug("Waiting for the remote node to come back up")
         self.CM.ns.WaitForNodeToComeUp(node, 120);
 
         pats = [ ]
-        watch = self.create_watch(pats, 120)
+        watch = self.create_watch(pats, 200)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start"))
         if self.remote_rsc_added == 1:
             pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "monitor"))
 
         # start the remote node again watch it integrate back into cluster.
         self.start_pcmk_remote(node)
         if self.pcmk_started == 0:
             self.failed = 1
             self.fail_string = "Failed to start pacemaker_remote on node %s" % (node)
             self.logger.log(self.fail_string)
             return
 
         self.debug("Waiting for remote node to rejoin cluster after being fenced.")
         self.set_timer("remoteMetalRestart")
         watch.lookforall()
         self.log_timer("remoteMetalRestart")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
             self.logger.log(self.fail_string)
             return
 
     def add_dummy_rsc(self, node):
         if self.failed == 1:
             return
 
         # verify we can put a resource on the remote node
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node))
         pats.append(self.templates["Pat:DC_IDLE"])
 
         # Add a resource that must live on remote-node
         self.add_primitive_rsc(node)
 
         # force that rsc to prefer the remote node. 
         (rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None)
         if rc != 0:
             self.fail_string = "Failed to place remote resource on remote node."
             self.failed = 1
             return
 
         self.set_timer("remoteMetalRsc")
         watch.lookforall()
         self.log_timer("remoteMetalRsc")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
     def test_attributes(self, node):
         if self.failed == 1:
             return
 
         # This verifies permanent attributes can be set on a remote-node. It also
         # verifies the remote-node can edit it's own cib node section remotely.
         (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -v testval -N %s" % (self.remote_node), None)
         if rc != 0:
             self.fail_string = "Failed to set remote-node attribute. rc:%s output:%s" % (rc, line)
             self.failed = 1
             return
 
         (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -Q -N %s" % (self.remote_node), None)
         if rc != 0:
             self.fail_string = "Failed to get remote-node attribute"
             self.failed = 1
             return
 
         (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -D -N %s" % (self.remote_node), None)
         if rc != 0:
             self.fail_string = "Failed to delete remote-node attribute"
             self.failed = 1
             return
 
     def cleanup_metal(self, node):
         if self.pcmk_started == 0:
             return
 
         pats = [ ]
 
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         if self.remote_rsc_added == 1:
             pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "stop"))
         if self.remote_node_added == 1:
             pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "stop"))
 
         self.set_timer("remoteMetalCleanup")
+
+        if self.remote_use_reconnect_interval == "true":
+            self.debug("Cleaning up re-check interval")
+            self.rsh(self.get_othernode(node), self.templates["ClearCheckInterval"])
         if self.remote_rsc_added == 1:
+            self.debug("Cleaning up dummy rsc put on remote node")
             self.rsh(node, "crm_resource -U -r %s -N %s" % (self.remote_rsc, self.remote_node))
             self.del_rsc(node, self.remote_rsc)
         if self.remote_node_added == 1:
+            self.debug("Cleaning up remote node connection resource")
             self.rsh(node, "crm_resource -U -r %s" % (self.remote_node))
             self.del_rsc(node, self.remote_node)
+
         watch.lookforall()
         self.log_timer("remoteMetalCleanup")
 
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
         self.stop_pcmk_remote(node)
 
     def setup_env(self, node):
 
         self.remote_node = "remote_%s" % (node)
         sync_key = 0
 
         # we are assuming if all nodes have a key, that it is
         # the right key... If any node doesn't have a remote
         # key, we regenerate it everywhere.
         for node in self.Env["nodes"]:
             rc = self.rsh(node, "ls /etc/pacemaker/authkey")
             if rc != 0:
                 sync_key = 1
                 break
 
         if sync_key == 0:
             return
 
         # create key locally
         os.system("/usr/share/pacemaker/tests/cts/lxc_autogen.sh -k &> /dev/null")
 
         # sync key throughout the cluster
         for node in self.Env["nodes"]:
             rc = self.rsh(node, "mkdir /etc/pacemaker")
             self.rsh.cp("/etc/pacemaker/authkey", "%s:/etc/pacemaker/authkey" % (node))
 
     def is_applicable(self):
         if not self.is_applicable_common():
             return False
 
         for node in self.Env["nodes"]:
             rc = self.rsh(node, "type pacemaker_remoted >/dev/null 2>&1")
             if rc != 0:
                 return False
         return True
 
     def __call__(self, node):
         '''Perform the 'RemoteBaremetal' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         self.setup_env(node)
         self.start_metal(node)
         self.add_dummy_rsc(node)
         self.test_attributes(node)
         self.cleanup_metal(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
         if self.failed == 1:
             return self.failure(self.fail_string)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [ """is running on remote.*which isn't allowed""",
                  """Connection terminated""",
                  """Failed to send remote""",
                 ]
 
 # Remote driver is called by other tests.
 
 ###################################################################
 class RemoteBasic(CTSTest):
 ###################################################################
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteBasic"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.driver = RemoteDriver(cm)
         self.is_docker_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'RemoteBaremetal' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         self.driver.setup_env(node)
         self.driver.start_metal(node)
         self.driver.add_dummy_rsc(node)
         self.driver.test_attributes(node)
         self.driver.cleanup_metal(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
         if self.driver.failed == 1:
             return self.failure(self.driver.fail_string)
 
         return self.success()
 
     def is_applicable(self):
         return self.driver.is_applicable()
 
     def errorstoignore(self):
         return self.driver.errorstoignore()
 
 AllTestClasses.append(RemoteBasic)
 
 ###################################################################
 class RemoteStonithd(CTSTest):
 ###################################################################
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteStonithd"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.driver = RemoteDriver(cm)
         self.is_docker_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'RemoteStonithd' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         self.driver.setup_env(node)
         self.driver.start_metal(node)
         self.driver.add_dummy_rsc(node)
 
         self.driver.fail_connection(node)
         self.driver.cleanup_metal(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
         if self.driver.failed == 1:
             return self.failure(self.driver.fail_string)
 
         return self.success()
 
     def is_applicable(self):
         if not self.driver.is_applicable():
             return False
 
         if self.Env.has_key("DoFencing"):
             return self.Env["DoFencing"]
 
         return True
 
     def errorstoignore(self):
         ignore_pats = [
             r"Unexpected disconnect on remote-node",
             r"crmd.*: error.*: Operation remote_.*_monitor",
             r"pengine.*: Recover remote_.*\s*\(.*\)",
             r"Calculated Transition .* /var/lib/pacemaker/pengine/pe-error",
             r"error.*: Resource .*ocf::.* is active on 2 nodes attempting recovery",
         ]
 
         ignore_pats.extend(self.driver.errorstoignore())
         return ignore_pats
 
 AllTestClasses.append(RemoteStonithd)
 
 ###################################################################
 class RemoteMigrate(CTSTest):
 ###################################################################
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteMigrate"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.driver = RemoteDriver(cm)
         self.is_docker_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'RemoteMigrate' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         self.driver.setup_env(node)
         self.driver.start_metal(node)
         self.driver.add_dummy_rsc(node)
         self.driver.migrate_connection(node)
         self.driver.cleanup_metal(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
         if self.driver.failed == 1:
             return self.failure(self.driver.fail_string)
 
         return self.success()
 
     def is_applicable(self):
         return self.driver.is_applicable()
 
     def errorstoignore(self):
         return self.driver.errorstoignore()
 
 AllTestClasses.append(RemoteMigrate)
 
 
 ###################################################################
 class RemoteRscFailure(CTSTest):
 ###################################################################
     def __init__(self, cm):
 
         # fail a rsc on a remote node, verify recovery.
         CTSTest.__init__(self,cm)
         self.name = "RemoteRscFailure"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.driver = RemoteDriver(cm)
         self.is_docker_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'RemoteRscFailure' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         self.driver.setup_env(node)
         self.driver.start_metal(node)
         self.driver.add_dummy_rsc(node)
 
         # This is an important step. We are migrating the connection
         # before failing the resource. This verifies that the migration
         # has properly maintained control over the remote-node.
         self.driver.migrate_connection(node)
 
         self.driver.fail_rsc(node)
         self.driver.cleanup_metal(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
         if self.driver.failed == 1:
             return self.failure(self.driver.fail_string)
 
         return self.success()
 
     def is_applicable(self):
         return self.driver.is_applicable()
 
     def errorstoignore(self):
         ignore_pats = [
             r"pengine.*: Recover remote-rsc\s*\(.*\)",
         ]
 
         ignore_pats.extend(self.driver.errorstoignore())
         return ignore_pats
 
 AllTestClasses.append(RemoteRscFailure)
 
 # vim:ts=4:sw=4:et:
diff --git a/cts/patterns.py b/cts/patterns.py
index 305302aacc..1bc05a6232 100644
--- a/cts/patterns.py
+++ b/cts/patterns.py
@@ -1,533 +1,536 @@
 import sys, os
 
 from cts.CTSvars import *
 
 patternvariants = {}
 class BasePatterns:
     def __init__(self, name):
         self.name = name
         patternvariants[name] = self
         self.ignore = []
         self.BadNews = []
         self.components = {}
         self.commands = {
             "StatusCmd"      : "crmadmin -t 60000 -S %s 2>/dev/null",
             "CibQuery"       : "cibadmin -Ql",
             "CibAddXml"      : "cibadmin --modify -c --xml-text %s",
             "CibDelXpath"    : "cibadmin --delete --xpath %s",
             # 300,000 == 5 minutes
             "RscRunning"     : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s",
             "CIBfile"        : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
             "TmpDir"         : "/tmp",
 
             "BreakCommCmd"   : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
             "FixCommCmd"     : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
 
 # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
 # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
 # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
             "ReduceCommCmd"  : "",
             "RestoreCommCmd" : "tc qdisc del dev lo root",
 
             "UUIDQueryCmd"    : "crmadmin -N",
 
+            "SetCheckInterval"    : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-recheck-interval-setting\" name=\"cluster-recheck-interval\" value=\"%s\"/></cluster_property_set>'",
+            "ClearCheckInterval"    : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"",
+
             "MaintenanceModeOn"    : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'",
             "MaintenanceModeOff"    : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",
 
             "StandbyCmd"      : "crm_attribute -VQ  -U %s -n standby -l forever -v %s 2>/dev/null",
             "StandbyQueryCmd" : "crm_attribute -QG -U %s -n standby -l forever -d off 2>/dev/null",
         }
         self.search = {
             "Pat:DC_IDLE"      : "crmd.*State transition.*-> S_IDLE",
             
             # This wont work if we have multiple partitions
             "Pat:Local_started" : "%s\W.*The local CRM is operational",
             "Pat:Slave_started" : "%s\W.*State transition.*-> S_NOT_DC",
             "Pat:Master_started": "%s\W.*State transition.*-> S_IDLE",
             "Pat:We_stopped"    : "heartbeat.*%s.*Heartbeat shutdown complete",
             "Pat:Logd_stopped"  : "%s\W.*logd:.*Exiting write process",
             "Pat:They_stopped"  : "%s\W.*LOST:.* %s ",
             "Pat:They_dead"     : "node %s.*: is dead",
             "Pat:TransitionComplete" : "Transition status: Complete: complete",
 
             "Pat:Fencing_start" : "Initiating remote operation .* for %s",
             "Pat:Fencing_ok"    : r"stonith.*:\s*Operation .* of %s by .* for .*@.*: OK",
             "Pat:Fencing_recover"    : r"pengine.*: Recover %s",
 
             "Pat:RscOpOK"       : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(.*confirmed=\S+\)",
             "Pat:RscRemoteOpOK" : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(node=%s,.*,\s*confirmed=true\)",
             "Pat:NodeFenced"    : r"crmd.*:\s*Peer\s+%s\s+was\s+terminated\s+\(.*\)\s+by\s+.*\s+for\s+.*:\s+OK",
             "Pat:FenceOpOK"     : "Operation .* for host '%s' with device .* returned: 0",
         }
 
     def get_component(self, key):
         if self.components.has_key(key):
             return self.components[key]
         print "Unknown component '%s' for %s" % (key, self.name)
         return []
 
     def get_patterns(self, key):
         if key == "BadNews":
             return self.BadNews
         elif key == "BadNewsIgnore":
             return self.ignore
         elif key == "Commands":
             return self.commands
         elif key == "Search":
             return self.search
         elif key == "Components":
             return self.components
 
     def __getitem__(self, key):
         if key == "Name":
             return self.name
         elif self.commands.has_key(key):
             return self.commands[key]
         elif self.search.has_key(key):
             return self.search[key]
         else:
             print "Unknown template '%s' for %s" % (key, self.name)
             return None
 
 class crm_lha(BasePatterns):
     def __init__(self, name):
         BasePatterns.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service heartbeat start > /dev/null 2>&1",
             "StopCmd"        : "service heartbeat stop  > /dev/null 2>&1",
             "EpochCmd"      : "crm_node -H -e",
             "QuorumCmd"      : "crm_node -H -q",
             "PartitionCmd"    : "crm_node -H -p",
         })
 
         self.search.update({
             # Patterns to look for in the log files for various occasions...
             "Pat:ChildKilled"  : "%s\W.*heartbeat.*%s.*killed by signal 9",
             "Pat:ChildRespawn" : "%s\W.*heartbeat.*Respawning client.*%s",
             "Pat:ChildExit"    : "(ERROR|error): Client .* exited with return code",            
         })
         self.BadNews = [
                 r"error:",
                 r"crit:",
                 r"ERROR:",
                 r"CRIT:",
                 r"Shutting down...NOW",
                 r"Timer I_TERMINATE just popped",
                 r"input=I_ERROR",
                 r"input=I_FAIL",
                 r"input=I_INTEGRATED cause=C_TIMER_POPPED",
                 r"input=I_FINALIZED cause=C_TIMER_POPPED",
                 r"input=I_ERROR",
                 r", exiting\.",
                 r"WARN.*Ignoring HA message.*vote.*not in our membership list",
                 r"pengine.*Attempting recovery of resource",
                 r"is taking more than 2x its timeout",
                 r"Confirm not received from",
                 r"Welcome reply not received from",
                 r"Attempting to schedule .* after a stop",
                 r"Resource .* was active at shutdown",
                 r"duplicate entries for call_id",
                 r"Search terminated:",
                 r"No need to invoke the TE",
                 r"global_timer_callback:",
                 r"Faking parameter digest creation",
                 r"Parameters to .* action changed:",
                 r"Parameters to .* changed",
             ]
 
         self.ignore = [
                 r"(ERROR|error):.*\s+assert\s+at\s+crm_glib_handler:"
                 "(ERROR|error): Message hist queue is filling up",
                 "stonithd.*CRIT: external_hostlist:.*'vmware gethosts' returned an empty hostlist",
                 "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.",
                 "pengine.*Preventing .* from re-starting",
                 ]
 
 class crm_cs_v0(BasePatterns):
     def __init__(self, name):
         BasePatterns.__init__(self, name)
 
         self.commands.update({
             "EpochCmd"      : "crm_node -e --openais",
             "QuorumCmd"      : "crm_node -q --openais",
             "PartitionCmd"    : "crm_node -p --openais",
             "StartCmd"       : "service corosync start",
             "StopCmd"        : "service corosync stop",
         })
 
         self.search.update({
 # The next pattern is too early
 #            "Pat:We_stopped"   : "%s.*Service engine unloaded: Pacemaker Cluster Manager",
 # The next pattern would be preferred, but it doesn't always come out
 #            "Pat:We_stopped"   : "%s.*Corosync Cluster Engine exiting with status",
             "Pat:We_stopped"   : "%s\W.*Service engine unloaded: corosync cluster quorum service",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "corosync:.*Node %s is now: lost",
 
             "Pat:ChildExit"    : "Child process .* exited",
             "Pat:ChildKilled"  : "%s\W.*corosync.*Child process %s terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*corosync.*Respawning failed child process: %s",
 
             "Pat:InfraUp"      : "%s\W.*corosync.*Initializing transport",
             "Pat:PacemakerUp"  : "%s\W.*pacemakerd.*Starting Pacemaker",
         })
 
         self.ignore = [
             r"crm_mon:",
             r"crmadmin:",
             r"update_trace_data",
             r"async_notify:.*strange, client not found",
             r"Parse error: Ignoring unknown option .*nodename",
             r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:",
             r"Child process .* terminated with signal 9",
             r"getinfo response error: 1$",
             "sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE",
             r"sbd.* pcmk:\s*error:.*Connection to cib_ro failed",
             r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* closed .I/O condition=17",
         ]
 
         self.BadNews = [
             r"error:",
             r"crit:",
             r"ERROR:",
             r"CRIT:",
             r"Shutting down...NOW",
             r"Timer I_TERMINATE just popped",
             r"input=I_ERROR",
             r"input=I_FAIL",
             r"input=I_INTEGRATED cause=C_TIMER_POPPED",
             r"input=I_FINALIZED cause=C_TIMER_POPPED",
             r"input=I_ERROR",
             r", exiting\.",
             r"(WARN|warn).*Ignoring HA message.*vote.*not in our membership list",
             r"pengine.*Attempting recovery of resource",
             r"is taking more than 2x its timeout",
             r"Confirm not received from",
             r"Welcome reply not received from",
             r"Attempting to schedule .* after a stop",
             r"Resource .* was active at shutdown",
             r"duplicate entries for call_id",
             r"Search terminated:",
             r":global_timer_callback",
             r"Faking parameter digest creation",
             r"Parameters to .* action changed:",
             r"Parameters to .* changed",
             r"The .* process .* terminated with signal",
             r"Child process .* terminated with signal",
             r"pengine:.*Recover .*\(.* -\> .*\)",
             r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
             r"Peer is not part of our cluster",
             r"We appear to be in an election loop",
             r"Unknown node -> we will not deliver message",
             r"(Blackbox dump requested|Problem detected)",
             r"pacemakerd.*Could not connect to Cluster Configuration Database API",
             r"Receiving messages from a node we think is dead",
             r"share the same cluster nodeid",
             r"share the same name",
 
             #r"crm_ipc_send:.*Request .* failed",
             #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received",
 
                 # Not inherently bad, but worth tracking
             #r"No need to invoke the TE",
             #r"ping.*: DEBUG: Updated connected = 0",
             #r"Digest mis-match:",
             r"crmd:.*Transition failed: terminated",
             r"Local CIB .* differs from .*:",
             r"warn.*:\s*Continuing but .* will NOT be used",
             r"warn.*:\s*Cluster configuration file .* is corrupt",
             #r"Executing .* fencing operation",
             #r"fence_pcmk.* Call to fence",
             #r"fence_pcmk",
             r"cman killed by node",
             r"Election storm",
             r"stalled the FSA with pending inputs",
         ]
 
 
         self.components["common-ignore"] = [
                     "Pending action:",
                     "error: crm_log_message_adv:",
                     "resources were active at shutdown",
                     "pending LRM operations at shutdown",
                     "Lost connection to the CIB service",
                     "Connection to the CIB terminated...",
                     "Sending message to CIB service FAILED",
                     "apply_xml_diff:.*Diff application failed!",
                     r"crmd.*:\s*Action A_RECOVER .* not supported",
                     "unconfirmed_actions:.*Waiting on .* unconfirmed actions",
                     "cib_native_msgready:.*Message pending on command channel",
                     r"crmd.*:\s*Performing A_EXIT_1 - forcefully exiting the CRMd",
                     "verify_stopped:.*Resource .* was active at shutdown.  You may ignore this error if it is unmanaged.",
                     "error: attrd_connection_destroy:.*Lost connection to attrd",
                     r".*:\s*Executing .* fencing operation \(.*\) on ",
                     r"(Blackbox dump requested|Problem detected)",
 #                    "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery",
 #                    "error: process_pe_message: Transition .* ERRORs found during PE processing",
             ]
         
         self.components["corosync-ignore"] = [
             r"error:.*Connection to the CPG API failed: Library error",
             r"The .* process .* exited",
             r"pacemakerd.*error:.*Child process .* exited",
             r"cib.*error:.*Corosync connection lost",
             r"stonith-ng.*error:.*Corosync connection terminated",
             r"The cib process .* exited: Invalid argument",
             r"The attrd process .* exited: Transport endpoint is not connected",
             r"The crmd process .* exited: Link has been severed",
             r"error:.*Child process cib .* exited: Invalid argument",
             r"error:.*Child process attrd .* exited: Transport endpoint is not connected",
             r"error:.*Child process crmd .* exited: Link has been severed",
             r"lrmd.*error:.*Connection to stonith-ng failed",
             r"lrmd.*error:.*Connection to stonith-ng.* closed",
             r"lrmd.*error:.*LRMD lost STONITH connection",
             r"crmd.*State transition .* S_RECOVERY",
             r"crmd.*error:.*FSA: Input I_ERROR",
             r"crmd.*error:.*FSA: Input I_TERMINATE",
             r"crmd.*error:.*Connection to cman failed",
             r"crmd.*error:.*Could not recover from internal error",
             r"error:.*Connection to cib_shm failed",
             r"error:.*Connection to cib_shm.* closed",
             r"error:.*STONITH connection failed",
             r"error: Connection to stonith-ng failed",
             r"crit: Fencing daemon connection failed",
             r"error: Connection to stonith-ng.* closed",
             ]
 
         self.components["corosync"] = [
             r"pacemakerd.*error:.*Connection destroyed",
             r"attrd.*:\s*crit:.*Lost connection to Corosync service",
             r"stonith.*:\s*Corosync connection terminated",
             r"cib.*:\s*Corosync connection lost!\s+Exiting.",
             r"crmd.*:\s*connection terminated",
             r"pengine.*Scheduling Node .* for STONITH",
             r"crmd.*:\s*Peer %s was terminated \(.*\) by .* for .*:\s*OK",
         ]
 
         self.components["cib-ignore"] = [
             "lrmd.*Connection to stonith-ng failed",
             "lrmd.*Connection to stonith-ng.* closed",
             "lrmd.*LRMD lost STONITH connection",
             "lrmd.*STONITH connection failed, finalizing .* pending operations",
             ]
 
         self.components["cib"] = [
                     "State transition .* S_RECOVERY",
                     "Respawning .* crmd",
                     "Respawning .* attrd",
                     "Connection to cib_.* failed",
                     "Connection to cib_.* closed",
                     "Connection to the CIB terminated...",
                     "(Child process|The) crmd .* exited: Generic Pacemaker error",
                     "(Child process|The) attrd .* exited: (Connection reset by peer|Transport endpoint is not connected)",
                     "Lost connection to CIB service",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*I_ERROR.*crmd_cib_connection_destroy",
                     "crmd.*Could not recover from internal error",
                     ]
 
         self.components["lrmd"] = [
                     "State transition .* S_RECOVERY",
                     "LRM Connection failed",
                     "Respawning .* crmd",
                     "Connection to lrmd failed",
                     "Connection to lrmd.* closed",
                     "crmd.*I_ERROR.*lrm_connection_destroy",
                     "(Child process|The) crmd .* exited: Generic Pacemaker error",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*Could not recover from internal error",
                     ]
         self.components["lrmd-ignore"] = []
 
         self.components["crmd"] = [
 #                    "WARN: determine_online_status: Node .* is unclean",
 #                    "Scheduling Node .* for STONITH",
 #                    "Executing .* fencing operation",
 # Only if the node wasn't the DC:  "State transition S_IDLE",
                     "State transition .* -> S_IDLE",
                     ]
         self.components["crmd-ignore"] = []
 
         self.components["attrd"] = []
         self.components["attrd-ignore"] = []
 
         self.components["pengine"] = [
                     "State transition .* S_RECOVERY",
                     "Respawning .* crmd",
                     "(The|Child process) crmd .* exited: Generic Pacemaker error",
                     "Connection to pengine failed",
                     "Connection to pengine.* closed",
                     "Connection to the Policy Engine failed",
                     "crmd.*I_ERROR.*save_cib_contents",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*Could not recover from internal error",
                     ]
         self.components["pengine-ignore"] = []
 
         self.components["stonith"] = [
             "Connection to stonith-ng failed",
             "LRMD lost STONITH connection",
             "Connection to stonith-ng.* closed",
             "Fencing daemon connection failed",
             r"crmd.*:\s*warn.*:\s*Callback already present",
         ]
         self.components["stonith-ignore"] = [
             r"pengine.*: Recover Fencing",
             r"Updating failcount for Fencing",
             r"error:.*Connection to stonith-ng failed",
             r"error:.*Connection to stonith-ng.*closed \(I/O condition=17\)",
             r"crit:.*Fencing daemon connection failed",
             r"error:.*Sign-in failed: triggered a retry",
             "STONITH connection failed, finalizing .* pending operations.",
             r"crmd.*:\s*Operation Fencing.* Error",
         ]
         self.components["stonith-ignore"].extend(self.components["common-ignore"])
 
 class crm_mcp(crm_cs_v0):
     '''
     The crm version 4 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of native corosync (no plugins)
     '''
     def __init__(self, name):
         crm_cs_v0.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service corosync start && service pacemaker start",
             "StopCmd"        : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop; service corosync stop",
 
             "EpochCmd"      : "crm_node -e",
             "QuorumCmd"      : "crm_node -q",
             "PartitionCmd"    : "crm_node -p",
         })
 
         self.search.update({
             # Close enough... "Corosync Cluster Engine exiting normally" isn't printed
             #   reliably and there's little interest in doing anything about it
             "Pat:We_stopped"   : "%s\W.*Unloading all Corosync service engines",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "crmd.*Node %s\[.*state is now lost",
 
             "Pat:ChildExit"    : "The .* process exited",
             "Pat:ChildKilled"  : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
         })
 
 #        if self.Env["have_systemd"]:
 #            self.update({
 #                # When systemd is in use, we can look for this instead
 #                "Pat:We_stopped"   : "%s.*Stopped Corosync Cluster Engine",
 #            })
 
 class crm_mcp_docker(crm_mcp):
     '''
     The crm version 4 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of native corosync (no plugins)
     '''
     def __init__(self, name):
         crm_mcp.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "pcmk_start",
             "StopCmd"        : "pcmk_stop",
         })
 
 class crm_cman(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, name):
         crm_cs_v0.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service pacemaker start",
             "StopCmd"        : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop",
 
             "EpochCmd"      : "crm_node -e --cman",
             "QuorumCmd"      : "crm_node -q --cman",
             "PartitionCmd"    : "crm_node -p --cman",
 
             "Pat:We_stopped"   : "%s.*Unloading all Corosync service engines",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "crmd.*Node %s\[.*state is now lost",
 
             "Pat:ChildKilled"  : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
         })
 
 
 class PatternSelector:
 
     def __init__(self, name=None):
         self.name = name
         self.base = BasePatterns("crm-base")
 
         if not name:
             crm_cs_v0("crm-plugin-v0")
             crm_cman("crm-cman")
             crm_mcp("crm-mcp")
             crm_lha("crm-lha")
         elif name == "crm-lha":
             crm_lha(name)
         elif name == "crm-plugin-v0":
             crm_cs_v0(name)
         elif name == "crm-cman":
             crm_cman(name)
         elif name == "crm-mcp":
             crm_mcp(name)
         elif name == "crm-mcp-docker":
             crm_mcp_docker(name)
 
     def get_variant(self, variant):
         if patternvariants.has_key(variant):
             return patternvariants[variant]
         print "defaulting to crm-base for %s" % variant
         return self.base
 
     def get_patterns(self, variant, kind):
         return self.get_variant(variant).get_patterns(kind)
 
     def get_template(self, variant, key):
         v = self.get_variant(variant)
         return v[key]
 
     def get_component(self, variant, kind):
         return self.get_variant(variant).get_component(kind)
 
     def __getitem__(self, key):
         return self.get_template(self.name, key)
 
 # python cts/CTSpatt.py -k crm-mcp -t StartCmd
 if __name__ == '__main__':
 
     pdir=os.path.dirname(sys.path[0])
     sys.path.insert(0, pdir) # So that things work from the source directory
 
     kind=None
     template=None
 
     skipthis=None
     args=sys.argv[1:]
     for i in range(0, len(args)):
        if skipthis:
            skipthis=None
            continue
 
        elif args[i] == "-k" or args[i] == "--kind":
            skipthis=1
            kind = args[i+1]
 
        elif args[i] == "-t" or args[i] == "--template":
            skipthis=1
            template = args[i+1]
 
        else:
            print "Illegal argument " + args[i]
 
 
     print PatternSelector(kind)[template]
diff --git a/extra/resources/Dummy b/extra/resources/Dummy
index aec2a0ca45..8a38ef5750 100644
--- a/extra/resources/Dummy
+++ b/extra/resources/Dummy
@@ -1,228 +1,228 @@
 #!/bin/sh
 #
 #
 #	Dummy OCF RA. Does nothing but wait a few seconds, can be
 #	configured to fail occassionally.
 #
 # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Br�e
 #                    All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
 . ${OCF_FUNCTIONS}
 : ${__OCF_ACTION=$1}
 
 #######################################################################
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Dummy" version="1.0">
 <version>1.0</version>
 
 <longdesc lang="en">
 This is a Dummy Resource Agent. It does absolutely nothing except 
 keep track of whether its running or not.
 Its purpose in life is for testing and to serve as a template for RA writers.
 
 NB: Please pay attention to the timeouts specified in the actions
 section below. They should be meaningful for the kind of resource
 the agent manages. They should be the minimum advised timeouts,
 but they shouldn't/cannot cover _all_ possible resource
 instances. So, try to be neither overly generous nor too stingy,
 but moderate. The minimum timeouts should never be below 10 seconds.
 </longdesc>
 <shortdesc lang="en">Example stateless resource agent</shortdesc>
 
 <parameters>
 <parameter name="state" unique="1">
 <longdesc lang="en">
 Location to store the resource state in.
 </longdesc>
 <shortdesc lang="en">State file</shortdesc>
 <content type="string" default="${HA_VARRUN}/Dummy-{OCF_RESOURCE_INSTANCE}.state" />
 </parameter>
 
 <parameter name="passwd" unique="1">
 <longdesc lang="en">
 Fake password field
 </longdesc>
 <shortdesc lang="en">Password</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="fake" unique="0">
 <longdesc lang="en">
 Fake attribute that can be changed to cause a reload
 </longdesc>
 <shortdesc lang="en">Fake attribute that can be changed to cause a reload</shortdesc>
 <content type="string" default="dummy" />
 </parameter>
 
 <parameter name="op_sleep" unique="1">
 <longdesc lang="en">
 Number of seconds to sleep during operations.  This can be used to test how
 the cluster reacts to operation timeouts.
 </longdesc>
 <shortdesc lang="en">Operation sleep duration in seconds.</shortdesc>
 <content type="string" default="0" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"        timeout="20" />
 <action name="stop"         timeout="20" />
 <action name="monitor"      timeout="20" interval="10" depth="0"/>
 <action name="reload"       timeout="20" />
 <action name="migrate_to"   timeout="20" />
 <action name="migrate_from" timeout="20" />
 <action name="validate-all" timeout="20" />
 <action name="meta-data"    timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 
 # don't exit on TERM, to test that lrmd makes sure that we do exit
 trap sigterm_handler TERM
 sigterm_handler() {
 	ocf_log info "They use TERM to bring us down. No such luck."
 	return
 }
 
 dummy_usage() {
 	cat <<END
 usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 dummy_start() {
     dummy_monitor
     if [ $? =  $OCF_SUCCESS ]; then
 	return $OCF_SUCCESS
     fi
     touch ${OCF_RESKEY_state}
 }
 
 dummy_stop() {
     dummy_monitor
     if [ $? =  $OCF_SUCCESS ]; then
 	rm ${OCF_RESKEY_state}
     fi
-    rm ${VERIFY_SERIALIZED_FILE}
+    rm -f ${VERIFY_SERIALIZED_FILE}
     return $OCF_SUCCESS
 }
 
 dummy_monitor() {
 	# Monitor _MUST!_ differentiate correctly between running
 	# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
 	# That is THREE states, not just yes/no.
 
 	if [ "$OCF_RESKEY_op_sleep" -ne "0" ]; then
 		if [ -f ${VERIFY_SERIALIZED_FILE} ]; then
 			# two monitor ops have occurred at the same time.
 			# this is to verify a condition in the lrmd regression tests.
 			ocf_log err "$VERIFY_SERIALIZED_FILE exists already"
 			return $OCF_ERR_GENERIC
 		fi
 
 		touch ${VERIFY_SERIALIZED_FILE}
 		sleep ${OCF_RESKEY_op_sleep}
 		rm ${VERIFY_SERIALIZED_FILE}
 	fi
 	
 	if [ -f ${OCF_RESKEY_state} ]; then
 		return $OCF_SUCCESS
 	fi
 	if false ; then
 		return $OCF_ERR_GENERIC
 	fi
 	return $OCF_NOT_RUNNING
 }
 
 dummy_validate() {
     
     # Is the state directory writable? 
     state_dir=`dirname "$OCF_RESKEY_state"`
     touch "$state_dir/$$"
     if [ $? != 0 ]; then
 	return $OCF_ERR_ARGS
     fi
     rm "$state_dir/$$"
 
     return $OCF_SUCCESS
 }
 
 : ${OCF_RESKEY_fake=dummy}
 : ${OCF_RESKEY_op_sleep=0}
 : ${OCF_RESKEY_CRM_meta_interval=0}
 : ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
 
 if [ "x$OCF_RESKEY_state" = "x" ]; then
     if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
 	state="${HA_VARRUN}/Dummy-${OCF_RESOURCE_INSTANCE}.state"
 	
 	# Strip off the trailing clone marker
 	OCF_RESKEY_state=`echo $state | sed s/:[0-9][0-9]*\.state/.state/`
     else 
 	OCF_RESKEY_state="${HA_VARRUN}/Dummy-${OCF_RESOURCE_INSTANCE}.state"
     fi
 fi
 VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized"
 
 case $__OCF_ACTION in
 meta-data)	meta_data
 		exit $OCF_SUCCESS
 		;;
 start)		dummy_start;;
 stop)		dummy_stop;;
 monitor)	dummy_monitor;;
 migrate_to)	ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
 	        dummy_stop
 		;;
 migrate_from)	ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_source}."
 	        dummy_start
 		;;
 reload)		ocf_log err "Reloading..."
 	        dummy_start
 		;;
 validate-all)	dummy_validate;;
 usage|help)	dummy_usage
 		exit $OCF_SUCCESS
 		;;
 *)		dummy_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
 
diff --git a/fencing/regression.py.in b/fencing/regression.py.in
index fe6d418d73..da6d4dbbf3 100644
--- a/fencing/regression.py.in
+++ b/fencing/regression.py.in
@@ -1,1081 +1,1081 @@
 #!/usr/bin/python
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 
 import os
 import sys
 import subprocess
 import shlex
 import time
 
 def output_from_command(command):
 	test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 	test.wait()
 
 	return test.communicate()[0].split("\n")
 
 class Test:
 	def __init__(self, name, description, verbose = 0, with_cpg = 0):
 		self.name = name
 		self.description = description
 		self.cmds = []
 		self.verbose = verbose
 
 		self.result_txt = ""
 		self.cmd_tool_output = ""
 		self.result_exitcode = 0;
 
 		self.stonith_options = "-s"
 		self.enable_corosync = 0
 
 		if with_cpg:
 			self.stonith_options = "-c"
 			self.enable_corosync = 1
 
 		self.stonith_process = None
 		self.stonith_output = ""
 		self.stonith_patterns = []
 		self.negative_stonith_patterns = []
 
 		self.executed = 0
 
 		rsc_classes = output_from_command("crm_resource --list-standards")
 
 	def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None):
 		self.cmds.append(
 			{
 				"cmd" : cmd,
 				"kill" : kill,
 				"args" : args,
 				"expected_exitcode" : exitcode,
 				"stdout_match" : stdout_match,
 				"stdout_negative_match" : stdout_negative_match,
 				"no_wait" : no_wait,
 			}
 		)
 
 	def stop_pacemaker(self):
 		cmd = shlex.split("killall -9 -q pacemakerd")
 		test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 		test.wait()
 
 	def start_environment(self):
 		### make sure we are in full control here ###
 		self.stop_pacemaker()
 
 		cmd = shlex.split("killall -9 -q stonithd")
 		test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 		test.wait()
 
 		if self.verbose:
 			self.stonith_options = self.stonith_options + " -V"
 			print "Starting stonithd with %s" % self.stonith_options
 
 		if os.path.exists("/tmp/stonith-regression.log"):
 			os.remove('/tmp/stonith-regression.log')
 
 		self.stonith_process = subprocess.Popen(
 			shlex.split("@CRM_DAEMON_DIR@/stonithd %s -l /tmp/stonith-regression.log" % self.stonith_options))
 
 		time.sleep(1)
 
 	def clean_environment(self):
 		if self.stonith_process:
 			self.stonith_process.terminate()
 			self.stonith_process.wait()
 
 		self.stonith_output = ""
 		self.stonith_process = None
 
 		f = open('/tmp/stonith-regression.log', 'r')
 		for line in f.readlines():
 			self.stonith_output = self.stonith_output + line
 
 		if self.verbose:
 			print "Daemon Output Start"
 			print self.stonith_output
 			print "Daemon Output End"
 		os.remove('/tmp/stonith-regression.log')
 
 	def add_stonith_log_pattern(self, pattern):
 		self.stonith_patterns.append(pattern)
 
 	def add_stonith_negative_log_pattern(self, pattern):
 		self.negative_stonith_patterns.append(pattern)
 
 	def add_cmd(self, cmd, args):
 		self.__new_cmd(cmd, args, 0, "")
 
 	def add_cmd_no_wait(self, cmd, args):
 		self.__new_cmd(cmd, args, 0, "", 1)
 
 	def add_cmd_check_stdout(self, cmd, args, match, no_match = ""):
 		self.__new_cmd(cmd, args, 0, match, 0, no_match)
 
 	def add_expected_fail_cmd(self, cmd, args, exitcode = 255):
 		self.__new_cmd(cmd, args, exitcode, "")
 
 	def get_exitcode(self):
 		return self.result_exitcode
 
 	def print_result(self, filler):
 		print "%s%s" % (filler, self.result_txt)
 
 	def run_cmd(self, args):
 		cmd = shlex.split(args['args'])
 		cmd.insert(0, args['cmd'])
 
 		if self.verbose:
 			print "\n\nRunning: "+" ".join(cmd)
 		test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
 		if args['kill']:
 			if self.verbose:
 				print "Also running: "+args['kill']
 			subprocess.Popen(shlex.split(args['kill']))
 
 		if args['no_wait'] == 0:
 			test.wait()
 		else:
 			return 0
 
 		output_res = test.communicate()
 		output = output_res[0] + output_res[1]
 
 		if self.verbose:
 			print output
 
 		if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0:
 			test.returncode = -2
 			print "STDOUT string '%s' was not found in cmd output: %s" % (args['stdout_match'], output)
 
 		if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0:
 			test.returncode = -2
 			print "STDOUT string '%s' was found in cmd output: %s" % (args['stdout_negative_match'], output)
 
 		return test.returncode;
 
 
 	def count_negative_matches(self, outline):
 		count = 0
 		for line in self.negative_stonith_patterns:
 			if outline.count(line):
 				count = 1
 				if self.verbose:
 					print "This pattern should not have matched = '%s" % (line)
 		return count
 
 	def match_stonith_patterns(self):
 		negative_matches = 0
 		cur = 0
 		pats = self.stonith_patterns
 		total_patterns = len(self.stonith_patterns)
 
 		if len(self.stonith_patterns) == 0:
 			return
 
 		for line in self.stonith_output.split("\n"):
 			negative_matches = negative_matches + self.count_negative_matches(line)
 			if len(pats) == 0:
 				continue
 			cur = -1
 			for p in pats:
 				cur = cur + 1
 				if line.count(pats[cur]):
 					del pats[cur]
 					break
 
 		if len(pats) > 0 or negative_matches:
 			if self.verbose:
 				for p in pats:
 					print "Pattern Not Matched = '%s'" % p
 
 			self.result_txt = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." % (self.name, len(pats), total_patterns, negative_matches)
 			self.result_exitcode = -1
 
 	def run(self):
 		res = 0
 		i = 1
 		self.start_environment()
 
 		if self.verbose:
 			print "\n--- START TEST - %s" % self.name
 
 		self.result_txt = "SUCCESS - '%s'" % (self.name)
 		self.result_exitcode = 0
 		for cmd in self.cmds:
 			res = self.run_cmd(cmd)
 			if res != cmd['expected_exitcode']:
 				print "Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode'])
 				self.result_txt = "FAILURE - '%s' failed at step %d. Command: %s %s" % (self.name, i, cmd['cmd'], cmd['args'])
 				self.result_exitcode = -1
 				break
 			else:
 				if self.verbose:
 					print "Step %d SUCCESS" % (i)
 			i = i + 1
 		self.clean_environment()
 
 		if self.result_exitcode == 0:
 			self.match_stonith_patterns()
 
 		print self.result_txt
 		if self.verbose:
 			print "--- END TEST - %s\n" % self.name
 
 		self.executed = 1
 		return res
 
 class Tests:
 	def __init__(self, verbose = 0):
 		self.tests = []
 		self.verbose = verbose
 		self.autogen_corosync_cfg = 0
 		if not os.path.exists("/etc/corosync/corosync.conf"):
 			self.autogen_corosync_cfg = 1
 
 	def new_test(self, name, description, with_cpg = 0):
 		test = Test(name, description, self.verbose, with_cpg)
 		self.tests.append(test)
 		return test
 
 	def print_list(self):
 		print "\n==== %d TESTS FOUND ====" % (len(self.tests))
 		print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")
 		print "%35s - %s" % ("--------------------", "--------------------")
 		for test in self.tests:
 			print "%35s - %s" % (test.name, test.description)
 		print "==== END OF LIST ====\n"
 
 
 	def start_corosync(self):
 		if self.verbose:
 			print "Starting corosync"
 
 		test = subprocess.Popen("corosync", stdout=subprocess.PIPE)
 		test.wait()
 		time.sleep(10)
 
 	def stop_corosync(self):
 		cmd = shlex.split("killall -9 -q corosync")
 		test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 		test.wait()
 
 	def run_single(self, name):
 		for test in self.tests:
 			if test.name == name:
 				test.run()
 				break;
 
 	def run_tests_matching(self, pattern):
 		for test in self.tests:
 			if test.name.count(pattern) != 0:
 				test.run()
 
 	def run_cpg_only(self):
 		for test in self.tests:
 			if test.enable_corosync:
 				test.run()
 
 	def run_no_cpg(self):
 		for test in self.tests:
 			if not test.enable_corosync:
 				test.run()
 
 	def run_tests(self):
 		for test in self.tests:
 			test.run()
 
 	def exit(self):
 		for test in self.tests:
 			if test.executed == 0:
 				continue
 
 			if test.get_exitcode() != 0:
 				sys.exit(-1)
 
 		sys.exit(0)
 
 	def print_results(self):
 		failures = 0;
 		success = 0;
 		print "\n\n======= FINAL RESULTS =========="
 		print "\n--- FAILURE RESULTS:"
 		for test in self.tests:
 			if test.executed == 0:
 				continue
 
 			if test.get_exitcode() != 0:
 				failures = failures + 1
 				test.print_result("    ")
 			else:
 				success = success + 1
 
 		if failures == 0:
 			print "    None"
 
 		print "\n--- TOTALS\n    Pass:%d\n    Fail:%d\n" % (success, failures)
 	def build_api_sanity_tests(self):
 		verbose_arg = ""
 		if self.verbose:
 			verbose_arg = "-V"
 
 		test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.")
 		test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg))
 
 		test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1)
 		test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg))
 
 	def build_custom_timeout_tests(self):
 		# custom timeout without topology
 		test = self.new_test("cpg_custom_timeout_1",
 				"Verify per device timeouts work as expected without using topology.", 1)
 		test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 		test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"")
 		test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4\"")
 		test.add_cmd("stonith_admin", "-F node3 -t 2")
 		# timeout is 2+1+4 = 7
 		test.add_stonith_log_pattern("remote op timeout set to 7")
 
 		# custom timeout _WITH_ topology
 		test = self.new_test("cpg_custom_timeout_2",
 				"Verify per device timeouts work as expected _WITH_ topology.", 1)
 		test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 		test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"")
 		test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4000\"")
 		test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
 		test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1")
 		test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2")
 		test.add_cmd("stonith_admin", "-F node3 -t 2")
 		# timeout is 2+1+4000 = 4003
 		test.add_stonith_log_pattern("remote op timeout set to 4003")
 
 	def build_fence_merge_tests(self):
 
 		### Simple test that overlapping fencing operations get merged
 		test = self.new_test("cpg_custom_merge_single",
 				"Verify overlapping identical fencing operations are merged, no fencing levels used.", 1)
 		test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
 		test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ")
 		test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd("stonith_admin", "-F node3 -t 10")
 		### one merger will happen
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		### the pattern below signifies that both the original and duplicate operation completed
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 
 		### Test that multiple mergers occur
 		test = self.new_test("cpg_custom_merge_multiple",
 				"Verify multiple overlapping identical fencing operations are merged", 1)
 		test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
-		test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ")
+		test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ")
 		test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd("stonith_admin", "-F node3 -t 10")
 		### 4 mergers should occur
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		### the pattern below signifies that both the original and duplicate operation completed
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 
 		### Test that multiple mergers occur with topologies used
 		test = self.new_test("cpg_custom_merge_with_topology",
 				"Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1)
 		test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
 		test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ")
 		test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
 		test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
 		test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2")
 		test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
 		test.add_cmd("stonith_admin", "-F node3 -t 10")
 		### 4 mergers should occur
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
 		### the pattern below signifies that both the original and duplicate operation completed
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 		test.add_stonith_log_pattern("Operation off of node3 by")
 
 
 		test = self.new_test("cpg_custom_no_merge",
 				"Verify differing fencing operations are not merged", 1)
 		test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"")
 		test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ")
 		test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"")
 		test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
 		test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2")
 		test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1")
 		test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10")
 		test.add_cmd("stonith_admin", "-F node3 -t 10")
 		test.add_stonith_negative_log_pattern("Merging stonith action off for node node3 originating from client")
 
 	def build_standalone_tests(self):
 		test_types = [
 			{
 				"prefix" : "standalone" ,
 				"use_cpg" : 0,
 			},
 			{
 				"prefix" : "cpg" ,
 				"use_cpg" : 1,
 			},
 		]
 
 		# test what happens when all devices timeout
 		for test_type in test_types:
 			test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"],
 					"Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false2  -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			if test_type["use_cpg"] == 1:
 				test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 194)
 				test.add_stonith_log_pattern("remote op timeout set to 6")
 			else:
 				test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 55)
 
 			test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ")
 			test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ")
 			test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ")
 
 		# test what happens when multiple devices can fence a node, but the first device fails.
 		for test_type in test_types:
 			test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"],
 					"Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-F node3 -t 2")
 
 			if test_type["use_cpg"] == 1:
 				test.add_stonith_log_pattern("remote op timeout set to 6")
 
 		# simple topology test for one device
 		for test_type in test_types:
 			if test_type["use_cpg"] == 0:
 				continue
 
 			test = self.new_test("%s_topology_simple" % test_type["prefix"],
 					"Verify all fencing devices at a level are used.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v true")
 			test.add_cmd("stonith_admin", "-F node3 -t 2")
 
 			test.add_stonith_log_pattern("remote op timeout set to 2")
 			test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0")
 
 
 		# add topology, delete topology, verify fencing still works 
 		for test_type in test_types:
 			if test_type["use_cpg"] == 0:
 				continue
 
 			test = self.new_test("%s_topology_add_remove" % test_type["prefix"],
 					"Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v true")
 			test.add_cmd("stonith_admin", "-d node3 -i 1")
 			test.add_cmd("stonith_admin", "-F node3 -t 2")
 
 			test.add_stonith_log_pattern("remote op timeout set to 2")
 			test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0")
 
 		# test what happens when the first fencing level has multiple devices.
 		for test_type in test_types:
 			if test_type["use_cpg"] == 0:
 				continue
 
 			test = self.new_test("%s_topology_device_fails" % test_type["prefix"],
 					"Verify if one device in a level fails, the other is tried.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R false  -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v false")
 			test.add_cmd("stonith_admin", "-r node3 -i 2 -v true")
 			test.add_cmd("stonith_admin", "-F node3 -t 20")
 
 			test.add_stonith_log_pattern("remote op timeout set to 40")
 			test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201")
 			test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0")
 
 		# test what happens when the first fencing level fails.
 		for test_type in test_types:
 			if test_type["use_cpg"] == 0:
 				continue
 
 			test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"],
 					"Verify if one level fails, the next leve is tried.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true2  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true3  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true4  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
 			test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
 			test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2")
 			test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3")
 			test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4")
 
-			test.add_cmd("stonith_admin", "-F node3 -t 2")
+			test.add_cmd("stonith_admin", "-F node3 -t 3")
 
-			test.add_stonith_log_pattern("remote op timeout set to 12")
+			test.add_stonith_log_pattern("remote op timeout set to 18")
 			test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201")
 			test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201")
 			test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0")
 			test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0")
 
 
 		# test what happens when the first fencing level had devices that no one has registered
 		for test_type in test_types:
 			if test_type["use_cpg"] == 0:
 				continue
 
 			test = self.new_test("%s_topology_missing_devices" % test_type["prefix"],
 					"Verify topology can continue with missing devices.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true2  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true3  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true4  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
 			test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
 			test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2")
 			test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3")
 			test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4")
 
 			test.add_cmd("stonith_admin", "-F node3 -t 2")
 
 		# Test what happens if multiple fencing levels are defined, and then the first one is removed.
 		for test_type in test_types:
 			if test_type["use_cpg"] == 0:
 				continue
 
 			test = self.new_test("%s_topology_level_removal" % test_type["prefix"],
 					"Verify level removal works.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true2  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true3  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true4  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
 			test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
 			test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2")
 
 			test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3")
 			test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4")
 
 			# Now remove level 2, verify none of the devices in level two are hit.
 			test.add_cmd("stonith_admin", "-d node3 -i 2")
 
 			test.add_cmd("stonith_admin", "-F node3 -t 20")
 
 			test.add_stonith_log_pattern("remote op timeout set to 8")
 			test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201")
 			test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: ")
 			test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0")
 			test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0")
 
 		# test the stonith builds the correct list of devices that can fence a node.
 		for test_type in test_types:
 			test = self.new_test("%s_list_devices" % test_type["prefix"],
 					"Verify list of devices that can fence a node is correct", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
 			test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 
 			test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1")
 			test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1")
 
 		# simple test of device monitor
 		for test_type in test_types:
 			test = self.new_test("%s_monitor" % test_type["prefix"],
 					"Verify device is reachable", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
 			test.add_cmd("stonith_admin", "-R false1  -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
 
 			test.add_cmd("stonith_admin", "-Q true1")
 			test.add_cmd("stonith_admin", "-Q false1")
 			test.add_expected_fail_cmd("stonith_admin", "-Q true2", 237)
 
 		# Verify monitor occurs for duration of timeout period on failure
 		for test_type in test_types:
 			test = self.new_test("%s_monitor_timeout" % test_type["prefix"],
 					"Verify monitor uses duration of timeout period given.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"")
 			test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 195)
 			test.add_stonith_log_pattern("Attempt 2 to execute")
 
 		# Verify monitor occurs for duration of timeout period on failure, but stops at max retries
 		for test_type in test_types:
 			test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"],
 					"Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"")
 			test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15",195)
 			test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times")
 
 		# simple register test
 		for test_type in test_types:
 			test = self.new_test("%s_register" % test_type["prefix"],
 					"Verify devices can be registered and un-registered", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
 
 			test.add_cmd("stonith_admin", "-Q true1")
 
 			test.add_cmd("stonith_admin", "-D true1")
 
 			test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237)
 
 
 		# simple reboot test
 		for test_type in test_types:
 			test = self.new_test("%s_reboot" % test_type["prefix"],
 					"Verify devices can be rebooted", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
 
 			test.add_cmd("stonith_admin", "-B node3 -t 2")
 
 			test.add_cmd("stonith_admin", "-D true1")
 
 			test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237)
 
 		# test fencing history.
 		for test_type in test_types:
 			if test_type["use_cpg"] == 0:
 				continue
 			test = self.new_test("%s_fence_history" % test_type["prefix"],
 					"Verify last fencing operation is returned.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
 
 			test.add_cmd("stonith_admin", "-F node3 -t 2 -V")
 
 			test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "")
 
 		# simple test of dynamic list query
 		for test_type in test_types:
 			test = self.new_test("%s_dynamic_list_query" % test_type["prefix"],
 					"Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy_list")
 			test.add_cmd("stonith_admin", "-R true2  -a fence_dummy_list")
 			test.add_cmd("stonith_admin", "-R true3  -a fence_dummy_list")
 
 			test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found")
 
 
 		# fence using dynamic list query
 		for test_type in test_types:
 			test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"],
 					"Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy_list")
 			test.add_cmd("stonith_admin", "-R true2  -a fence_dummy_list")
 			test.add_cmd("stonith_admin", "-R true3  -a fence_dummy_list")
 
 			test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V");
 
 		# simple test of  query using status action
 		for test_type in test_types:
 			test = self.new_test("%s_status_query" % test_type["prefix"],
 					"Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
 			test.add_cmd("stonith_admin", "-R true2  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
 			test.add_cmd("stonith_admin", "-R true3  -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
 
 			test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found")
 
 		# test what happens when no reboot action is advertised
 		for test_type in test_types:
 			test = self.new_test("%s_no_reboot_support" % test_type["prefix"],
 					"Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-B node1 -t 5 -V");
 			test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'")
 			test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
 
 		# make sure reboot is used when reboot action is advertised
 		for test_type in test_types:
 			test = self.new_test("%s_with_reboot_support" % test_type["prefix"],
 					"Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"])
 			test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
 			test.add_cmd("stonith_admin", "-B node1 -t 5 -V");
 			test.add_stonith_negative_log_pattern("does not advertise support for 'reboot', performing 'off'")
 			test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
 
 	def build_nodeid_tests(self):
 		our_uname = output_from_command("uname -n")
 		if our_uname:
 			our_uname = our_uname[0]
 
 		### verify nodeid is supplied when nodeid is in the metadata parameters
 		test = self.new_test("cpg_supply_nodeid",
 				"Verify nodeid is given when fence agent has nodeid as parameter", 1)
 
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname))
 		test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname))
 
 		### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters
 		test = self.new_test("cpg_do_not_supply_nodeid",
 				"Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1)
 
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname))
 		test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname))
 
 		### verify nodeid use doesn't explode standalone mode
 		test = self.new_test("standalone_do_not_supply_nodeid",
 				"Verify nodeid in metadata parameter list doesn't kill standalone mode", 0)
 
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname))
 		test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname))
 
 
 	def build_unfence_tests(self):
 		our_uname = output_from_command("uname -n")
 		if our_uname:
 			our_uname = our_uname[0]
 
 		### verify unfencing using automatic unfencing
 		test = self.new_test("cpg_unfence_required_1",
 				"Verify require unfencing on all devices when automatic=true in agent's metadata", 1)
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
 		# both devices should be executed
 		test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
 		test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)");
 
 
 		### verify unfencing using automatic unfencing fails if any of the required agents fail
 		test = self.new_test("cpg_unfence_required_2",
 				"Verify require unfencing on all devices when automatic=true in agent's metadata", 1)
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=fail\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), 143)
 
 		### verify unfencing using automatic devices with topology
 		test = self.new_test("cpg_unfence_required_3",
 				"Verify require unfencing on all devices even when required devices are at different topology levels", 1)
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname))
 		test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
 		test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
 		test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)");
 
 
 		### verify unfencing using automatic devices with topology
 		test = self.new_test("cpg_unfence_required_4",
 				"Verify all required devices are executed even with topology levels fail.", 1)
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true4 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R false4 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname))
 		test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
 		test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
 		test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)");
 		test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)");
 		test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)");
 
 		### verify unfencing using on_target device
 		test = self.new_test("cpg_unfence_on_target_1",
 				"Verify unfencing with on_target = true", 1)
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
 		test.add_stonith_log_pattern("(on) to be executed on the target node")
 
 
 		### verify failure of unfencing using on_target device
 		test = self.new_test("cpg_unfence_on_target_2",
 				"Verify failure unfencing with on_target = true", 1)
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname))
 		test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 237)
 		test.add_stonith_log_pattern("(on) to be executed on the target node")
 
 
 		### verify unfencing using on_target device with topology
 		test = self.new_test("cpg_unfence_on_target_3",
 				"Verify unfencing with on_target = true using topology", 1)
 
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
 
 		test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname))
 		test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname))
 
 		test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
 		test.add_stonith_log_pattern("(on) to be executed on the target node")
 
 		### verify unfencing using on_target device with topology fails when victim node doesn't exist
 		test = self.new_test("cpg_unfence_on_target_4",
 				"Verify unfencing failure with on_target = true using topology", 1)
 
 		test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname))
 		test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname))
 
 		test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1")
 		test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2")
 
 		test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", 237)
 		test.add_stonith_log_pattern("(on) to be executed on the target node")
 
 
 	def setup_environment(self, use_corosync):
 		if self.autogen_corosync_cfg and use_corosync:
 			corosync_conf = ("""
 totem {
         version: 2
         crypto_cipher: none
         crypto_hash: none
 
         nodeid:         101
         secauth:        off
 
         interface {
                 ttl: 1
                 ringnumber: 0
                 mcastport: 6666
                 mcastaddr: 226.94.1.1
                 bindnetaddr: 127.0.0.1
         }
 }
 
 logging {
         debug: off
         fileline: off
         to_syslog: no
         to_stderr: no
         syslog_facility: daemon
         timestamp: on
         to_logfile: yes
         logfile: /var/log/corosync.log
         logfile_priority: info
 }
 """)
 
 			os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf))
 
 
 		if use_corosync:
 			### make sure we are in control ###
 			self.stop_corosync()
 			self.start_corosync()
 
 		monitor_fail_agent = ("""#!/usr/bin/python
 import sys
 def main():
     for line in sys.stdin.readlines():
         if line.count("monitor") > 0:
             sys.exit(-1);
     sys.exit(-1)
 if __name__ == "__main__":
     main()
 """)
 
 		dynamic_list_agent = ("""#!/usr/bin/python
 import sys
 def main():
     for line in sys.stdin.readlines():
         if line.count("list") > 0:
             print "fake_port_1"
             sys.exit(0)
         if line.count("off") > 0:
             sys.exit(0)
     sys.exit(-1)
 if __name__ == "__main__":
     main()
 """)
 
 
 		os.system("cat <<-END >>/usr/sbin/fence_dummy_list\n%s\nEND" % (dynamic_list_agent))
 		os.system("chmod 711 /usr/sbin/fence_dummy_list")
 
 		os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor_fail\n%s\nEND" % (monitor_fail_agent))
 		os.system("chmod 711 /usr/sbin/fence_dummy_monitor_fail")
 
 		os.system("cp /usr/share/pacemaker/tests/cts/fence_dummy /usr/sbin/fence_dummy")
 
 		# modifies dummy agent to do require unfencing 
 		os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy  | sed 's/on_target=/automatic=/g' > /usr/sbin/fence_dummy_automatic_unfence");
 		os.system("chmod 711 /usr/sbin/fence_dummy_automatic_unfence")
 
 		# modifies dummy agent to not advertise reboot
 		os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy  | sed 's/^.*<action.*name.*reboot.*>.*//g' > /usr/sbin/fence_dummy_no_reboot");
 		os.system("chmod 711 /usr/sbin/fence_dummy_no_reboot")
 
 	def cleanup_environment(self, use_corosync):
 		if use_corosync:
 			self.stop_corosync()
 
 			if self.verbose and os.path.exists('/var/log/corosync.log'):
 				print "Corosync output"
 				f = open('/var/log/corosync.log', 'r')
 				for line in f.readlines():
 					print line.strip()
 				os.remove('/var/log/corosync.log')
 
 		if self.autogen_corosync_cfg:
 			os.system("rm -f /etc/corosync/corosync.conf")
 
 		os.system("rm -f /usr/sbin/fence_dummy_monitor_fail")
 		os.system("rm -f /usr/sbin/fence_dummy_list")
 		os.system("rm -f /usr/sbin/fence_dummy")
 		os.system("rm -f /usr/sbin/fence_dummy_automatic_unfence")
 		os.system("rm -f /usr/sbin/fence_dummy_no_reboot")
 
 class TestOptions:
 	def __init__(self):
 		self.options = {}
 		self.options['list-tests'] = 0
 		self.options['run-all'] = 1
 		self.options['run-only'] = ""
 		self.options['run-only-pattern'] = ""
 		self.options['verbose'] = 0
 		self.options['invalid-arg'] = ""
 		self.options['cpg-only'] = 0
 		self.options['no-cpg'] = 0
 		self.options['show-usage'] = 0
 
 	def build_options(self, argv):
 		args = argv[1:]
 		skip = 0
 		for i in range(0, len(args)):
 			if skip:
 				skip = 0
 				continue
 			elif args[i] == "-h" or args[i] == "--help":
 				self.options['show-usage'] = 1
 			elif args[i] == "-l" or args[i] == "--list-tests":
 				self.options['list-tests'] = 1
 			elif args[i] == "-V" or args[i] == "--verbose":
 				self.options['verbose'] = 1
 			elif args[i] == "-n" or args[i] == "--no-cpg":
 				self.options['no-cpg'] = 1
 			elif args[i] == "-c" or args[i] == "--cpg-only":
 				self.options['cpg-only'] = 1
 			elif args[i] == "-r" or args[i] == "--run-only":
 				self.options['run-only'] = args[i+1]
 				skip = 1
 			elif args[i] == "-p" or args[i] == "--run-only-pattern":
 				self.options['run-only-pattern'] = args[i+1]
 				skip = 1
 
 	def show_usage(self):
 		print "usage: " + sys.argv[0] + " [options]"
 		print "If no options are provided, all tests will run"
 		print "Options:"
 		print "\t [--help | -h]                        Show usage"
 		print "\t [--list-tests | -l]                  Print out all registered tests."
 		print "\t [--cpg-only | -c]                    Only run tests that require corosync."
 		print "\t [--no-cpg | -n]                      Only run tests that do not require corosync"
 		print "\t [--run-only | -r 'testname']         Run a specific test"
 		print "\t [--verbose | -V]                     Verbose output"
 		print "\t [--run-only-pattern | -p 'string']   Run only tests containing the string value"
 		print "\n\tExample: Run only the test 'start_top'"
 		print "\t\t python ./regression.py --run-only start_stop"
 		print "\n\tExample: Run only the tests with the string 'systemd' present in them"
 		print "\t\t python ./regression.py --run-only-pattern systemd"
 
 def main(argv):
 	o = TestOptions()
 	o.build_options(argv)
 
 	use_corosync = 1
 
 	tests = Tests(o.options['verbose'])
 	tests.build_standalone_tests()
 	tests.build_custom_timeout_tests()
 	tests.build_api_sanity_tests()
 	tests.build_fence_merge_tests()
 	tests.build_unfence_tests()
 	tests.build_nodeid_tests()
 
 	if o.options['list-tests']:
 		tests.print_list()
 		sys.exit(0)
 	elif o.options['show-usage']:
 		o.show_usage()
 		sys.exit(0)
 
 	print "Starting ..."
 
 	if o.options['no-cpg']:
 		use_corosync = 0
 
 	tests.setup_environment(use_corosync)
 
 	if o.options['run-only-pattern'] != "":
 		tests.run_tests_matching(o.options['run-only-pattern'])
 		tests.print_results()
 	elif o.options['run-only'] != "":
 		tests.run_single(o.options['run-only'])
 		tests.print_results()
 	elif o.options['no-cpg']:
 		tests.run_no_cpg()
 		tests.print_results()
 	elif o.options['cpg-only']:
 		tests.run_cpg_only()
 		tests.print_results()
 	else:
 		tests.run_tests()
 		tests.print_results()
 
 	tests.cleanup_environment(use_corosync)
 	tests.exit()
 if __name__=="__main__":
 	main(sys.argv)
diff --git a/fencing/remote.c b/fencing/remote.c
index 58c2102d77..1f014487c4 100644
--- a/fencing/remote.c
+++ b/fencing/remote.c
@@ -1,1718 +1,1854 @@
 /*
  * Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <sys/utsname.h>
 
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <ctype.h>
 #include <regex.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/ipc.h>
 #include <crm/common/ipcs.h>
 #include <crm/cluster/internal.h>
 
 #include <crm/stonith-ng.h>
 #include <crm/fencing/internal.h>
 #include <crm/common/xml.h>
 
 #include <crm/common/util.h>
 #include <internal.h>
 
 #define TIMEOUT_MULTIPLY_FACTOR 1.2
 
+/* When one stonithd queries its peers for devices able to handle a fencing
+ * request, each peer will reply with a list of such devices available to it.
+ * Each reply will be parsed into a st_query_result_t, with each device's
+ * information kept in a device_properties_t.
+ */
+
+typedef struct device_properties_s {
+    /* Whether access to this device has been verified */
+    gboolean verified;
+    /* Action-specific timeout */
+    int custom_action_timeout;
+    /* Action-specific maximum random delay */
+    int delay_max;
+    /* Whether this device has been executed */
+    gboolean executed;
+} device_properties_t;
+
 typedef struct st_query_result_s {
     /* Name of peer that sent this result */
     char *host;
-    int ndevices;
     /* Only try peers for non-topology based operations once */
     gboolean tried;
-    GListPtr device_list;
-    GHashTable *custom_action_timeouts;
-    GHashTable *delay_maxes;
-    /* Subset of devices that peer has verified connectivity on */
-    GHashTable *verified_devices;
-
+    /* Number of entries in the devices table */
+    int ndevices;
+    /* Devices available to this host that are capable of fencing the target */
+    GHashTable *devices;
 } st_query_result_t;
 
 GHashTable *remote_op_list = NULL;
 void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer);
 static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                                   int call_options);
 
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
-static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer,
-                                int default_timeout);
+static int get_op_total_timeout(const remote_fencing_op_t *op,
+                                const st_query_result_t *chosen_peer);
 
 static gint
 sort_strings(gconstpointer a, gconstpointer b)
 {
     return strcmp(a, b);
 }
 
 static void
 free_remote_query(gpointer data)
 {
     if (data) {
         st_query_result_t *query = data;
 
         crm_trace("Free'ing query result from %s", query->host);
+        g_hash_table_destroy(query->devices);
         free(query->host);
-        g_list_free_full(query->device_list, free);
-        g_hash_table_destroy(query->custom_action_timeouts);
-        g_hash_table_destroy(query->delay_maxes);
-        g_hash_table_destroy(query->verified_devices);
         free(query);
     }
 }
 
+struct peer_count_data {
+    gboolean verified_only;
+    int count;
+};
+
+/*!
+ * \internal
+ * \brief Increment a counter if a device has not been executed yet
+ *
+ * \param[in] key        Device ID (ignored)
+ * \param[in] value      Device properties
+ * \param[in] user_data  Peer count data
+ */
+static void
+count_peer_device(gpointer key, gpointer value, gpointer user_data)
+{
+    device_properties_t *props = (device_properties_t*)value;
+    struct peer_count_data *data = user_data;
+
+    if (!props->executed
+        && (!data->verified_only || props->verified)) {
+        ++(data->count);
+    }
+}
+
 /*!
  * \internal
  * \brief Check the number of available devices in a peer's query results
  *
  * \param[in] peer           Peer to count
  * \param[in] verified_only  Whether to count only verified devices
  *
- * \return Number of devices available to peer
+ * \return Number of devices available to peer that were not already executed
  */
 static int
 count_peer_devices(const st_query_result_t *peer, gboolean verified_only)
 {
-    return verified_only? g_hash_table_size(peer->verified_devices)
-           : g_list_length(peer->device_list);
+    struct peer_count_data data;
+
+    data.verified_only = verified_only;
+    data.count = 0;
+    if (peer) {
+        g_hash_table_foreach(peer->devices, count_peer_device, &data);
+    }
+    return data.count;
 }
 
 /*!
  * \internal
  * \brief Search for a device in a query result
  *
  * \param[in] peer    Query result for a peer
  * \param[in] device  Device ID to search for
  *
- * \return Device list entry if found, NULL otherwise
+ * \return Device properties if found, NULL otherwise
  */
-static GListPtr
+static device_properties_t *
 find_peer_device(const st_query_result_t *peer, const char *device)
 {
-    return g_list_find_custom(peer->device_list, device, sort_strings);
+    device_properties_t *props = g_hash_table_lookup(peer->devices, device);
+
+    return (props && !props->executed)? props : NULL;
 }
 
 /*!
  * \internal
- * \brief Remove a device from a peer's device list if it is present
+ * \brief Find a device in a peer's device list and mark it as executed
  *
  * \param[in,out] peer                   Peer with results to search
- * \param[in]     device                 ID of device to find and remove
+ * \param[in]     device                 ID of device to mark as done
  * \param[in]     verified_devices_only  Only consider verified devices
  *
- * \return TRUE if device was found and removed, FALSE otherwise
+ * \return TRUE if device was found and marked, FALSE otherwise
  */
 static gboolean
 grab_peer_device(st_query_result_t *peer, const char *device,
                  gboolean verified_devices_only)
 {
-    GListPtr match;
+    device_properties_t *props = find_peer_device(peer, device);
 
-    if (verified_devices_only && !g_hash_table_lookup(peer->verified_devices, device)) {
-        return FALSE;
-    }
-    match = find_peer_device(peer, device);
-    if (match == NULL) {
+    if ((props == NULL) || (verified_devices_only && !props->verified)) {
         return FALSE;
     }
+
     crm_trace("Removing %s from %s (%d remaining)",
               device, peer->host, count_peer_devices(peer, FALSE));
-    peer->device_list = g_list_remove(peer->device_list, match->data);
+    props->executed = TRUE;
     return TRUE;
 }
 
+/*
+ * \internal
+ * \brief Free the list of required devices
+ *
+ * \param[in,out] op     Operation to modify
+ */
+static void
+free_required_list(remote_fencing_op_t *op)
+{
+    if (op->required_list) {
+        g_list_free_full(op->required_list, free);
+        op->required_list = NULL;
+    }
+}
+
 static void
 clear_remote_op_timers(remote_fencing_op_t * op)
 {
     if (op->query_timer) {
         g_source_remove(op->query_timer);
         op->query_timer = 0;
     }
     if (op->op_timer_total) {
         g_source_remove(op->op_timer_total);
         op->op_timer_total = 0;
     }
     if (op->op_timer_one) {
         g_source_remove(op->op_timer_one);
         op->op_timer_one = 0;
     }
 }
 
 static void
 free_remote_op(gpointer data)
 {
     remote_fencing_op_t *op = data;
 
     crm_trace("Free'ing op %s for %s", op->id, op->target);
     crm_log_xml_debug(op->request, "Destroying");
 
     clear_remote_op_timers(op);
 
     free(op->id);
     free(op->action);
     free(op->target);
     free(op->client_id);
     free(op->client_name);
     free(op->originator);
 
     if (op->query_results) {
         g_list_free_full(op->query_results, free_remote_query);
     }
     if (op->request) {
         free_xml(op->request);
         op->request = NULL;
     }
     if (op->devices_list) {
         g_list_free_full(op->devices_list, free);
         op->devices_list = NULL;
     }
-    if (op->required_list) {
-        g_list_free_full(op->required_list, free);
-        op->required_list = NULL;
-    }
+    free_required_list(op);
     free(op);
 }
 
 static xmlNode *
 create_op_done_notify(remote_fencing_op_t * op, int rc)
 {
     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
 
     crm_xml_add_int(notify_data, "state", op->state);
     crm_xml_add_int(notify_data, F_STONITH_RC, rc);
     crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
     crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
     crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
     crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
     crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
 
     return notify_data;
 }
 
 static void
 bcast_result_to_peers(remote_fencing_op_t * op, int rc)
 {
     static int count = 0;
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
     xmlNode *notify_data = create_op_done_notify(op, rc);
 
     count++;
     crm_trace("Broadcasting result to peers");
     crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
     crm_xml_add(bcast, F_SUBTYPE, "broadcast");
     crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
     crm_xml_add_int(bcast, "count", count);
     add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
     send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
     free_xml(notify_data);
     free_xml(bcast);
 
     return;
 }
 
 static void
 handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
 {
     xmlNode *notify_data = NULL;
     xmlNode *reply = NULL;
 
     if (op->notify_sent == TRUE) {
         /* nothing to do */
         return;
     }
 
     /* Do notification with a clean data object */
     notify_data = create_op_done_notify(op, rc);
     crm_xml_add_int(data, "state", op->state);
     crm_xml_add(data, F_STONITH_TARGET, op->target);
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
 
     reply = stonith_construct_reply(op->request, NULL, data, rc);
     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
 
     /* Send fencing OP reply to local client that initiated fencing */
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
     do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
 
     /* mark this op as having notify's already sent */
     op->notify_sent = TRUE;
     free_xml(reply);
     free_xml(notify_data);
 }
 
 static void
 handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
 {
     GListPtr iter = NULL;
 
     for (iter = op->duplicates; iter != NULL; iter = iter->next) {
         remote_fencing_op_t *other = iter->data;
 
         if (other->state == st_duplicate) {
             /* Ie. it hasn't timed out already */
             other->state = op->state;
             crm_debug("Peforming duplicate notification for %s@%s.%.8s = %s", other->client_name,
                       other->originator, other->id, pcmk_strerror(rc));
             remote_op_done(other, data, rc, TRUE);
 
         } else {
             crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name,
                     other->originator, other->state);
         }
     }
 }
 
 /*!
  * \internal
  * \brief Finalize a remote operation.
  *
  * \description This function has two code paths.
  *
  * Path 1. This node is the owner of the operation and needs
  *         to notify the cpg group via a broadcast as to the operation's
  *         results.
  *
  * Path 2. The cpg broadcast is received. All nodes notify their local
  *         stonith clients the operation results.
  *
  * So, The owner of the operation first notifies the cluster of the result,
  * and once that cpg notify is received back it notifies all the local clients.
  *
  * Nodes that are passive watchers of the operation will receive the
  * broadcast and only need to notify their local clients the operation finished.
  *
  * \param op, The fencing operation to finalize
  * \param data, The xml msg reply (if present) of the last delegated fencing
  *              operation.
  * \param dup, Is this operation a duplicate, if so treat it a little differently
  *             making sure the broadcast is not sent out.
  */
 static void
 remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
 {
     int level = LOG_ERR;
     const char *subt = NULL;
     xmlNode *local_data = NULL;
 
     op->completed = time(NULL);
     clear_remote_op_timers(op);
 
     if (op->notify_sent == TRUE) {
         crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s",
                 op->action, op->target, op->delegate ? op->delegate : "<no-one>",
                 op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc));
         goto remote_op_done_cleanup;
     }
 
     if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) {
         xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE);
         if(ndata) {
             op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE);
         } else { 
             op->delegate = crm_element_value_copy(data, F_ORIG);
         }
     }
 
     if (data == NULL) {
         data = create_xml_node(NULL, "remote-op");
         local_data = data;
     }
 
     /* Tell everyone the operation is done, we will continue
      * with doing the local notifications once we receive
      * the broadcast back. */
     subt = crm_element_value(data, F_SUBTYPE);
     if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
         /* Defer notification until the bcast message arrives */
         bcast_result_to_peers(op, rc);
         goto remote_op_done_cleanup;
     }
 
     if (rc == pcmk_ok || dup) {
         level = LOG_NOTICE;
     } else if (safe_str_neq(op->originator, stonith_our_uname)) {
         level = LOG_NOTICE;
     }
 
     do_crm_log(level,
                "Operation %s of %s by %s for %s@%s.%.8s: %s",
                op->action, op->target, op->delegate ? op->delegate : "<no-one>",
                op->client_name, op->originator, op->id, pcmk_strerror(rc));
 
     handle_local_reply_and_notify(op, data, rc);
 
     if (dup == FALSE) {
         handle_duplicates(op, data, rc);
     }
 
     /* Free non-essential parts of the record
      * Keep the record around so we can query the history
      */
     if (op->query_results) {
         g_list_free_full(op->query_results, free_remote_query);
         op->query_results = NULL;
     }
 
     if (op->request) {
         free_xml(op->request);
         op->request = NULL;
     }
 
   remote_op_done_cleanup:
     free_xml(local_data);
 }
 
 static gboolean
 remote_op_watchdog_done(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
 
     op->op_timer_one = 0;
 
     crm_notice("Remote %s operation on %s for %s.%8s assumed complete",
                op->action, op->target, op->client_name, op->id);
     op->state = st_done;
     remote_op_done(op, NULL, pcmk_ok, FALSE);
     return FALSE;
 }
 
 static gboolean
 remote_op_timeout_one(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
 
     op->op_timer_one = 0;
 
     crm_notice("Remote %s operation on %s for %s.%8s timed out",
                op->action, op->target, op->client_name, op->id);
     call_remote_stonith(op, NULL);
     return FALSE;
 }
 
 static gboolean
 remote_op_timeout(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
 
     op->op_timer_total = 0;
 
     if (op->state == st_done) {
         crm_debug("Action %s (%s) for %s (%s) already completed",
                   op->action, op->id, op->target, op->client_name);
         return FALSE;
     }
 
     crm_debug("Action %s (%s) for %s (%s) timed out",
               op->action, op->id, op->target, op->client_name);
     op->state = st_failed;
 
     remote_op_done(op, NULL, -ETIME, FALSE);
 
     return FALSE;
 }
 
 static gboolean
 remote_op_query_timeout(gpointer data)
 {
     remote_fencing_op_t *op = data;
 
     op->query_timer = 0;
     if (op->state == st_done) {
         crm_debug("Operation %s for %s already completed", op->id, op->target);
     } else if (op->state == st_exec) {
         crm_debug("Operation %s for %s already in progress", op->id, op->target);
     } else if (op->query_results) {
         crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state);
         call_remote_stonith(op, NULL);
     } else {
         crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state);
         if (op->op_timer_total) {
             g_source_remove(op->op_timer_total);
             op->op_timer_total = 0;
         }
         remote_op_timeout(op);
     }
 
     return FALSE;
 }
 
 static gboolean
 topology_is_empty(stonith_topology_t *tp)
 {
     int i;
 
     if (tp == NULL) {
         return TRUE;
     }
 
     for (i = 0; i < ST_LEVEL_MAX; i++) {
         if (tp->levels[i] != NULL) {
             return FALSE;
         }
     }
     return TRUE;
 }
 
+/*
+ * \internal
+ * \brief Add a device to the required list
+ *
+ * \param[in,out] op      Operation to modify
+ * \param[in]     device  Device ID to add
+ */
 static void
 add_required_device(remote_fencing_op_t * op, const char *device)
 {
     GListPtr match  = g_list_find_custom(op->required_list, device, sort_strings);
     if (match) {
         /* device already marked required */
         return;
     }
     op->required_list = g_list_prepend(op->required_list, strdup(device));
+}
 
-    /* make sure the required devices is in the current list of devices to be executed */
-    if (op->devices_list) {
-        GListPtr match  = g_list_find_custom(op->devices_list, device, sort_strings);
-        if (match == NULL) {
-           op->devices_list = g_list_append(op->devices_list, strdup(device));
-        }
+/*
+ * \internal
+ * \brief Remove a device from the required list
+ *
+ * \param[in,out] op      Operation to modify
+ * \param[in]     device  Device ID to remove
+ */
+static void
+remove_required_device(remote_fencing_op_t *op, const char *device)
+{
+    GListPtr match = g_list_find_custom(op->required_list, device,
+                                        sort_strings);
+
+    if (match) {
+        op->required_list = g_list_remove(op->required_list, match->data);
     }
 }
 
 /* deep copy the device list */
 static void
 set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
 {
     GListPtr lpc = NULL;
 
     if (op->devices_list) {
         g_list_free_full(op->devices_list, free);
         op->devices_list = NULL;
     }
     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
         op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
     }
-
-    /* tack on whatever required devices have not been executed
-     * to the end of the current devices list. This ensures that
-     * the required devices will get executed regardless of what topology
-     * level they exist at. */
-    for (lpc = op->required_list; lpc != NULL; lpc = lpc->next) {
-        GListPtr match  = g_list_find_custom(op->devices_list, lpc->data, sort_strings);
-        if (match == NULL) {
-            crm_trace("Adding required device %s to device list for %s",
-                      lpc->data, op->id);
-            op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
-        }
-    }
-
     op->devices = op->devices_list;
 }
 
 stonith_topology_t *
 find_topology_for_host(const char *host) 
 {
     stonith_topology_t *tp = g_hash_table_lookup(topology, host);
 
     if(tp == NULL) {
         int status = 1;
         regex_t r_patt;
         GHashTableIter tIter;
 
         crm_trace("Testing %d topologies for a match", g_hash_table_size(topology));
         g_hash_table_iter_init(&tIter, topology);
         while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
 
             if (regcomp(&r_patt, tp->node, REG_EXTENDED)) {
                 crm_info("Bad regex '%s' for fencing level", tp->node);
             } else {
                 status = regexec(&r_patt, host, 0, NULL, 0);
             }
 
             if (status == 0) {
                 crm_notice("Matched %s with %s", host, tp->node);
                 break;
             }
             crm_trace("No match for %s with %s", host, tp->node);
             tp = NULL;
         }
     }
 
     return tp;
 }
 
 /*!
  * \internal
  * \brief Set fencing operation's device list to target's next topology level
  *
  * \param[in,out] op  Remote fencing operation to modify
  *
  * \return pcmk_ok if successful, target was not specified (i.e. queries) or
  *         target has no topology, or -EINVAL if no more topology levels to try
  */
 static int
 stonith_topology_next(remote_fencing_op_t * op)
 {
     stonith_topology_t *tp = NULL;
 
     if (op->target) {
         /* Queries don't have a target set */
         tp = find_topology_for_host(op->target);
     }
     if (topology_is_empty(tp)) {
         return pcmk_ok;
     }
 
     set_bit(op->call_options, st_opt_topology);
 
     do {
         op->level++;
 
     } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
 
     if (op->level < ST_LEVEL_MAX) {
         crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s",
                   op->level, op->target, g_list_length(tp->levels[op->level]),
                   op->client_name, op->originator, op->id);
         set_op_device_list(op, tp->levels[op->level]);
         return pcmk_ok;
     }
 
     crm_notice("All fencing options to fence %s for %s@%s.%.8s failed",
                op->target, op->client_name, op->originator, op->id);
     return -EINVAL;
 }
 
 /*!
  * \brief Check to see if this operation is a duplicate of another in flight
  * operation. If so merge this operation into the inflight operation, and mark
  * it as a duplicate.
  */
 static void
 merge_duplicates(remote_fencing_op_t * op)
 {
     GHashTableIter iter;
     remote_fencing_op_t *other = NULL;
 
     time_t now = time(NULL);
 
     g_hash_table_iter_init(&iter, remote_op_list);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
         crm_node_t *peer = NULL;
 
         if (other->state > st_exec) {
             /* Must be in-progress */
             continue;
         } else if (safe_str_neq(op->target, other->target)) {
             /* Must be for the same node */
             continue;
         } else if (safe_str_neq(op->action, other->action)) {
             crm_trace("Must be for the same action: %s vs. ", op->action, other->action);
             continue;
         } else if (safe_str_eq(op->client_name, other->client_name)) {
             crm_trace("Must be for different clients: %s", op->client_name);
             continue;
         } else if (safe_str_eq(other->target, other->originator)) {
             crm_trace("Can't be a suicide operation: %s", other->target);
             continue;
         }
 
         peer = crm_get_peer(0, other->originator);
         if(fencing_peer_active(peer) == FALSE) {
             crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead",
                        other->action, other->target, other->client_name, other->originator, other->id);
             other->state = st_failed;
             continue;
 
         } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) {
             crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %d vs. %d + %d",
                      other->action, other->target, other->client_name, other->originator, other->id,
                      now, other->created, other->total_timeout);
             continue;
         }
 
         /* There is another in-flight request to fence the same host
          * Piggyback on that instead.  If it fails, so do we.
          */
         other->duplicates = g_list_append(other->duplicates, op);
         if (other->total_timeout == 0) {
             crm_trace("Making a best-guess as to the timeout used");
             other->total_timeout = op->total_timeout =
-                TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL, op->base_timeout);
+                TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
         }
         crm_notice
             ("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)",
              op->action, op->target, op->client_name, op->id, other->client_name, other->originator,
              other->id, other->total_timeout);
         report_timeout_period(op, other->total_timeout);
         op->state = st_duplicate;
     }
 }
 
 static uint32_t fencing_active_peers(void)
 {
     uint32_t count = 0;
     crm_node_t *entry;
     GHashTableIter gIter;
 
     g_hash_table_iter_init(&gIter, crm_peer_cache);
     while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
         if(fencing_peer_active(entry)) {
             count++;
         }
     }
     return count;
 }
 
 int
 stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
 {
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
 
     op->state = st_done;
     op->completed = time(NULL);
     op->delegate = strdup("a human");
 
     crm_notice("Injecting manual confirmation that %s is safely off/down",
                crm_element_value(dev, F_STONITH_TARGET));
 
     remote_op_done(op, msg, pcmk_ok, FALSE);
 
     /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
     return -EINPROGRESS;
 }
 
 /*!
  * \internal
  * \brief Create a new remote stonith op
  * \param client, he local stonith client id that initaited the operation
  * \param request, The request from the client that started the operation
  * \param peer, Is this operation owned by another stonith peer? Operations
  *        owned by other peers are stored on all the stonith nodes, but only the
  *        owner executes the operation.  All the nodes get the results to the operation
  *        once the owner finishes executing it.
  */
 void *
 create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
 {
     remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
     int call_options = 0;
 
     if (remote_op_list == NULL) {
         remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
     }
 
     /* If this operation is owned by another node, check to make
      * sure we haven't already created this operation. */
     if (peer && dev) {
         const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
 
         CRM_CHECK(op_id != NULL, return NULL);
 
         op = g_hash_table_lookup(remote_op_list, op_id);
         if (op) {
             crm_debug("%s already exists", op_id);
             return op;
         }
     }
 
     op = calloc(1, sizeof(remote_fencing_op_t));
 
     crm_element_value_int(request, F_STONITH_TIMEOUT, (int *)&(op->base_timeout));
 
     if (peer && dev) {
         op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
     } else {
         op->id = crm_generate_uuid();
     }
 
     g_hash_table_replace(remote_op_list, op->id, op);
     CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL);
     crm_trace("Created %s", op->id);
 
     op->state = st_query;
     op->replies_expected = fencing_active_peers();
     op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
     op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
     op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
     op->created = time(NULL);
 
     if (op->originator == NULL) {
         /* Local or relayed request */
         op->originator = strdup(stonith_our_uname);
     }
 
     CRM_LOG_ASSERT(client != NULL);
     if (client) {
         op->client_id = strdup(client);
     }
 
     op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
 
     op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
     op->request = copy_xml(request);    /* TODO: Figure out how to avoid this */
     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
     op->call_options = call_options;
 
     crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid));
 
     crm_trace("%s new stonith op: %s - %s of %s for %s",
               (peer
                && dev) ? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name);
 
     if (op->call_options & st_opt_cs_nodeid) {
         int nodeid = crm_atoi(op->target, NULL);
         crm_node_t *node = crm_get_peer(nodeid, NULL);
 
         /* Ensure the conversion only happens once */
         op->call_options &= ~st_opt_cs_nodeid;
 
         if (node && node->uname) {
             free(op->target);
             op->target = strdup(node->uname);
         } else {
             crm_warn("Could not expand nodeid '%s' into a host name (%p)", op->target, node);
         }
     }
 
     /* check to see if this is a duplicate operation of another in-flight operation */
     merge_duplicates(op);
 
     return op;
 }
 
 remote_fencing_op_t *
 initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
 {
     int query_timeout = 0;
     xmlNode *query = NULL;
     const char *client_id = NULL;
     remote_fencing_op_t *op = NULL;
 
     if (client) {
         client_id = client->id;
     } else {
         client_id = crm_element_value(request, F_STONITH_CLIENTID);
     }
 
     CRM_LOG_ASSERT(client_id != NULL);
     op = create_remote_stonith_op(client_id, request, FALSE);
     op->owner = TRUE;
     if (manual_ack) {
         crm_notice("Initiating manual confirmation for %s: %s",
                    op->target, op->id);
         return op;
     }
 
     CRM_CHECK(op->action, return NULL);
 
     if (stonith_topology_next(op) != pcmk_ok) {
         op->state = st_failed;
     }
 
     switch (op->state) {
         case st_failed:
             crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action,
                      op->target, op->id);
             remote_op_done(op, NULL, -EINVAL, FALSE);
             return op;
 
         case st_duplicate:
             crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action,
                      op->target, op->id);
             return op;
 
         default:
             crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target,
                        op->id, op->state);
     }
 
     query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
                               NULL, op->call_options);
 
     crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
     crm_xml_add(query, F_STONITH_TARGET, op->target);
     crm_xml_add(query, F_STONITH_ACTION, op->action);
     crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
     crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
     crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
     crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
 
     send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
     free_xml(query);
 
     query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
     op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
 
     return op;
 }
 
 enum find_best_peer_options {
     /*! Skip checking the target peer for capable fencing devices */
     FIND_PEER_SKIP_TARGET = 0x0001,
     /*! Only check the target peer for capable fencing devices */
     FIND_PEER_TARGET_ONLY = 0x0002,
     /*! Skip peers and devices that are not verified */
     FIND_PEER_VERIFIED_ONLY = 0x0004,
 };
 
 static st_query_result_t *
 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
 {
     GListPtr iter = NULL;
     gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
 
     if (!device && is_set(op->call_options, st_opt_topology)) {
         return NULL;
     }
 
     for (iter = op->query_results; iter != NULL; iter = iter->next) {
         st_query_result_t *peer = iter->data;
 
         crm_trace("Testing result from %s for %s with %d devices: %d %x",
                   peer->host, op->target, peer->ndevices, peer->tried, options);
         if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) {
             continue;
         }
         if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) {
             continue;
         }
 
         if (is_set(op->call_options, st_opt_topology)) {
 
             if (grab_peer_device(peer, device, verified_devices_only)) {
                 return peer;
             }
 
         } else if ((peer->tried == FALSE)
                    && count_peer_devices(peer, verified_devices_only)) {
 
             /* No topology: Use the current best peer */
             crm_trace("Simple fencing");
             return peer;
         }
     }
 
     return NULL;
 }
 
 static st_query_result_t *
 stonith_choose_peer(remote_fencing_op_t * op)
 {
     const char *device = NULL;
     st_query_result_t *peer = NULL;
     uint32_t active = fencing_active_peers();
 
     do {
         if (op->devices) {
             device = op->devices->data;
             crm_trace("Checking for someone to fence %s with %s", op->target, device);
         } else {
             crm_trace("Checking for someone to fence %s", op->target);
         }
 
         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
         if (peer) {
             crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
             return peer;
         }
 
         if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
             crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
             return NULL;
         }
 
         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
         if (peer) {
             crm_trace("Found best unverified peer %s", peer->host);
             return peer;
         }
 
         peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
         if(peer) {
             crm_trace("%s will fence itself", peer->host);
             return peer;
         }
 
         /* Try the next fencing level if there is one */
     } while (is_set(op->call_options, st_opt_topology)
              && stonith_topology_next(op) == pcmk_ok);
 
     crm_notice("Couldn't find anyone to fence %s with %s", op->target, device?device:"<any>");
     return NULL;
 }
 
 static int
-get_device_timeout(st_query_result_t * peer, const char *device, int default_timeout)
+get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
+                   const char *device)
 {
-    gpointer res;
-    int delay_max = 0;
+    device_properties_t *props;
 
     if (!peer || !device) {
-        return default_timeout;
+        return op->base_timeout;
     }
 
-    res = g_hash_table_lookup(peer->delay_maxes, device);
-    if (res && GPOINTER_TO_INT(res) > 0) {
-        delay_max = GPOINTER_TO_INT(res);
+    props = g_hash_table_lookup(peer->devices, device);
+    if (!props) {
+        return op->base_timeout;
     }
 
-    res = g_hash_table_lookup(peer->custom_action_timeouts, device);
+    return (props->custom_action_timeout?
+           props->custom_action_timeout : op->base_timeout)
+           + props->delay_max;
+}
+
+struct timeout_data {
+    const remote_fencing_op_t *op;
+    const st_query_result_t *peer;
+    int total_timeout;
+};
 
-    return res ? GPOINTER_TO_INT(res) + delay_max : default_timeout + delay_max;
+/*!
+ * \internal
+ * \brief Add timeout to a total if device has not been executed yet
+ *
+ * \param[in] key        GHashTable key (device ID)
+ * \param[in] value      GHashTable value (device properties)
+ * \param[in] user_data  Timeout data
+ */
+static void
+add_device_timeout(gpointer key, gpointer value, gpointer user_data)
+{
+    const char *device_id = key;
+    device_properties_t *props = value;
+    struct timeout_data *timeout = user_data;
+
+    if (!props->executed) {
+        timeout->total_timeout += get_device_timeout(timeout->op,
+                                                     timeout->peer, device_id);
+    }
 }
 
 static int
-get_peer_timeout(st_query_result_t * peer, int default_timeout)
+get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
 {
-    int total_timeout = 0;
+    struct timeout_data timeout;
 
-    GListPtr cur = NULL;
+    timeout.op = op;
+    timeout.peer = peer;
+    timeout.total_timeout = 0;
 
-    for (cur = peer->device_list; cur; cur = cur->next) {
-        total_timeout += get_device_timeout(peer, cur->data, default_timeout);
-    }
+    g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
 
-    return total_timeout ? total_timeout : default_timeout;
+    return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
 }
 
 static int
-get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, int default_timeout)
+get_op_total_timeout(const remote_fencing_op_t *op,
+                     const st_query_result_t *chosen_peer)
 {
     int total_timeout = 0;
     stonith_topology_t *tp = find_topology_for_host(op->target);
 
     if (is_set(op->call_options, st_opt_topology) && tp) {
         int i;
         GListPtr device_list = NULL;
         GListPtr iter = NULL;
 
         /* Yep, this looks scary, nested loops all over the place.
          * Here is what is going on.
          * Loop1: Iterate through fencing levels.
          * Loop2: If a fencing level has devices, loop through each device
          * Loop3: For each device in a fencing level, see what peer owns it
          *        and what that peer has reported the timeout is for the device.
          */
         for (i = 0; i < ST_LEVEL_MAX; i++) {
             if (!tp->levels[i]) {
                 continue;
             }
             for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
                 for (iter = op->query_results; iter != NULL; iter = iter->next) {
-                    st_query_result_t *peer = iter->data;
+                    const st_query_result_t *peer = iter->data;
 
                     if (find_peer_device(peer, device_list->data)) {
-                        total_timeout +=
-                            get_device_timeout(peer, device_list->data, default_timeout);
+                        total_timeout += get_device_timeout(op, peer,
+                                                            device_list->data);
                         break;
                     }
                 }               /* End Loop3: match device with peer that owns device, find device's timeout period */
             }                   /* End Loop2: iterate through devices at a specific level */
         }                       /*End Loop1: iterate through fencing levels */
 
     } else if (chosen_peer) {
-        total_timeout = get_peer_timeout(chosen_peer, default_timeout);
+        total_timeout = get_peer_timeout(op, chosen_peer);
     } else {
-        total_timeout = default_timeout;
+        total_timeout = op->base_timeout;
     }
 
-    return total_timeout ? total_timeout : default_timeout;
+    return total_timeout ? total_timeout : op->base_timeout;
 }
 
 static void
 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
 {
     GListPtr iter = NULL;
     xmlNode *update = NULL;
     const char *client_node = NULL;
     const char *client_id = NULL;
     const char *call_id = NULL;
 
     if (op->call_options & st_opt_sync_call) {
         /* There is no reason to report the timeout for a syncronous call. It
          * is impossible to use the reported timeout to do anything when the client
          * is blocking for the response.  This update is only important for
          * async calls that require a callback to report the results in. */
         return;
     } else if (!op->request) {
         return;
     }
 
     crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id);
     client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
     call_id = crm_element_value(op->request, F_STONITH_CALLID);
     client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
     if (!client_node || !call_id || !client_id) {
         return;
     }
 
     if (safe_str_eq(client_node, stonith_our_uname)) {
         /* The client is connected to this node, send the update direclty to them */
         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
         return;
     }
 
     /* The client is connected to another node, relay this update to them */
     update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
     crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
     crm_xml_add(update, F_STONITH_CLIENTID, client_id);
     crm_xml_add(update, F_STONITH_CALLID, call_id);
     crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
 
     send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
 
     free_xml(update);
 
     for (iter = op->duplicates; iter != NULL; iter = iter->next) {
         remote_fencing_op_t *dup = iter->data;
 
         crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id);
         report_timeout_period(iter->data, op_timeout);
     }
 }
 
+/*
+ * \internal
+ * \brief Advance an operation to the next device in its topology
+ *
+ * \param[in,out] op      Operation to advance
+ * \param[in]     device  ID of device just completed
+ * \param[in]     msg     XML reply that contained device result (if available)
+ * \param[in]     rc      Return code of device's execution
+ */
+static void
+advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
+                    int rc)
+{
+    /* Advance to the next device at this topology level, if any */
+    if (op->devices) {
+        op->devices = op->devices->next;
+    }
+
+    /* If this device was required, it's not anymore */
+    remove_required_device(op, device);
+
+    /* If there are no more devices at this topology level,
+     * run through any required devices not already executed
+     */
+    if (op->devices == NULL) {
+        op->devices = op->required_list;
+    }
+
+    if (op->devices) {
+        /* Necessary devices remain, so execute the next one */
+        crm_trace("Next for %s on behalf of %s@%s (rc was %d)",
+                  op->target, op->originator, op->client_name, rc);
+        call_remote_stonith(op, NULL);
+    } else {
+        /* We're done with all devices, so finalize operation */
+        crm_trace("Marking complex fencing op for %s as complete", op->target);
+        op->state = st_done;
+        remote_op_done(op, msg, rc, FALSE);
+    }
+}
+
 void
 call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
 {
     const char *device = NULL;
     int timeout = op->base_timeout;
 
     crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
     if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
         peer = stonith_choose_peer(op);
     }
 
     if (!op->op_timer_total) {
-        int total_timeout = get_op_total_timeout(op, peer, op->base_timeout);
+        int total_timeout = get_op_total_timeout(op, peer);
 
         op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
         op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
         report_timeout_period(op, op->total_timeout);
         crm_info("Total remote op timeout set to %d for fencing of node %s for %s.%.8s",
                  total_timeout, op->target, op->client_name, op->id);
     }
 
     if (is_set(op->call_options, st_opt_topology) && op->devices) {
-        /* Ignore any preference, they might not have the device we need */
-        /* When using topology, the stonith_choose_peer function pops off
-         * the peer from the op's query results.  Make sure to calculate
-         * the op_timeout before calling this function when topology is in use */
+        /* Ignore any peer preference, they might not have the device we need */
+        /* When using topology, stonith_choose_peer() removes the device from
+         * further consideration, so be sure to calculate timeout beforehand */
         peer = stonith_choose_peer(op);
+
         device = op->devices->data;
-        timeout = get_device_timeout(peer, device, op->base_timeout);
+        timeout = get_device_timeout(op, peer, device);
     }
 
     if (peer) {
         int timeout_one = 0;
         xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
 
         crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
         crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
         crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
         crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
         crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
         crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
         crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
         crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
 
         if (device) {
-            timeout_one =
-                TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout);
+            timeout_one = TIMEOUT_MULTIPLY_FACTOR *
+                          get_device_timeout(op, peer, device);
             crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host,
                      op->action, op->target, device, op->client_name, timeout_one);
             crm_xml_add(remote_op, F_STONITH_DEVICE, device);
             crm_xml_add(remote_op, F_STONITH_MODE, "slave");
 
         } else {
-            timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout);
+            timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
             crm_info("Requesting that %s perform op %s %s for %s (%ds, %ds)",
                      peer->host, op->action, op->target, op->client_name, timeout_one, stonith_watchdog_timeout_ms);
             crm_xml_add(remote_op, F_STONITH_MODE, "smart");
 
         }
 
         op->state = st_exec;
         if (op->op_timer_one) {
             g_source_remove(op->op_timer_one);
         }
 
         if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) {
             crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)",
                        stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device);
             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
 
-            /* TODO: We should probably look into peer->device_list to verify watchdog is going to be in use */
+            /* TODO check devices to verify watchdog will be in use */
         } else if(stonith_watchdog_timeout_ms > 0
                   && safe_str_eq(peer->host, op->target)
                   && safe_str_neq(op->action, "on")) {
             crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)",
                        stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device);
             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
 
         } else {
             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
         }
 
 
         send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
         peer->tried = TRUE;
         free_xml(remote_op);
         return;
 
     } else if (op->owner == FALSE) {
         crm_err("The termination of %s for %s is not ours to control", op->target, op->client_name);
 
     } else if (op->query_timer == 0) {
         /* We've exhausted all available peers */
         crm_info("No remaining peers capable of terminating %s for %s (%d)", op->target,
                  op->client_name, op->state);
         CRM_LOG_ASSERT(op->state < st_done);
         remote_op_timeout(op);
 
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
         int rc = -EHOSTUNREACH;
 
         /* if the operation never left the query state,
          * but we have all the expected replies, then no devices
          * are available to execute the fencing operation. */
         if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) {
             crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)",
                      stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device);
 
             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
             return;
         }
 
         if (op->state == st_query) {
            crm_info("None of the %d peers have devices capable of terminating %s for %s (%d)",
                    op->replies, op->target, op->client_name, op->state);
 
             rc = -ENODEV;
         } else {
            crm_info("None of the %d peers are capable of terminating %s for %s (%d)",
                    op->replies, op->target, op->client_name, op->state);
         }
 
         op->state = st_failed;
         remote_op_done(op, NULL, rc, FALSE);
 
     } else if (device) {
         crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s",
                  op->target, device, op->client_name, op->id);
     } else {
         crm_info("Waiting for additional peers capable of terminating %s for %s%.8s",
                  op->target, op->client_name, op->id);
     }
 }
 
 /*!
  * \internal
  * \brief Comparison function for sorting query results
  *
  * \param[in] a  GList item to compare
  * \param[in] b  GList item to compare
  *
  * \return Per the glib documentation, "a negative integer if the first value
  *         comes before the second, 0 if they are equal, or a positive integer
  *         if the first value comes after the second."
  */
 static gint
 sort_peers(gconstpointer a, gconstpointer b)
 {
     const st_query_result_t *peer_a = a;
     const st_query_result_t *peer_b = b;
 
     return (peer_b->ndevices - peer_a->ndevices);
 }
 
 /*!
  * \internal
  * \brief Determine if all the devices in the topology are found or not
  */
 static gboolean
 all_topology_devices_found(remote_fencing_op_t * op)
 {
     GListPtr device = NULL;
     GListPtr iter = NULL;
-    GListPtr match = NULL;
+    device_properties_t *match = NULL;
     stonith_topology_t *tp = NULL;
     gboolean skip_target = FALSE;
     int i;
 
     tp = find_topology_for_host(op->target);
     if (!tp) {
         return FALSE;
     }
     if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) {
         /* Don't count the devices on the target node if we are killing
          * the target node. */
         skip_target = TRUE;
     }
 
     for (i = 0; i < ST_LEVEL_MAX; i++) {
         for (device = tp->levels[i]; device; device = device->next) {
             match = NULL;
             for (iter = op->query_results; iter && !match; iter = iter->next) {
                 st_query_result_t *peer = iter->data;
 
                 if (skip_target && safe_str_eq(peer->host, op->target)) {
                     continue;
                 }
                 match = find_peer_device(peer, device->data);
             }
             if (!match) {
                 return FALSE;
             }
         }
     }
 
     return TRUE;
 }
 
 /*
  * \internal
  * \brief Parse action-specific device properties from XML
  *
  * \param[in]     msg     XML element containing the properties
  * \param[in]     peer    Name of peer that sent XML (for logs)
  * \param[in]     device  Device ID (for logs)
  * \param[in]     action  Action the properties relate to
- * \param[in,out] values  3-integer array to contain timeout, delay, required
+ * \param[in,out] props   Device properties to update
  */
 static void
 parse_action_specific(xmlNode *xml, const char *peer, const char *device,
-                      const char *action, int *values)
+                      const char *action, remote_fencing_op_t *op,
+                      device_properties_t *props)
 {
-    values[0] = 0;
-    crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, &values[0]);
-    if (values[0]) {
+    int required;
+
+    props->custom_action_timeout = 0;
+    crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
+                          &props->custom_action_timeout);
+    if (props->custom_action_timeout) {
         crm_trace("Peer %s with device %s returned %s action timeout %d",
-                  peer, device, action, values[0]);
+                  peer, device, action, props->custom_action_timeout);
     }
 
-    values[1] = 0;
-    crm_element_value_int(xml, F_STONITH_DELAY_MAX, &values[1]);
-    if (values[1]) {
+    props->delay_max = 0;
+    crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max);
+    if (props->delay_max) {
         crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
-                  peer, device, values[1], action);
+                  peer, device, props->delay_max, action);
     }
 
-    values[2] = 0;
-    crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &values[2]);
-    if (values[2]) {
+    required = 0;
+    crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
+    if (required) {
+        /* If the action is marked as required, add the device to the
+         * operation's list of required devices. We use this
+         * for unfencing when executing a topology.
+         * Required devices get executed regardless of their topology level.
+         */
         crm_trace("Peer %s requires device %s to execute for action %s",
                   peer, device, action);
+        add_required_device(op, device);
+    }
+}
+
+/*
+ * \internal
+ * \brief Parse one device's properties from peer's XML query reply
+ *
+ * \param[in]     xml       XML node containing device properties
+ * \param[in,out] op        Operation that query and reply relate to
+ * \param[in,out] result    Peer's results
+ * \param[in]     device    ID of device being parsed
+ */
+static void
+add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
+                      st_query_result_t *result, const char *device)
+{
+    int verified = 0;
+    device_properties_t *props = calloc(1, sizeof(device_properties_t));
+
+    /* Add a new entry to this result's devices list */
+    CRM_ASSERT(props != NULL);
+    g_hash_table_insert(result->devices, strdup(device), props);
+
+    /* Peers with verified (monitored) access will be preferred */
+    crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
+    if (verified) {
+        crm_trace("Peer %s has confirmed a verified device %s",
+                  result->host, device);
+        props->verified = TRUE;
+    }
+
+    /* Parse action-specific device properties */
+    parse_action_specific(xml, result->host, device, op->action,
+                          op, props);
+}
+
+/*
+ * \internal
+ * \brief Parse a peer's XML query reply and add it to operation's results
+ *
+ * \param[in,out] op        Operation that query and reply relate to
+ * \param[in]     host      Name of peer that sent this reply
+ * \param[in]     ndevices  Number of devices expected in reply
+ * \param[in]     xml       XML node containing device list
+ *
+ * \return Newly allocated result structure with parsed reply
+ */
+static st_query_result_t *
+add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
+{
+    st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
+    xmlNode *child;
+
+    CRM_CHECK(result != NULL, return NULL);
+    result->host = strdup(host);
+    result->devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free);
+
+    /* Each child element describes one capable device available to the peer */
+    for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
+        const char *device = ID(child);
+
+        if (device) {
+            add_device_properties(child, op, result, device);
+        }
     }
+
+    result->ndevices = g_hash_table_size(result->devices);
+    CRM_CHECK(ndevices == result->ndevices,
+              crm_err("Query claimed to have %d devices but %d found",
+                      ndevices, result->ndevices));
+
+    op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
+    return result;
 }
 
 /*
  * \internal
  * \brief Handle a peer's reply to our fencing query
  *
  * Parse a query result from XML and store it in the remote operation
  * table, and when enough replies have been received, issue a fencing request.
  *
  * \param[in] msg  XML reply received
  *
  * \return pcmk_ok on success, -errno on error
  *
  * \note See initiate_remote_stonith_op() for how the XML query was initially
  *       formed, and stonith_query() for how the peer formed its XML reply.
  */
 int
 process_remote_stonith_query(xmlNode * msg)
 {
     int ndevices = 0;
     gboolean host_is_target = FALSE;
     gboolean have_all_replies = FALSE;
     const char *id = NULL;
     const char *host = NULL;
     remote_fencing_op_t *op = NULL;
     st_query_result_t *result = NULL;
     uint32_t replies_expected;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
-    xmlNode *child = NULL;
 
     CRM_CHECK(dev != NULL, return -EPROTO);
 
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
     CRM_CHECK(id != NULL, return -EPROTO);
 
     dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
     CRM_CHECK(dev != NULL, return -EPROTO);
     crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
 
     op = g_hash_table_lookup(remote_op_list, id);
     if (op == NULL) {
         crm_debug("Unknown or expired remote op: %s", id);
         return -EOPNOTSUPP;
     }
 
     replies_expected = QB_MIN(op->replies_expected, fencing_active_peers());
     if ((++op->replies >= replies_expected) && (op->state == st_query)) {
         have_all_replies = TRUE;
     }
     host = crm_element_value(msg, F_ORIG);
     host_is_target = safe_str_eq(host, op->target);
 
-    if (ndevices <= 0) {
-        /* If we're doing 'known' then we might need to fire anyway */
-        crm_trace("Query result %d of %d from %s for %s/%s (%d devices) %s",
-                  op->replies, replies_expected, host,
-                  op->target, op->action, ndevices, id);
-        if (have_all_replies) {
-            crm_info("All query replies have arrived, continuing (%d expected/%d received for id %s)",
-                     replies_expected, op->replies, id);
-            call_remote_stonith(op, NULL);
-        }
-        return pcmk_ok;
-    }
-
     crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
              op->replies, replies_expected, host,
              op->target, op->action, ndevices, id);
-    result = calloc(1, sizeof(st_query_result_t));
-    result->host = strdup(host);
-    result->ndevices = ndevices;
-    result->custom_action_timeouts = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL);
-    result->delay_maxes = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL);
-    result->verified_devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL);
-
-    for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) {
-        const char *device = ID(child);
-
-        if (device) {
-            int values[3];
-            int verified = 0;
-
-            result->device_list = g_list_prepend(result->device_list, strdup(device));
-            crm_element_value_int(child, F_STONITH_DEVICE_VERIFIED, &verified);
-
-            /* Parse properties specific to the operation's action */
-            parse_action_specific(child, result->host, device, op->action, values);
-            if (values[0]) {
-                g_hash_table_insert(result->custom_action_timeouts,
-                                    strdup(device), GINT_TO_POINTER(values[0]));
-            }
-            if (values[1] > 0) {
-                g_hash_table_insert(result->delay_maxes,
-                                    strdup(device), GINT_TO_POINTER(values[1]));
-            }
-            if (verified) {
-                crm_trace("Peer %s has confirmed a verified device %s", result->host, device);
-                g_hash_table_insert(result->verified_devices,
-                                    strdup(device), GINT_TO_POINTER(verified));
-            }
-            if (values[2]) {
-                /* This matters when executing a topology. Required devices will get 
-                 * executed regardless of their topology level. We use this for unfencing. */
-                add_required_device(op, device);
-            }
-        }
+    if (ndevices > 0) {
+        result = add_result(op, host, ndevices, dev);
     }
 
-    CRM_CHECK(ndevices == g_list_length(result->device_list),
-              crm_err("Mis-match: Query claimed to have %d devices but %d found", ndevices,
-                      g_list_length(result->device_list)));
-
-    op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
-
     if (is_set(op->call_options, st_opt_topology)) {
         /* If we start the fencing before all the topology results are in,
          * it is possible fencing levels will be skipped because of the missing
          * query results. */
         if (op->state == st_query && all_topology_devices_found(op)) {
             /* All the query results are in for the topology, start the fencing ops. */
             crm_trace("All topology devices found");
             call_remote_stonith(op, result);
 
         } else if (have_all_replies) {
             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
             call_remote_stonith(op, NULL);
         }
 
     } else if (op->state == st_query) {
         int nverified = count_peer_devices(result, TRUE);
 
         /* We have a result for a non-topology fencing op that looks promising,
          * go ahead and start fencing before query timeout */
-        if ((host_is_target == FALSE) && nverified) {
+        if (result && (host_is_target == FALSE) && nverified) {
             /* we have a verified device living on a peer that is not the target */
             crm_trace("Found %d verified devices", nverified);
             call_remote_stonith(op, result);
 
         } else if (have_all_replies) {
             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
             call_remote_stonith(op, NULL);
 
         } else {
             crm_trace("Waiting for more peer results before launching fencing operation");
         }
 
-    } else if (op->state == st_done) {
+    } else if (result && (op->state == st_done)) {
         crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
                  result->host, result->ndevices, op->state);
     }
 
     return pcmk_ok;
 }
 
 /*
  * \internal
  * \brief Handle a peer's reply to a fencing request
  *
  * Parse a fencing reply from XML, and either finalize the operation
  * or attempt another device as appropriate.
  *
  * \param[in] msg  XML reply received
  *
  * \return pcmk_ok on success, -errno on error
  */
 int
 process_remote_stonith_exec(xmlNode * msg)
 {
     int rc = 0;
     const char *id = NULL;
     const char *device = NULL;
     remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
 
     CRM_CHECK(dev != NULL, return -EPROTO);
 
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
     CRM_CHECK(id != NULL, return -EPROTO);
 
     dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
     CRM_CHECK(dev != NULL, return -EPROTO);
 
     crm_element_value_int(dev, F_STONITH_RC, &rc);
 
     device = crm_element_value(dev, F_STONITH_DEVICE);
 
     if (remote_op_list) {
         op = g_hash_table_lookup(remote_op_list, id);
     }
 
     if (op == NULL && rc == pcmk_ok) {
         /* Record successful fencing operations */
         const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
 
         op = create_remote_stonith_op(client_id, dev, TRUE);
     }
 
     if (op == NULL) {
         /* Could be for an event that began before we started */
         /* TODO: Record the op for later querying */
         crm_info("Unknown or expired remote op: %s", id);
         return -EOPNOTSUPP;
     }
 
     if (op->devices && device && safe_str_neq(op->devices->data, device)) {
         crm_err
             ("Received outdated reply for device %s (instead of %s) to %s node %s. Operation already timed out at remote level.",
              device, op->devices->data, op->action, op->target);
         return rc;
     }
 
     if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
         crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
                   op->action, op->target, op->client_name, op->id, op->originator,
                   pcmk_strerror(rc), rc);
         if (rc == pcmk_ok) {
             op->state = st_done;
         } else {
             op->state = st_failed;
         }
         remote_op_done(op, msg, rc, FALSE);
         return pcmk_ok;
     } else if (safe_str_neq(op->originator, stonith_our_uname)) {
         /* If this isn't a remote level broadcast, and we are not the
          * originator of the operation, we should not be receiving this msg. */
         crm_err
             ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
              stonith_our_uname, device, op->target);
         return rc;
     }
 
     if (is_set(op->call_options, st_opt_topology)) {
         const char *device = crm_element_value(msg, F_STONITH_DEVICE);
 
         crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)",
                    device, op->target, op->client_name, op->originator,
                    pcmk_strerror(rc), rc);
 
         /* We own the op, and it is complete. broadcast the result to all nodes
          * and notify our local clients. */
         if (op->state == st_done) {
             remote_op_done(op, msg, rc, FALSE);
             return rc;
         }
 
-        /* An operation completed succesfully but has not yet been marked as done.
-         * Continue the topology if more devices exist at the current level, otherwise
-         * mark as done. */
         if (rc == pcmk_ok) {
-            GListPtr required_match = g_list_find_custom(op->required_list, device, sort_strings);
-            if (op->devices) {
-                /* Success, are there any more? */
-                op->devices = op->devices->next;
-            }
-            if (required_match) {
-                op->required_list = g_list_remove(op->required_list, required_match->data);
-            }
-            /* if no more devices at this fencing level, we are done,
-             * else we need to contine with executing the next device in the list */
-            if (op->devices == NULL) {
-                crm_trace("Marking complex fencing op for %s as complete", op->target);
-                op->state = st_done;
-                remote_op_done(op, msg, rc, FALSE);
-                return rc;
-            }
+            /* An operation completed successfully. Try another device if
+             * necessary, otherwise mark the operation as done. */
+            advance_op_topology(op, device, msg, rc);
+            return rc;
         } else {
             /* This device failed, time to try another topology level. If no other
              * levels are available, mark this operation as failed and report results. */
             if (stonith_topology_next(op) != pcmk_ok) {
                 op->state = st_failed;
                 remote_op_done(op, msg, rc, FALSE);
                 return rc;
             }
         }
     } else if (rc == pcmk_ok && op->devices == NULL) {
         crm_trace("All done for %s", op->target);
 
         op->state = st_done;
         remote_op_done(op, msg, rc, FALSE);
         return rc;
     } else if (rc == -ETIME && op->devices == NULL) {
         /* If the operation timed out don't bother retrying other peers. */
         op->state = st_failed;
         remote_op_done(op, msg, rc, FALSE);
         return rc;
     } else {
         /* fall-through and attempt other fencing action using another peer */
     }
 
-    /* Retry on failure or execute the rest of the topology */
+    /* Retry on failure */
     crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
               op->client_name, rc);
     call_remote_stonith(op, NULL);
     return rc;
 }
 
 int
 stonith_fence_history(xmlNode * msg, xmlNode ** output)
 {
     int rc = 0;
     const char *target = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE);
 
     if (dev) {
         int options = 0;
 
         target = crm_element_value(dev, F_STONITH_TARGET);
         crm_element_value_int(msg, F_STONITH_CALLOPTS, &options);
         if (target && (options & st_opt_cs_nodeid)) {
             int nodeid = crm_atoi(target, NULL);
             crm_node_t *node = crm_get_peer(nodeid, NULL);
 
             if (node) {
                 target = node->uname;
             }
         }
     }
 
     crm_trace("Looking for operations on %s in %p", target, remote_op_list);
 
     *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST);
     if (remote_op_list) {
         GHashTableIter iter;
         remote_fencing_op_t *op = NULL;
 
         g_hash_table_iter_init(&iter, remote_op_list);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
             xmlNode *entry = NULL;
 
             if (target && strcmp(op->target, target) != 0) {
                 continue;
             }
 
             rc = 0;
             crm_trace("Attaching op %s", op->id);
             entry = create_xml_node(*output, STONITH_OP_EXEC);
             crm_xml_add(entry, F_STONITH_TARGET, op->target);
             crm_xml_add(entry, F_STONITH_ACTION, op->action);
             crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
             crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
             crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name);
             crm_xml_add_int(entry, F_STONITH_DATE, op->completed);
             crm_xml_add_int(entry, F_STONITH_STATE, op->state);
         }
     }
 
     return rc;
 }
 
 gboolean
 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
 {
     GHashTableIter iter;
     time_t now = time(NULL);
     remote_fencing_op_t *rop = NULL;
 
     crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list);
 
     if (tolerance <= 0 || !remote_op_list || target == NULL || action == NULL) {
         return FALSE;
     }
 
     g_hash_table_iter_init(&iter, remote_op_list);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
         if (strcmp(rop->target, target) != 0) {
             continue;
         } else if (rop->state != st_done) {
             continue;
         } else if (strcmp(rop->action, action) != 0) {
             continue;
         } else if ((rop->completed + tolerance) < now) {
             continue;
         }
 
         crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
                    target, action, tolerance, rop->delegate, rop->originator);
         return TRUE;
     }
     return FALSE;
 }
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 73c44a82df..106c674188 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,3408 +1,3412 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <crm_internal.h>
 
 #include <glib.h>
 
 #include <crm/crm.h>
 #include <crm/services.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crm/common/util.h>
 #include <crm/pengine/rules.h>
 #include <crm/pengine/internal.h>
 #include <unpack.h>
 
 CRM_TRACE_INIT_DATA(pe_status);
 
 #define set_config_flag(data_set, option, flag) do {			\
 	const char *tmp = pe_pref(data_set->config_hash, option);	\
 	if(tmp) {							\
 	    if(crm_is_true(tmp)) {					\
 		set_bit(data_set->flags, flag);			\
 	    } else {							\
 		clear_bit(data_set->flags, flag);		\
 	    }								\
 	}								\
     } while(0)
 
 gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
                        enum action_fail_response *failed, pe_working_set_t * data_set);
 static gboolean determine_remote_online_status(node_t * this_node);
 
 static gboolean
 is_dangling_container_remote_node(node_t *node)
 {
     /* we are looking for a remote-node that was supposed to be mapped to a
      * container resource, but all traces of that container have disappeared 
      * from both the config and the status section. */
     if (is_remote_node(node) &&
         node->details->remote_rsc &&
         node->details->remote_rsc->container == NULL &&
         is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) {
         return TRUE;
     }
 
     return FALSE;
 }
 
 void
 pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
 {
     CRM_CHECK(node, return);
 
     /* fence remote nodes living in a container by marking the container as failed. */
     if (is_container_remote_node(node)) {
         resource_t *rsc = node->details->remote_rsc->container;
         if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
             crm_warn("Remote node %s will be fenced by recovering container resource %s",
                 node->details->uname, rsc->id, reason);
             set_bit(rsc->flags, pe_rsc_failed);
         }
     } else if (is_dangling_container_remote_node(node)) {
         crm_info("Fencing remote node %s has already occurred, container no longer exists. cleaning up dangling connection resource:  %s",
                   node->details->uname, reason);
         set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
 
     } else if (is_baremetal_remote_node(node)) {
         if(pe_can_fence(data_set, node)) {
             crm_warn("Node %s will be fenced %s", node->details->uname, reason);
         } else {
             crm_warn("Node %s is unclean %s", node->details->uname, reason);
         }
         node->details->unclean = TRUE;
         node->details->remote_requires_reset = TRUE;
 
     } else if (node->details->unclean == FALSE) {
         if(pe_can_fence(data_set, node)) {
             crm_warn("Node %s will be fenced %s", node->details->uname, reason);
         } else {
             crm_warn("Node %s is unclean %s", node->details->uname, reason);
         }
         node->details->unclean = TRUE;
     } else {
         crm_trace("Huh? %s %s", node->details->uname, reason);
     }
 }
 
 gboolean
 unpack_config(xmlNode * config, pe_working_set_t * data_set)
 {
     const char *value = NULL;
     GHashTable *config_hash =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
 
     xmlXPathObjectPtr xpathObj = NULL;
 
     if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         xpathObj = xpath_search(data_set->input, "//nvpair[@name='provides' and @value='unfencing']");
         if(xpathObj && numXpathResults(xpathObj) > 0) {
             set_bit(data_set->flags, pe_flag_enable_unfencing);
         }
         freeXpathObject(xpathObj);
     }
 
     if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         xpathObj = xpath_search(data_set->input, "//nvpair[@name='requires' and @value='unfencing']");
         if(xpathObj && numXpathResults(xpathObj) > 0) {
             set_bit(data_set->flags, pe_flag_enable_unfencing);
         }
         freeXpathObject(xpathObj);
     }
 
 
 #ifdef REDHAT_COMPAT_6
     if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         xpathObj = xpath_search(data_set->input, "//primitive[@type='fence_scsi']");
         if(xpathObj && numXpathResults(xpathObj) > 0) {
             set_bit(data_set->flags, pe_flag_enable_unfencing);
         }
         freeXpathObject(xpathObj);
     }
 #endif
 
     data_set->config_hash = config_hash;
 
     unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
                                CIB_OPTIONS_FIRST, FALSE, data_set->now);
 
     verify_pe_options(data_set->config_hash);
 
     set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
     if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
         crm_info("Startup probes: disabled (dangerous)");
     }
 
     value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
     if (value && crm_is_true(value)) {
         crm_notice("Relying on watchdog integration for fencing");
         set_bit(data_set->flags, pe_flag_have_stonith_resource);
     }
 
     value = pe_pref(data_set->config_hash, "stonith-timeout");
     data_set->stonith_timeout = crm_get_msec(value);
     crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
 
     set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
     crm_debug("STONITH of failed nodes is %s",
               is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
 
     data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
     crm_trace("STONITH will %s nodes", data_set->stonith_action);
 
     set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
     crm_debug("Stop all active resources: %s",
               is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
 
     set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
     if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
         crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
     }
 
     value = pe_pref(data_set->config_hash, "default-resource-stickiness");
     data_set->default_resource_stickiness = char2score(value);
     crm_debug("Default stickiness: %d", data_set->default_resource_stickiness);
 
     value = pe_pref(data_set->config_hash, "no-quorum-policy");
 
     if (safe_str_eq(value, "ignore")) {
         data_set->no_quorum_policy = no_quorum_ignore;
 
     } else if (safe_str_eq(value, "freeze")) {
         data_set->no_quorum_policy = no_quorum_freeze;
 
     } else if (safe_str_eq(value, "suicide")) {
         gboolean do_panic = FALSE;
 
         crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic);
 
         if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
             crm_config_err
                 ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false");
         }
 
         if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) {
             data_set->no_quorum_policy = no_quorum_suicide;
 
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) {
             crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum");
             data_set->no_quorum_policy = no_quorum_stop;
         }
 
     } else {
         data_set->no_quorum_policy = no_quorum_stop;
     }
 
     switch (data_set->no_quorum_policy) {
         case no_quorum_freeze:
             crm_debug("On loss of CCM Quorum: Freeze resources");
             break;
         case no_quorum_stop:
             crm_debug("On loss of CCM Quorum: Stop ALL resources");
             break;
         case no_quorum_suicide:
             crm_notice("On loss of CCM Quorum: Fence all remaining nodes");
             break;
         case no_quorum_ignore:
             crm_notice("On loss of CCM Quorum: Ignore");
             break;
     }
 
     set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
     crm_trace("Orphan resources are %s",
               is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
 
     set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
     crm_trace("Orphan resource actions are %s",
               is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
 
     set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
     crm_trace("Stopped resources are removed from the status section: %s",
               is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
 
     set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
     crm_trace("Maintenance mode: %s",
               is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
 
     if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
         clear_bit(data_set->flags, pe_flag_is_managed_default);
     } else {
         set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default);
     }
     crm_trace("By default resources are %smanaged",
               is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not ");
 
     set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
     crm_trace("Start failures are %s",
               is_set(data_set->flags,
                      pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
 
     node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
     node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
     node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
 
     crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
              pe_pref(data_set->config_hash, "node-health-red"),
              pe_pref(data_set->config_hash, "node-health-yellow"),
              pe_pref(data_set->config_hash, "node-health-green"));
 
     data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
     crm_trace("Placement strategy: %s", data_set->placement_strategy);
 
     return TRUE;
 }
 
 static void
 destroy_digest_cache(gpointer ptr)
 {
     op_digest_cache_t *data = ptr;
 
     free_xml(data->params_all);
     free_xml(data->params_restart);
     free(data->digest_all_calc);
     free(data->digest_restart_calc);
     free(data);
 }
 
 static node_t *
 create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set)
 {
     node_t *new_node = NULL;
 
     if (pe_find_node(data_set->nodes, uname) != NULL) {
         crm_config_warn("Detected multiple node entries with uname=%s"
                         " - this is rarely intended", uname);
     }
 
     new_node = calloc(1, sizeof(node_t));
     if (new_node == NULL) {
         return NULL;
     }
 
     new_node->weight = char2score(score);
     new_node->fixed = FALSE;
     new_node->details = calloc(1, sizeof(struct node_shared_s));
 
     if (new_node->details == NULL) {
         free(new_node);
         return NULL;
     }
 
     crm_trace("Creating node for entry %s/%s", uname, id);
     new_node->details->id = id;
     new_node->details->uname = uname;
     new_node->details->online = FALSE;
     new_node->details->shutdown = FALSE;
     new_node->details->rsc_discovery_enabled = TRUE;
     new_node->details->running_rsc = NULL;
     new_node->details->type = node_ping;
 
     if (safe_str_eq(type, "remote")) {
         new_node->details->type = node_remote;
         set_bit(data_set->flags, pe_flag_have_remote_nodes);
     } else if (type == NULL || safe_str_eq(type, "member")
         || safe_str_eq(type, NORMALNODE)) {
         new_node->details->type = node_member;
     }
 
     new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                                      g_hash_destroy_str,
                                                      g_hash_destroy_str);
 
     if (is_remote_node(new_node)) {
         g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("remote"));
     } else {
         g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("cluster"));
     }
 
     new_node->details->utilization =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                               g_hash_destroy_str);
 
     new_node->details->digest_cache =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                               destroy_digest_cache);
 
     data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
     return new_node;
 }
 
 static const char *
 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_check)
 {
     xmlNode *xml_rsc = NULL;
     xmlNode *xml_tmp = NULL;
     xmlNode *attr_set = NULL;
     xmlNode *attr = NULL;
 
     const char *container_id = ID(xml_obj);
     const char *remote_name = NULL;
     const char *remote_server = NULL;
     const char *remote_port = NULL;
     const char *connect_timeout = "60s";
     const char *remote_allow_migrate=NULL;
     char *tmp_id = NULL;
 
     for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) {
         if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
             continue;
         }
 
         for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) {
             const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
             const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
 
             if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
                 remote_name = value;
             } else if (safe_str_eq(name, "remote-addr")) {
                 remote_server = value;
             } else if (safe_str_eq(name, "remote-port")) {
                 remote_port = value;
             } else if (safe_str_eq(name, "remote-connect-timeout")) {
                 connect_timeout = value;
             } else if (safe_str_eq(name, "remote-allow-migrate")) {
                 remote_allow_migrate=value;
             }
         }
     }
 
     if (remote_name == NULL) {
         return NULL;
     }
 
     if (*rsc_name_check == NULL) {
         *rsc_name_check = g_hash_table_new(crm_str_hash, g_str_equal);
         for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) {
             const char *id = ID(xml_rsc);
 
             /* avoiding heap allocation here because we know the duration of this hashtable allows us to */
             g_hash_table_insert(*rsc_name_check, (char *) id, (char *) id);
         }
     }
 
     if (g_hash_table_lookup(*rsc_name_check, remote_name)) {
 
         crm_err("Naming conflict with remote-node=%s.  remote-nodes can not have the same name as a resource.",
                 remote_name);
         return NULL;
     }
 
     xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE);
 
     crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name);
     crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, "ocf");
     crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker");
     crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote");
 
     xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS);
     tmp_id = crm_concat(remote_name, XML_TAG_META_SETS, '_');
     crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id);
     free(tmp_id);
 
     attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
     tmp_id = crm_concat(remote_name, "meta-attributes-container", '_');
     crm_xml_add(attr, XML_ATTR_ID, tmp_id);
     crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_CONTAINER);
     crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_id);
     free(tmp_id);
 
     attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
     tmp_id = crm_concat(remote_name, "meta-attributes-internal", '_');
     crm_xml_add(attr, XML_ATTR_ID, tmp_id);
     crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_INTERNAL_RSC);
     crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, "true");
     free(tmp_id);
 
     if (remote_allow_migrate) {
         attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
         tmp_id = crm_concat(remote_name, "meta-attributes-container", '_');
         crm_xml_add(attr, XML_ATTR_ID, tmp_id);
         crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_ALLOW_MIGRATE);
         crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_allow_migrate);
         free(tmp_id);
     }
 
     xml_tmp = create_xml_node(xml_rsc, "operations");
     attr = create_xml_node(xml_tmp, XML_ATTR_OP);
     tmp_id = crm_concat(remote_name, "monitor-interval-30s", '_');
     crm_xml_add(attr, XML_ATTR_ID, tmp_id);
     crm_xml_add(attr, XML_ATTR_TIMEOUT, "30s");
     crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "30s");
     crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor");
     free(tmp_id);
 
     if (connect_timeout) {
         attr = create_xml_node(xml_tmp, XML_ATTR_OP);
         tmp_id = crm_concat(remote_name, "start-interval-0", '_');
         crm_xml_add(attr, XML_ATTR_ID, tmp_id);
         crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout);
         crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0");
         crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start");
         free(tmp_id);
     }
 
     if (remote_port || remote_server) {
         xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS);
         tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_');
         crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id);
         free(tmp_id);
 
         if (remote_server) {
             attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
             tmp_id = crm_concat(remote_name, "instance-attributes-addr", '_');
             crm_xml_add(attr, XML_ATTR_ID, tmp_id);
             crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "addr");
             crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_server);
             free(tmp_id);
         }
         if (remote_port) {
             attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
             tmp_id = crm_concat(remote_name, "instance-attributes-port", '_');
             crm_xml_add(attr, XML_ATTR_ID, tmp_id);
             crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "port");
             crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_port);
             free(tmp_id);
         }
     }
 
     return remote_name;
 }
 
 static void
 handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
 {
     static const char *blind_faith = NULL;
     static gboolean unseen_are_unclean = TRUE;
 
     if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
         /* ignore fencing remote-nodes that don't have a conneciton resource associated
          * with them. This happens when remote-node entries get left in the nodes section
          * after the connection resource is removed */
         return;
     }
 
     blind_faith = pe_pref(data_set->config_hash, "startup-fencing");
 
     if (crm_is_true(blind_faith) == FALSE) {
         unseen_are_unclean = FALSE;
         crm_warn("Blind faith: not fencing unseen nodes");
     }
 
     if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE
         || unseen_are_unclean == FALSE) {
         /* blind faith... */
         new_node->details->unclean = FALSE;
 
     } else {
         /* all nodes are unclean until we've seen their
          * status entry
          */
         new_node->details->unclean = TRUE;
     }
 
     /* We need to be able to determine if a node's status section
      * exists or not separate from whether the node is unclean. */
     new_node->details->unseen = TRUE;
 }
 
 gboolean
 unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
 {
     xmlNode *xml_obj = NULL;
     node_t *new_node = NULL;
     const char *id = NULL;
     const char *uname = NULL;
     const char *type = NULL;
     const char *score = NULL;
 
     for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
         if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
             new_node = NULL;
 
             id = crm_element_value(xml_obj, XML_ATTR_ID);
             uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
             type = crm_element_value(xml_obj, XML_ATTR_TYPE);
             score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
             crm_trace("Processing node %s/%s", uname, id);
 
             if (id == NULL) {
                 crm_config_err("Must specify id tag in <node>");
                 continue;
             }
             new_node = create_node(id, uname, type, score, data_set);
 
             if (new_node == NULL) {
                 return FALSE;
             }
 
 /* 		if(data_set->have_quorum == FALSE */
 /* 		   && data_set->no_quorum_policy == no_quorum_stop) { */
 /* 			/\* start shutting resources down *\/ */
 /* 			new_node->weight = -INFINITY; */
 /* 		} */
 
             handle_startup_fencing(data_set, new_node);
 
             add_node_attrs(xml_obj, new_node, FALSE, data_set);
             unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
                                        new_node->details->utilization, NULL, FALSE, data_set->now);
 
             crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
         }
     }
 
     if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
         crm_info("Creating a fake local node");
         create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set);
     }
 
     return TRUE;
 }
 
 static void
 setup_container(resource_t * rsc, pe_working_set_t * data_set)
 {
     const char *container_id = NULL;
 
     if (rsc->children) {
         GListPtr gIter = rsc->children;
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child_rsc = (resource_t *) gIter->data;
 
             setup_container(child_rsc, data_set);
         }
         return;
     }
 
     container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
     if (container_id && safe_str_neq(container_id, rsc->id)) {
         resource_t *container = pe_find_resource(data_set->resources, container_id);
 
         if (container) {
             rsc->container = container;
             container->fillers = g_list_append(container->fillers, rsc);
             pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
         } else {
             pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
         }
     }
 }
 
 gboolean
 unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
 {
     xmlNode *xml_obj = NULL;
     GHashTable *rsc_name_check = NULL;
 
     /* generate remote nodes from resource config before unpacking resources */
     for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
         const char *new_node_id = NULL;
 
         /* first check if this is a bare metal remote node. Bare metal remote nodes
          * are defined as a resource primitive only. */
         if (xml_contains_remote_node(xml_obj)) {
             new_node_id = ID(xml_obj);
             /* The "pe_find_node" check is here to make sure we don't iterate over
              * an expanded node that has already been added to the node list. */
             if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                 crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj));
                 create_node(new_node_id, new_node_id, "remote", NULL, data_set);
             }
             continue;
         }
 
         /* Now check for guest remote nodes.
          * guest remote nodes are defined within a resource primitive.
          * Example1: a vm resource might be configured as a remote node.
          * Example2: a vm resource might be configured within a group to be a remote node.
          * Note: right now we only support guest remote nodes in as a standalone primitive
          * or a primitive within a group. No cloned primitives can be a guest remote node
          * right now */
         if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
             /* expands a metadata defined remote resource into the xml config
              * as an actual rsc primitive to be unpacked later. */
             new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, &rsc_name_check);
 
             if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                 crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj));
                 create_node(new_node_id, new_node_id, "remote", NULL, data_set);
             }
             continue;
 
         } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
             xmlNode *xml_obj2 = NULL;
             /* search through a group to see if any of the primitive contain a remote node. */
             for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) {
 
                 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, &rsc_name_check);
 
                 if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                     crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj));
                     create_node(new_node_id, new_node_id, "remote", NULL, data_set);
                 }
             }
         }
     }
     if (rsc_name_check) {
         g_hash_table_destroy(rsc_name_check);
     }
 
     return TRUE;
 }
 
 
 /* Call this after all the nodes and resources have been
  * unpacked, but before the status section is read.
  *
  * A remote node's online status is reflected by the state
  * of the remote node's connection resource. We need to link
  * the remote node to this connection resource so we can have
  * easy access to the connection resource during the PE calculations.
  */
 static void
 link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
 {
     node_t *remote_node = NULL;
 
     if (new_rsc->is_remote_node == FALSE) {
         return;
     }
 
     if (is_set(data_set->flags, pe_flag_quick_location)) {
         /* remote_nodes and remote_resources are not linked in quick location calculations */
         return;
     }
 
     print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE);
 
     remote_node = pe_find_node(data_set->nodes, new_rsc->id);
     CRM_CHECK(remote_node != NULL, return;);
 
     remote_node->details->remote_rsc = new_rsc;
     /* If this is a baremetal remote-node (no container resource
      * associated with it) then we need to handle startup fencing the same way
      * as cluster nodes. */
     if (new_rsc->container == NULL) {
         handle_startup_fencing(data_set, remote_node);
     } else {
         /* At this point we know if the remote node is a container or baremetal
          * remote node, update the #kind attribute if a container is involved */
         g_hash_table_replace(remote_node->details->attrs, strdup("#kind"), strdup("container"));
     }
 }
 
 static void
 destroy_tag(gpointer data)
 {
     tag_t *tag = data;
 
     if (tag) {
         free(tag->id);
         g_list_free_full(tag->refs, free);
         free(tag);
     }
 }
 
 gboolean
 unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
 {
     xmlNode *xml_obj = NULL;
     GListPtr gIter = NULL;
 
     data_set->template_rsc_sets =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                               destroy_tag);
 
     for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
         resource_t *new_rsc = NULL;
 
         if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
             const char *template_id = ID(xml_obj);
 
             if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
                                                             template_id, NULL, NULL) == FALSE) {
                 /* Record the template's ID for the knowledge of its existence anyway. */
                 g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
             }
             continue;
         }
 
         crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
         if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
             data_set->resources = g_list_append(data_set->resources, new_rsc);
 
             if (xml_contains_remote_node(xml_obj)) {
                 new_rsc->is_remote_node = TRUE;
             }
             print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE);
 
         } else {
             crm_config_err("Failed unpacking %s %s",
                            crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
             if (new_rsc != NULL && new_rsc->fns != NULL) {
                 new_rsc->fns->free(new_rsc);
             }
         }
     }
 
     for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         setup_container(rsc, data_set);
         link_rsc2remotenode(data_set, rsc);
     }
 
     data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
     if (is_set(data_set->flags, pe_flag_quick_location)) {
         /* Ignore */
 
     } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
                && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
 
         crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
         crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
         crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
     }
 
     return TRUE;
 }
 
 gboolean
 unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
 {
     xmlNode *xml_tag = NULL;
 
     data_set->tags =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag);
 
     for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) {
         xmlNode *xml_obj_ref = NULL;
         const char *tag_id = ID(xml_tag);
 
         if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
             continue;
         }
 
         if (tag_id == NULL) {
             crm_config_err("Failed unpacking %s: %s should be specified",
                            crm_element_name(xml_tag), XML_ATTR_ID);
             continue;
         }
 
         for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) {
             const char *obj_ref = ID(xml_obj_ref);
 
             if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
                 continue;
             }
 
             if (obj_ref == NULL) {
                 crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
                                crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
                 continue;
             }
 
             if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
                 return FALSE;
             }
         }
     }
 
     return TRUE;
 }
 
 /* The ticket state section:
  * "/cib/status/tickets/ticket_state" */
 static gboolean
 unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
 {
     const char *ticket_id = NULL;
     const char *granted = NULL;
     const char *last_granted = NULL;
     const char *standby = NULL;
     xmlAttrPtr xIter = NULL;
 
     ticket_t *ticket = NULL;
 
     ticket_id = ID(xml_ticket);
     if (ticket_id == NULL || strlen(ticket_id) == 0) {
         return FALSE;
     }
 
     crm_trace("Processing ticket state for %s", ticket_id);
 
     ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
     if (ticket == NULL) {
         ticket = ticket_new(ticket_id, data_set);
         if (ticket == NULL) {
             return FALSE;
         }
     }
 
     for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
         const char *prop_name = (const char *)xIter->name;
         const char *prop_value = crm_element_value(xml_ticket, prop_name);
 
         if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
             continue;
         }
         g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
     }
 
     granted = g_hash_table_lookup(ticket->state, "granted");
     if (granted && crm_is_true(granted)) {
         ticket->granted = TRUE;
         crm_info("We have ticket '%s'", ticket->id);
     } else {
         ticket->granted = FALSE;
         crm_info("We do not have ticket '%s'", ticket->id);
     }
 
     last_granted = g_hash_table_lookup(ticket->state, "last-granted");
     if (last_granted) {
         ticket->last_granted = crm_parse_int(last_granted, 0);
     }
 
     standby = g_hash_table_lookup(ticket->state, "standby");
     if (standby && crm_is_true(standby)) {
         ticket->standby = TRUE;
         if (ticket->granted) {
             crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
         }
     } else {
         ticket->standby = FALSE;
     }
 
     crm_trace("Done with ticket state for %s", ticket_id);
 
     return TRUE;
 }
 
 static gboolean
 unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
 {
     xmlNode *xml_obj = NULL;
 
     for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
         if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
             continue;
         }
         unpack_ticket_state(xml_obj, data_set);
     }
 
     return TRUE;
 }
 
 /* Compatibility with the deprecated ticket state section:
  * "/cib/status/tickets/instance_attributes" */
 static void
 get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data)
 {
     const char *long_key = key;
     char *state_key = NULL;
 
     const char *granted_prefix = "granted-ticket-";
     const char *last_granted_prefix = "last-granted-";
     static int granted_prefix_strlen = 0;
     static int last_granted_prefix_strlen = 0;
 
     const char *ticket_id = NULL;
     const char *is_granted = NULL;
     const char *last_granted = NULL;
     const char *sep = NULL;
 
     ticket_t *ticket = NULL;
     pe_working_set_t *data_set = user_data;
 
     if (granted_prefix_strlen == 0) {
         granted_prefix_strlen = strlen(granted_prefix);
     }
 
     if (last_granted_prefix_strlen == 0) {
         last_granted_prefix_strlen = strlen(last_granted_prefix);
     }
 
     if (strstr(long_key, granted_prefix) == long_key) {
         ticket_id = long_key + granted_prefix_strlen;
         if (strlen(ticket_id)) {
             state_key = strdup("granted");
             is_granted = value;
         }
     } else if (strstr(long_key, last_granted_prefix) == long_key) {
         ticket_id = long_key + last_granted_prefix_strlen;
         if (strlen(ticket_id)) {
             state_key = strdup("last-granted");
             last_granted = value;
         }
     } else if ((sep = strrchr(long_key, '-'))) {
         ticket_id = sep + 1;
         state_key = strndup(long_key, strlen(long_key) - strlen(sep));
     }
 
     if (ticket_id == NULL || strlen(ticket_id) == 0) {
         free(state_key);
         return;
     }
 
     if (state_key == NULL || strlen(state_key) == 0) {
         free(state_key);
         return;
     }
 
     ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
     if (ticket == NULL) {
         ticket = ticket_new(ticket_id, data_set);
         if (ticket == NULL) {
             free(state_key);
             return;
         }
     }
 
     g_hash_table_replace(ticket->state, state_key, strdup(value));
 
     if (is_granted) {
         if (crm_is_true(is_granted)) {
             ticket->granted = TRUE;
             crm_info("We have ticket '%s'", ticket->id);
         } else {
             ticket->granted = FALSE;
             crm_info("We do not have ticket '%s'", ticket->id);
         }
 
     } else if (last_granted) {
         ticket->last_granted = crm_parse_int(last_granted, 0);
     }
 }
 
 /* remove nodes that are down, stopping */
 /* create +ve rsc_to_node constraints between resources and the nodes they are running on */
 /* anything else? */
 gboolean
 unpack_status(xmlNode * status, pe_working_set_t * data_set)
 {
     const char *id = NULL;
     const char *uname = NULL;
 
     xmlNode *state = NULL;
     xmlNode *lrm_rsc = NULL;
     node_t *this_node = NULL;
 
     crm_trace("Beginning unpack");
 
     if (data_set->tickets == NULL) {
         data_set->tickets =
             g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket);
     }
 
     for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
             xmlNode *xml_tickets = state;
             GHashTable *state_hash = NULL;
 
             /* Compatibility with the deprecated ticket state section:
              * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */
             state_hash =
                 g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                                       g_hash_destroy_str);
 
             unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL,
                                        state_hash, NULL, TRUE, data_set->now);
 
             g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set);
 
             if (state_hash) {
                 g_hash_table_destroy(state_hash);
             }
 
             /* Unpack the new "/cib/status/tickets/ticket_state"s */
             unpack_tickets_state(xml_tickets, data_set);
         }
 
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
             xmlNode *attrs = NULL;
             const char *resource_discovery_enabled = NULL;
 
             id = crm_element_value(state, XML_ATTR_ID);
             uname = crm_element_value(state, XML_ATTR_UNAME);
             this_node = pe_find_node_any(data_set->nodes, id, uname);
 
             if (uname == NULL) {
                 /* error */
                 continue;
 
             } else if (this_node == NULL) {
                 crm_config_warn("Node %s in status section no longer exists", uname);
                 continue;
 
             } else if (is_remote_node(this_node)) {
                 /* online state for remote nodes is determined by the
                  * rsc state after all the unpacking is done. we do however
                  * need to mark whether or not the node has been fenced as this plays
                  * a role during unpacking cluster node resource state */
                 this_node->details->remote_was_fenced = 
                     crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0");
                 continue;
             }
 
             crm_trace("Processing node id=%s, uname=%s", id, uname);
 
             /* Mark the node as provisionally clean
              * - at least we have seen it in the current cluster's lifetime
              */
             this_node->details->unclean = FALSE;
             this_node->details->unseen = FALSE;
             attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
             add_node_attrs(attrs, this_node, TRUE, data_set);
 
             if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
                 crm_info("Node %s is in standby-mode", this_node->details->uname);
                 this_node->details->standby = TRUE;
             }
 
             if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) {
                 crm_info("Node %s is in maintenance-mode", this_node->details->uname);
                 this_node->details->maintenance = TRUE;
             }
 
             resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY);
             if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
                 crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
                     XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
             }
 
             crm_trace("determining node state");
             determine_online_status(state, this_node, data_set);
 
             if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) {
                 /* Everything else should flow from this automatically
                  * At least until the PE becomes able to migrate off healthy resources
                  */
                 pe_fence_node(data_set, this_node, "because the cluster does not have quorum");
             }
         }
     }
 
     /* Now that we know all node states, we can safely handle migration ops */
     for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
             continue;
         }
 
         id = crm_element_value(state, XML_ATTR_ID);
         uname = crm_element_value(state, XML_ATTR_UNAME);
         this_node = pe_find_node_any(data_set->nodes, id, uname);
 
         if (this_node == NULL) {
             crm_info("Node %s is unknown", id);
             continue;
 
         } else if (is_remote_node(this_node)) {
 
             /* online status of remote node can not be determined until all other
              * resource status is unpacked. */
             continue;
         } else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
             crm_trace("Processing lrm resource entries on healthy node: %s",
                       this_node->details->uname);
             lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
             lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
             unpack_lrm_resources(this_node, lrm_rsc, data_set);
         }
     }
 
     /* now that the rest of the cluster's status is determined
      * calculate remote-nodes */
     unpack_remote_status(status, data_set);
 
     return TRUE;
 }
 
 gboolean
 unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
 {
     const char *id = NULL;
     const char *uname = NULL;
     GListPtr gIter = NULL;
 
     xmlNode *state = NULL;
     xmlNode *lrm_rsc = NULL;
     node_t *this_node = NULL;
 
     if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
         crm_trace("no remote nodes to unpack");
         return TRUE;
     }
 
     /* get online status */
     for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
         this_node = gIter->data;
 
         if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
             continue;
         }
         determine_remote_online_status(this_node);
     }
 
     /* process attributes */
     for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
         const char *resource_discovery_enabled = NULL;
         xmlNode *attrs = NULL;
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
             continue;
         }
 
         id = crm_element_value(state, XML_ATTR_ID);
         uname = crm_element_value(state, XML_ATTR_UNAME);
         this_node = pe_find_node_any(data_set->nodes, id, uname);
 
         if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
             continue;
         }
         crm_trace("Processing remote node id=%s, uname=%s", id, uname);
 
         if (this_node->details->remote_requires_reset == FALSE) {
             this_node->details->unclean = FALSE;
             this_node->details->unseen = FALSE;
         }
         attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
         add_node_attrs(attrs, this_node, TRUE, data_set);
 
         if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
             crm_info("Node %s is in standby-mode", this_node->details->uname);
             this_node->details->standby = TRUE;
         }
 
         if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) {
             crm_info("Node %s is in maintenance-mode", this_node->details->uname);
             this_node->details->maintenance = TRUE;
         }
 
         resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY);
         if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
             if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
                 crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
                     XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
             } else {
                 /* if we're here, this is either a baremetal node and fencing is enabled,
                  * or this is a container node which we don't care if fencing is enabled 
                  * or not on. container nodes are 'fenced' by recovering the container resource
                  * regardless of whether fencing is enabled. */
                 crm_info("Node %s has resource discovery disabled", this_node->details->uname);
                 this_node->details->rsc_discovery_enabled = FALSE;
             }
         }
     }
 
     /* process node rsc status */
     for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
             continue;
         }
 
         id = crm_element_value(state, XML_ATTR_ID);
         uname = crm_element_value(state, XML_ATTR_UNAME);
         this_node = pe_find_node_any(data_set->nodes, id, uname);
 
         if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
             continue;
         }
         crm_trace("Processing lrm resource entries on healthy remote node: %s",
                   this_node->details->uname);
         lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
         lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
         unpack_lrm_resources(this_node, lrm_rsc, data_set);
     }
 
     return TRUE;
 }
 
 static gboolean
 determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                    node_t * this_node)
 {
     gboolean online = FALSE;
     const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
     const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
     const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
     const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
 
     if (!crm_is_true(in_cluster)) {
         crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
 
     } else if (safe_str_eq(is_peer, ONLINESTATUS)) {
         if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
             online = TRUE;
         } else {
             crm_debug("Node is not ready to run resources: %s", join);
         }
 
     } else if (this_node->details->expected_up == FALSE) {
         crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster));
         crm_trace("\tis_peer=%s, join=%s, expected=%s",
                   crm_str(is_peer), crm_str(join), crm_str(exp_state));
 
     } else {
         /* mark it unclean */
         pe_fence_node(data_set, this_node, "unexpectedly down");
         crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
                  crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
     }
     return online;
 }
 
 static gboolean
 determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                 node_t * this_node)
 {
     gboolean online = FALSE;
     gboolean do_terminate = FALSE;
     const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
     const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
     const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
     const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
     const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate");
 
 /*
   - XML_NODE_IN_CLUSTER    ::= true|false
   - XML_NODE_IS_PEER       ::= true|false|online|offline
   - XML_NODE_JOIN_STATE    ::= member|down|pending|banned
   - XML_NODE_EXPECTED      ::= member|down
 */
 
     if (crm_is_true(terminate)) {
         do_terminate = TRUE;
 
     } else if (terminate != NULL && strlen(terminate) > 0) {
         /* could be a time() value */
         char t = terminate[0];
 
         if (t != '0' && isdigit(t)) {
             do_terminate = TRUE;
         }
     }
 
     crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
               this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
               crm_str(join), crm_str(exp_state), do_terminate);
 
     online = crm_is_true(in_cluster);
     if (safe_str_eq(is_peer, ONLINESTATUS)) {
         is_peer = XML_BOOLEAN_YES;
     }
     if (exp_state == NULL) {
         exp_state = CRMD_JOINSTATE_DOWN;
     }
 
     if (this_node->details->shutdown) {
         crm_debug("%s is shutting down", this_node->details->uname);
         online = crm_is_true(is_peer);  /* Slightly different criteria since we cant shut down a dead peer */
 
     } else if (in_cluster == NULL) {
         pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster");
 
     } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
         pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria");
 
     } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
 
         if (crm_is_true(in_cluster) || crm_is_true(is_peer)) {
             crm_info("- Node %s is not ready to run resources", this_node->details->uname);
             this_node->details->standby = TRUE;
             this_node->details->pending = TRUE;
 
         } else {
             crm_trace("%s is down or still coming up", this_node->details->uname);
         }
 
     } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
                && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) {
         crm_info("Node %s was just shot", this_node->details->uname);
         online = FALSE;
 
     } else if (crm_is_true(in_cluster) == FALSE) {
         pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster");
 
     } else if (crm_is_true(is_peer) == FALSE) {
         pe_fence_node(data_set, this_node, "because our peer process is no longer available");
 
         /* Everything is running at this point, now check join state */
     } else if (do_terminate) {
         pe_fence_node(data_set, this_node, "because termination was requested");
 
     } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
         crm_info("Node %s is active", this_node->details->uname);
 
     } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
                || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
         crm_info("Node %s is not ready to run resources", this_node->details->uname);
         this_node->details->standby = TRUE;
         this_node->details->pending = TRUE;
 
     } else {
         pe_fence_node(data_set, this_node, "because the peer was in an unknown state");
         crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
                  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
                  crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
     }
 
     return online;
 }
 
 static gboolean
 determine_remote_online_status(node_t * this_node)
 {
     resource_t *rsc = this_node->details->remote_rsc;
     resource_t *container = NULL;
 
     if (rsc == NULL) {
         this_node->details->online = FALSE;
         goto remote_online_done;
     }
 
     container = rsc->container;
 
     CRM_ASSERT(rsc != NULL);
 
     /* If the resource is currently started, mark it online. */
     if (rsc->role == RSC_ROLE_STARTED) {
         crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id);
         this_node->details->online = TRUE;
     }
 
     /* consider this node shutting down if transitioning start->stop */
     if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
         crm_trace("Remote node %s shutdown. transition from start to stop role", this_node->details->id);
         this_node->details->shutdown = TRUE;
     }
 
     /* Now check all the failure conditions. */
     if (is_set(rsc->flags, pe_rsc_failed) ||
         (rsc->role == RSC_ROLE_STOPPED) ||
         (container && is_set(container->flags, pe_rsc_failed)) ||
         (container && container->role == RSC_ROLE_STOPPED)) {
 
         crm_trace("Remote node %s is set to OFFLINE. node is stopped or rsc failed.", this_node->details->id);
         this_node->details->online = FALSE;
     }
 
 remote_online_done:
     crm_trace("Remote node %s online=%s",
         this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
     return this_node->details->online;
 }
 
 gboolean
 determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
 {
     gboolean online = FALSE;
     const char *shutdown = NULL;
     const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
 
     if (this_node == NULL) {
         crm_config_err("No node to check");
         return online;
     }
 
     this_node->details->shutdown = FALSE;
     this_node->details->expected_up = FALSE;
     shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
 
     if (shutdown != NULL && safe_str_neq("0", shutdown)) {
         this_node->details->shutdown = TRUE;
 
     } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
         this_node->details->expected_up = TRUE;
     }
 
     if (this_node->details->type == node_ping) {
         this_node->details->unclean = FALSE;
         online = FALSE;         /* As far as resource management is concerned,
                                  * the node is safely offline.
                                  * Anyone caught abusing this logic will be shot
                                  */
 
     } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
         online = determine_online_status_no_fencing(data_set, node_state, this_node);
 
     } else {
         online = determine_online_status_fencing(data_set, node_state, this_node);
     }
 
     if (online) {
         this_node->details->online = TRUE;
 
     } else {
         /* remove node from contention */
         this_node->fixed = TRUE;
         this_node->weight = -INFINITY;
     }
 
     if (online && this_node->details->shutdown) {
         /* dont run resources here */
         this_node->fixed = TRUE;
         this_node->weight = -INFINITY;
     }
 
     if (this_node->details->type == node_ping) {
         crm_info("Node %s is not a pacemaker node", this_node->details->uname);
 
     } else if (this_node->details->unclean) {
         pe_proc_warn("Node %s is unclean", this_node->details->uname);
 
     } else if (this_node->details->online) {
         crm_info("Node %s is %s", this_node->details->uname,
                  this_node->details->shutdown ? "shutting down" :
                  this_node->details->pending ? "pending" :
                  this_node->details->standby ? "standby" :
                  this_node->details->maintenance ? "maintenance" : "online");
 
     } else {
         crm_trace("Node %s is offline", this_node->details->uname);
     }
 
     return online;
 }
 
 char *
 clone_strip(const char *last_rsc_id)
 {
     int lpc = 0;
     char *zero = NULL;
 
     CRM_CHECK(last_rsc_id != NULL, return NULL);
     lpc = strlen(last_rsc_id);
     while (--lpc > 0) {
         switch (last_rsc_id[lpc]) {
             case 0:
                 crm_err("Empty string: %s", last_rsc_id);
                 return NULL;
                 break;
             case '0':
             case '1':
             case '2':
             case '3':
             case '4':
             case '5':
             case '6':
             case '7':
             case '8':
             case '9':
                 break;
             case ':':
                 zero = calloc(1, lpc + 1);
                 memcpy(zero, last_rsc_id, lpc);
                 zero[lpc] = 0;
                 return zero;
             default:
                 goto done;
         }
     }
   done:
     zero = strdup(last_rsc_id);
     return zero;
 }
 
 char *
 clone_zero(const char *last_rsc_id)
 {
     int lpc = 0;
     char *zero = NULL;
 
     CRM_CHECK(last_rsc_id != NULL, return NULL);
     if (last_rsc_id != NULL) {
         lpc = strlen(last_rsc_id);
     }
 
     while (--lpc > 0) {
         switch (last_rsc_id[lpc]) {
             case 0:
                 return NULL;
                 break;
             case '0':
             case '1':
             case '2':
             case '3':
             case '4':
             case '5':
             case '6':
             case '7':
             case '8':
             case '9':
                 break;
             case ':':
                 zero = calloc(1, lpc + 3);
                 memcpy(zero, last_rsc_id, lpc);
                 zero[lpc] = ':';
                 zero[lpc + 1] = '0';
                 zero[lpc + 2] = 0;
                 return zero;
             default:
                 goto done;
         }
     }
   done:
     lpc = strlen(last_rsc_id);
     zero = calloc(1, lpc + 3);
     memcpy(zero, last_rsc_id, lpc);
     zero[lpc] = ':';
     zero[lpc + 1] = '0';
     zero[lpc + 2] = 0;
     crm_trace("%s -> %s", last_rsc_id, zero);
     return zero;
 }
 
 static resource_t *
 create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
 {
     resource_t *rsc = NULL;
     xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
 
     copy_in_properties(xml_rsc, rsc_entry);
     crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
     crm_log_xml_debug(xml_rsc, "Orphan resource");
 
     if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
         return NULL;
     }
 
     if (xml_contains_remote_node(xml_rsc)) {
         node_t *node;
 
         crm_debug("Detected orphaned remote node %s", rsc_id);
         rsc->is_remote_node = TRUE;
         node = pe_find_node(data_set->nodes, rsc_id);
         if (node == NULL) {
 	        node = create_node(rsc_id, rsc_id, "remote", NULL, data_set);
         }
         link_rsc2remotenode(data_set, rsc);
 
         if (node) {
             crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
             node->details->shutdown = TRUE;
         }
     }
 
     if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
         /* This orphaned rsc needs to be mapped to a container. */
         crm_trace("Detected orphaned container filler %s", rsc_id);
         set_bit(rsc->flags, pe_rsc_orphan_container_filler);
     }
     set_bit(rsc->flags, pe_rsc_orphan);
     data_set->resources = g_list_append(data_set->resources, rsc);
     return rsc;
 }
 
 extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set);
 
 static resource_t *
 find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
                      const char *rsc_id)
 {
     GListPtr rIter = NULL;
     resource_t *rsc = NULL;
     gboolean skip_inactive = FALSE;
 
     CRM_ASSERT(parent != NULL);
     CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master);
     CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
 
     /* Find an instance active (or partially active for grouped clones) on the specified node */
     pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
     for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
         GListPtr nIter = NULL;
         GListPtr locations = NULL;
         resource_t *child = rIter->data;
 
         child->fns->location(child, &locations, TRUE);
         if (locations == NULL) {
             pe_rsc_trace(child, "Resource %s, skip inactive", child->id);
             continue;
         }
 
         for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) {
             node_t *childnode = nIter->data;
 
             if (childnode->details == node->details) {
                 /* ->find_rsc() because we might be a cloned group */
                 rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
                 if(rsc) {
                     pe_rsc_trace(rsc, "Resource %s, active", rsc->id);
                 }
             }
 
             /* Keep this block, it means we'll do the right thing if
              * anyone toggles the unique flag to 'off'
              */
             if (rsc && rsc->running_on) {
                 crm_notice("/Anonymous/ clone %s is already running on %s",
                            parent->id, node->details->uname);
                 skip_inactive = TRUE;
                 rsc = NULL;
             }
         }
 
         g_list_free(locations);
     }
 
     /* Find an inactive instance */
     if (skip_inactive == FALSE) {
         pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id);
         for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
             GListPtr locations = NULL;
             resource_t *child = rIter->data;
 
             if (is_set(child->flags, pe_rsc_block)) {
                 pe_rsc_trace(child, "Skip: blocked in stopped state");
                 continue;
             }
 
             child->fns->location(child, &locations, TRUE);
             if (locations == NULL) {
                 /* ->find_rsc() because we might be a cloned group */
                 rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
                 pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id);
             }
             g_list_free(locations);
         }
     }
 
     if (rsc == NULL) {
         /* Create an extra orphan */
         resource_t *top = create_child_clone(parent, -1, data_set);
 
         /* ->find_rsc() because we might be a cloned group */
         rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
         CRM_ASSERT(rsc != NULL);
 
         pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id,
                      node->details->uname);
     }
 
     if (safe_str_neq(rsc_id, rsc->id)) {
         pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
                     rsc_id, node->details->uname, rsc->id,
                     is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : "");
     }
 
     return rsc;
 }
 
 static resource_t *
 unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
                      xmlNode * rsc_entry)
 {
     resource_t *rsc = NULL;
     resource_t *parent = NULL;
 
     crm_trace("looking for %s", rsc_id);
     rsc = pe_find_resource(data_set->resources, rsc_id);
 
     /* no match */
     if (rsc == NULL) {
         /* Even when clone-max=0, we still create a single :0 orphan to match against */
         char *tmp = clone_zero(rsc_id);
         resource_t *clone0 = pe_find_resource(data_set->resources, tmp);
 
         if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
             rsc = clone0;
         } else {
             crm_trace("%s is not known as %s either", rsc_id, tmp);
         }
 
         parent = uber_parent(clone0);
         free(tmp);
 
         crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan");
 
     } else if (rsc->variant > pe_native) {
         crm_trace("%s is no longer a primitve resource, the lrm_resource entry is obsolete",
                   rsc_id);
         return NULL;
 
     } else {
         parent = uber_parent(rsc);
     }
 
     if (parent && parent->variant > pe_group) {
         if (is_not_set(parent->flags, pe_rsc_unique)) {
             char *base = clone_strip(rsc_id);
 
             rsc = find_anonymous_clone(data_set, node, parent, base);
             CRM_ASSERT(rsc != NULL);
             free(base);
         }
 
         if (rsc && safe_str_neq(rsc_id, rsc->id)) {
             free(rsc->clone_name);
             rsc->clone_name = strdup(rsc_id);
         }
     }
 
     return rsc;
 }
 
 static resource_t *
 process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
 {
     resource_t *rsc = NULL;
     const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
 
     crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
     rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
 
     if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
         clear_bit(rsc->flags, pe_rsc_managed);
 
     } else {
         GListPtr gIter = NULL;
 
         print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE);
 
         CRM_CHECK(rsc != NULL, return NULL);
         resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
 
         for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
             node_t *node = (node_t *) gIter->data;
 
             if (node->details->online && get_failcount(node, rsc, NULL, data_set)) {
                 action_t *clear_op = NULL;
                 action_t *ready = NULL;
 
                 if (is_remote_node(node)) {
                     char *pseudo_op_name = crm_concat(CRM_OP_PROBED, node->details->id, '_');
                     ready = get_pseudo_op(pseudo_op_name, data_set);
                     free(pseudo_op_name);
                 } else {
                     ready = get_pseudo_op(CRM_OP_PROBED, data_set);
                 }
 
                 clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
                                          CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
 
                 add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
                 pe_rsc_info(rsc, "Clearing failcount (%d) for orphaned resource %s on %s (%s)",
                             get_failcount(node, rsc, NULL, data_set), rsc->id, node->details->uname,
                             clear_op->uuid);
 
                 order_actions(clear_op, ready, pe_order_optional);
             }
         }
     }
     return rsc;
 }
 
 static void
 process_rsc_state(resource_t * rsc, node_t * node,
                   enum action_fail_response on_fail,
                   xmlNode * migrate_op, pe_working_set_t * data_set)
 {
     node_t *tmpnode = NULL;
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
                  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
 
     /* process current state */
     if (rsc->role != RSC_ROLE_UNKNOWN) {
         resource_t *iter = rsc;
 
         while (iter) {
             if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
                 node_t *n = node_copy(node);
 
                 pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
                              n->details->uname);
                 g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
             }
             if (is_set(iter->flags, pe_rsc_unique)) {
                 break;
             }
             iter = iter->parent;
         }
     }
 
     if (rsc->role > RSC_ROLE_STOPPED
         && node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) {
 
         char *reason = NULL;
         gboolean should_fence = FALSE;
 
         /* if this is a remote_node living in a container, fence the container
          * by recovering it. Mark the resource as unmanaged. Once the container
          * and remote connenction are re-established, the status section will
          * get reset in the crmd freeing up this resource to run again once we
          * are sure we know the resources state. */
         if (is_container_remote_node(node)) {
             set_bit(rsc->flags, pe_rsc_failed);
 
             should_fence = TRUE;
         } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
             if (is_baremetal_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
                 /* setting unseen = true means that fencing of the remote node will
                  * only occur if the connection resource is not going to start somewhere.
                  * This allows connection resources on a failed cluster-node to move to
                  * another node without requiring the baremetal remote nodes to be fenced
                  * as well. */
                 node->details->unseen = TRUE;
                 reason = crm_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id);
             }
             should_fence = TRUE;
         }
 
         if (should_fence) {
             if (reason == NULL) {
                reason = crm_strdup_printf("because %s is thought to be active there", rsc->id);
             }
             pe_fence_node(data_set, node, reason);
         }
         free(reason);
     }
 
     if (node->details->unclean) {
         /* No extra processing needed
          * Also allows resources to be started again after a node is shot
          */
         on_fail = action_fail_ignore;
     }
 
     switch (on_fail) {
         case action_fail_ignore:
             /* nothing to do */
             break;
 
         case action_fail_fence:
             /* treat it as if it is still running
              * but also mark the node as unclean
              */
             pe_fence_node(data_set, node, "because of resource failure(s)");
             break;
 
         case action_fail_standby:
             node->details->standby = TRUE;
             node->details->standby_onfail = TRUE;
             break;
 
         case action_fail_block:
             /* is_managed == FALSE will prevent any
              * actions being sent for the resource
              */
             clear_bit(rsc->flags, pe_rsc_managed);
             set_bit(rsc->flags, pe_rsc_block);
             break;
 
         case action_fail_migrate:
             /* make sure it comes up somewhere else
              * or not at all
              */
             resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
             break;
 
         case action_fail_stop:
             rsc->next_role = RSC_ROLE_STOPPED;
             break;
 
         case action_fail_recover:
             if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
                 set_bit(rsc->flags, pe_rsc_failed);
                 stop_action(rsc, node, FALSE);
             }
             break;
 
         case action_fail_restart_container:
             set_bit(rsc->flags, pe_rsc_failed);
 
             if (rsc->container) {
                 stop_action(rsc->container, node, FALSE);
             } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
                 stop_action(rsc, node, FALSE);
             }
             break;
         case action_fail_reset_remote:
             set_bit(rsc->flags, pe_rsc_failed);
             if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
                 tmpnode = NULL;
                 if (rsc->is_remote_node) {
                     tmpnode = pe_find_node(data_set->nodes, rsc->id);
                 }
                 if (tmpnode &&
                     is_baremetal_remote_node(tmpnode) &&
                     tmpnode->details->remote_was_fenced == 0) {
 
                     /* connection resource to baremetal resource failed in a way that
                      * should result in fencing the remote-node. */
                     pe_fence_node(data_set, tmpnode, "because of connection failure(s)");
                 }
             }
 
             /* require the stop action regardless if fencing is occuring or not. */
             if (rsc->role > RSC_ROLE_STOPPED) {
                 stop_action(rsc, node, FALSE);
             }
 
             /* if reconnect delay is in use, prevent the connection from exiting the
              * "STOPPED" role until the failure is cleared by the delay timeout. */
             if (rsc->remote_reconnect_interval) {
                 rsc->next_role = RSC_ROLE_STOPPED;
             }
             break;
     }
 
     /* ensure a remote-node connection failure forces an unclean remote-node
      * to be fenced. By setting unseen = FALSE, the remote-node failure will
      * result in a fencing operation regardless if we're going to attempt to 
      * reconnect to the remote-node in this transition or not. */
     if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
         tmpnode = pe_find_node(data_set->nodes, rsc->id);
         if (tmpnode && tmpnode->details->unclean) {
             tmpnode->details->unseen = FALSE;
         }
     }
 
     if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
         if (is_set(rsc->flags, pe_rsc_orphan)) {
             if (is_set(rsc->flags, pe_rsc_managed)) {
                 crm_config_warn("Detected active orphan %s running on %s",
                                 rsc->id, node->details->uname);
             } else {
                 crm_config_warn("Cluster configured not to stop active orphans."
                                 " %s must be stopped manually on %s",
                                 rsc->id, node->details->uname);
             }
         }
 
         native_add_running(rsc, node, data_set);
         if (on_fail != action_fail_ignore) {
             set_bit(rsc->flags, pe_rsc_failed);
         }
 
     } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
         /* Only do this for older status sections that included instance numbers
          * Otherwise stopped instances will appear as orphans
          */
         pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
         free(rsc->clone_name);
         rsc->clone_name = NULL;
 
     } else {
         char *key = stop_key(rsc);
         GListPtr possible_matches = find_actions(rsc->actions, key, node);
         GListPtr gIter = possible_matches;
 
         for (; gIter != NULL; gIter = gIter->next) {
             action_t *stop = (action_t *) gIter->data;
 
             stop->flags |= pe_action_optional;
         }
 
         g_list_free(possible_matches);
         free(key);
     }
 }
 
 /* create active recurring operations as optional */
 static void
 process_recurring(node_t * node, resource_t * rsc,
                   int start_index, int stop_index,
                   GListPtr sorted_op_list, pe_working_set_t * data_set)
 {
     int counter = -1;
     const char *task = NULL;
     const char *status = NULL;
     GListPtr gIter = sorted_op_list;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
 
     for (; gIter != NULL; gIter = gIter->next) {
         xmlNode *rsc_op = (xmlNode *) gIter->data;
 
         int interval = 0;
         char *key = NULL;
         const char *id = ID(rsc_op);
         const char *interval_s = NULL;
 
         counter++;
 
         if (node->details->online == FALSE) {
             pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
             break;
 
             /* Need to check if there's a monitor for role="Stopped" */
         } else if (start_index < stop_index && counter <= stop_index) {
             pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
             continue;
 
         } else if (counter < start_index) {
             pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
             continue;
         }
 
         interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
         interval = crm_parse_int(interval_s, "0");
         if (interval == 0) {
             pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
             continue;
         }
 
         status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
         if (safe_str_eq(status, "-1")) {
             pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
             continue;
         }
         task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
         /* create the action */
         key = generate_op_key(rsc->id, task, interval);
         pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
         custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
     }
 }
 
 void
 calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
 {
     int counter = -1;
     int implied_monitor_start = -1;
     int implied_master_start = -1;
     const char *task = NULL;
     const char *status = NULL;
     GListPtr gIter = sorted_op_list;
 
     *stop_index = -1;
     *start_index = -1;
 
     for (; gIter != NULL; gIter = gIter->next) {
         xmlNode *rsc_op = (xmlNode *) gIter->data;
 
         counter++;
 
         task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
         status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
 
         if (safe_str_eq(task, CRMD_ACTION_STOP)
             && safe_str_eq(status, "0")) {
             *stop_index = counter;
 
         } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
             *start_index = counter;
 
         } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
             const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
 
             if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
                 implied_monitor_start = counter;
             }
         } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
             implied_master_start = counter;
         }
     }
 
     if (*start_index == -1) {
         if (implied_master_start != -1) {
             *start_index = implied_master_start;
         } else if (implied_monitor_start != -1) {
             *start_index = implied_monitor_start;
         }
     }
 }
 
 static resource_t *
 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     int stop_index = -1;
     int start_index = -1;
     enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
 
     const char *task = NULL;
     const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
 
     resource_t *rsc = NULL;
     GListPtr op_list = NULL;
     GListPtr sorted_op_list = NULL;
 
     xmlNode *migrate_op = NULL;
     xmlNode *rsc_op = NULL;
 
     enum action_fail_response on_fail = FALSE;
     enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
 
     crm_trace("[%s] Processing %s on %s",
               crm_element_name(rsc_entry), rsc_id, node->details->uname);
 
     /* extract operations */
     op_list = NULL;
     sorted_op_list = NULL;
 
     for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
         if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
             op_list = g_list_prepend(op_list, rsc_op);
         }
     }
 
     if (op_list == NULL) {
         /* if there are no operations, there is nothing to do */
         return NULL;
     }
 
     /* find the resource */
     rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
     if (rsc == NULL) {
         rsc = process_orphan_resource(rsc_entry, node, data_set);
     }
     CRM_ASSERT(rsc != NULL);
 
     /* process operations */
     saved_role = rsc->role;
     on_fail = action_fail_ignore;
     rsc->role = RSC_ROLE_UNKNOWN;
     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
 
     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
         xmlNode *rsc_op = (xmlNode *) gIter->data;
 
         task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
         if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
             migrate_op = rsc_op;
         }
 
         unpack_rsc_op(rsc, node, rsc_op, &on_fail, data_set);
     }
 
     /* create active recurring operations as optional */
     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
     process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
 
     /* no need to free the contents */
     g_list_free(sorted_op_list);
 
     process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
 
     if (get_target_role(rsc, &req_role)) {
         if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
             pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
                          " with requested next role %s",
                          rsc->id, role2text(rsc->next_role), role2text(req_role));
             rsc->next_role = req_role;
 
         } else if (req_role > rsc->next_role) {
             pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
                         " with requested next role %s",
                         rsc->id, role2text(rsc->next_role), role2text(req_role));
         }
     }
 
     if (saved_role > rsc->role) {
         rsc->role = saved_role;
     }
 
     return rsc;
 }
 
 static void
 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
 {
     xmlNode *rsc_entry = NULL;
     for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
         rsc_entry = __xml_next_element(rsc_entry)) {
 
         resource_t *rsc;
         resource_t *container;
         const char *rsc_id;
         const char *container_id;
 
         if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
             continue;
         }
 
         container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
         rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
         if (container_id == NULL || rsc_id == NULL) {
             continue;
         }
 
         container = pe_find_resource(data_set->resources, container_id);
         if (container == NULL) {
             continue;
         }
 
         rsc = pe_find_resource(data_set->resources, rsc_id);
         if (rsc == NULL ||
             is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
             rsc->container != NULL) {
             continue;
         }
 
         pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to  %s", rsc->id, container_id);
         rsc->container = container;
         container->fillers = g_list_append(container->fillers, rsc);
     }
 }
 
 gboolean
 unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
 {
     xmlNode *rsc_entry = NULL;
     gboolean found_orphaned_container_filler = FALSE;
     GListPtr unexpected_containers = NULL;
     GListPtr gIter = NULL;
     resource_t *remote = NULL;
 
     CRM_CHECK(node != NULL, return FALSE);
 
     crm_trace("Unpacking resources on %s", node->details->uname);
 
     for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
          rsc_entry = __xml_next_element(rsc_entry)) {
 
         if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
             resource_t *rsc;
             rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
             if (!rsc) {
                 continue;
             }
             if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
                 found_orphaned_container_filler = TRUE;
             }
             if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) {
                 remote = rsc_contains_remote_node(data_set, rsc);
                 if (remote) {
                     unexpected_containers = g_list_append(unexpected_containers, remote);
                 }
             }
         }
     }
 
     /* If a container resource is unexpectedly up... and the remote-node
      * connection resource for that container is not up, the entire container
      * must be recovered. */
     for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) {
         remote = (resource_t *) gIter->data;
         if (remote->role != RSC_ROLE_STARTED) {
             crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id);
             set_bit(remote->container->flags, pe_rsc_failed);
         }
     }
 
     /* now that all the resource state has been unpacked for this node
      * we have to go back and map any orphaned container fillers to their
      * container resource */
     if (found_orphaned_container_filler) {
         handle_orphaned_container_fillers(lrm_rsc_list, data_set);
     }
     g_list_free(unexpected_containers);
     return TRUE;
 }
 
 static void
 set_active(resource_t * rsc)
 {
     resource_t *top = uber_parent(rsc);
 
     if (top && top->variant == pe_master) {
         rsc->role = RSC_ROLE_SLAVE;
     } else {
         rsc->role = RSC_ROLE_STARTED;
     }
 }
 
 static void
 set_node_score(gpointer key, gpointer value, gpointer user_data)
 {
     node_t *node = value;
     int *score = user_data;
 
     node->weight = *score;
 }
 
 #define STATUS_PATH_MAX 1024
 static xmlNode *
 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
             pe_working_set_t * data_set)
 {
     int offset = 0;
     char xpath[STATUS_PATH_MAX];
 
     offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
     offset +=
         snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
                  resource);
 
     /* Need to check against transition_magic too? */
     if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
         offset +=
             snprintf(xpath + offset, STATUS_PATH_MAX - offset,
                      "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
                      source);
     } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
         offset +=
             snprintf(xpath + offset, STATUS_PATH_MAX - offset,
                      "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
                      source);
     } else {
         offset +=
             snprintf(xpath + offset, STATUS_PATH_MAX - offset,
                      "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
     }
 
     CRM_LOG_ASSERT(offset > 0);
     return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
 }
 
 static void
 unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) 
 {
                 
     /*
      * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src)
      *
      * So if a migrate_to is followed by a stop, then we dont need to care what
      * happended on the target node
      *
      * Without the stop, we need to look for a successful migrate_from.
      * This would also imply we're no longer running on the source
      *
      * Without the stop, and without a migrate_from op we make sure the resource
      * gets stopped on both source and target (assuming the target is up)
      *
      */
     int stop_id = 0;
     int task_id = 0;
     xmlNode *stop_op =
         find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set);
 
     if (stop_op) {
         crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
     }
 
     crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
 
     if (stop_op == NULL || stop_id < task_id) {
         int from_rc = 0, from_status = 0;
         const char *migrate_source =
             crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *migrate_target =
             crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
 
         node_t *target = pe_find_node(data_set->nodes, migrate_target);
         node_t *source = pe_find_node(data_set->nodes, migrate_source);
         xmlNode *migrate_from =
             find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
                         data_set);
 
         rsc->role = RSC_ROLE_STARTED;       /* can be master? */
         if (migrate_from) {
             crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
             crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
             pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
                          ID(migrate_from), migrate_target, from_status, from_rc);
         }
 
         if (migrate_from && from_rc == PCMK_OCF_OK
             && from_status == PCMK_LRM_OP_DONE) {
             pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
                          migrate_source);
 
             /* all good
              * just need to arrange for the stop action to get sent
              * but _without_ affecting the target somehow
              */
             rsc->role = RSC_ROLE_STOPPED;
             rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
 
         } else if (migrate_from) {  /* Failed */
             if (target && target->details->online) {
                 pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
                              target->details->online);
                 native_add_running(rsc, target, data_set);
             }
 
         } else {    /* Pending or complete but erased */
             if (target && target->details->online) {
                 pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
                              target->details->online);
 
                 native_add_running(rsc, target, data_set);
                 if (source && source->details->online) {
                     /* If we make it here we have a partial migration.  The migrate_to
                      * has completed but the migrate_from on the target has not. Hold on
                      * to the target and source on the resource. Later on if we detect that
                      * the resource is still going to run on that target, we may continue
                      * the migration */
                     rsc->partial_migration_target = target;
                     rsc->partial_migration_source = source;
                 }
             } else {
                 /* Consider it failed here - forces a restart, prevents migration */
                 set_bit(rsc->flags, pe_rsc_failed);
                 clear_bit(rsc->flags, pe_rsc_allow_migrate);
             }
         }
     }
 }
 
 static void
 unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) 
 {
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
 
     CRM_ASSERT(rsc);
     if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
         int stop_id = 0;
         int migrate_id = 0;
         const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
 
         xmlNode *stop_op =
             find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
         xmlNode *migrate_op =
             find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target,
                         data_set);
 
         if (stop_op) {
             crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
         }
         if (migrate_op) {
             crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
         }
 
         /* Get our state right */
         rsc->role = RSC_ROLE_STARTED;   /* can be master? */
 
         if (stop_op == NULL || stop_id < migrate_id) {
             node_t *source = pe_find_node(data_set->nodes, migrate_source);
 
             if (source && source->details->online) {
                 native_add_running(rsc, source, data_set);
             }
         }
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
         int stop_id = 0;
         int migrate_id = 0;
         const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
 
         xmlNode *stop_op =
             find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
         xmlNode *migrate_op =
             find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
                         data_set);
 
         if (stop_op) {
             crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
         }
         if (migrate_op) {
             crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
         }
 
         /* Get our state right */
         rsc->role = RSC_ROLE_STARTED;   /* can be master? */
 
         if (stop_op == NULL || stop_id < migrate_id) {
             node_t *target = pe_find_node(data_set->nodes, migrate_target);
 
             pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op,
                          migrate_id);
             if (target && target->details->online) {
                 native_add_running(rsc, target, data_set);
             }
 
         } else if (migrate_op == NULL) {
             /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
             rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
         }
     }
 }
 
 static void
 record_failed_op(xmlNode *op, node_t* node, pe_working_set_t * data_set)
 {
     xmlNode *xIter = NULL;
     const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
 
     if (node->details->shutdown) {
         return;
     } else if(node->details->online == FALSE) {
         return;
     }
 
     for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
         const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
         const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
 
         if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
             crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
             return;
         }
     }
 
     crm_trace("Adding entry %s on %s", op_key, node->details->uname);
     crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
     add_node_copy(data_set->failed, op);
 }
 
 static const char *get_op_key(xmlNode *xml_op)
 {
     const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
     if(key == NULL) {
         key = ID(xml_op);
     }
     return key;
 }
 
 static void
 unpack_rsc_op_failure(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, enum action_fail_response *on_fail, pe_working_set_t * data_set) 
 {
     int interval = 0;
     bool is_probe = FALSE;
     action_t *action = NULL;
 
     const char *key = get_op_key(xml_op);
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
 
     CRM_ASSERT(rsc);
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
     if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         is_probe = TRUE;
         pe_rsc_trace(rsc, "is a probe: %s", key);
     }
 
     if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
         crm_warn("Processing failed op %s for %s on %s: %s (%d)",
                  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
                  rc);
 
         record_failed_op(xml_op, node, data_set);
 
     } else {
         crm_trace("Processing failed op %s for %s on %s: %s (%d)",
                  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
                  rc);
     }
 
     action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
     if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
         (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
         (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
         (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
         pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
                      fail2text(action->on_fail), action->uuid, key);
         *on_fail = action->on_fail;
     }
 
     if (safe_str_eq(task, CRMD_ACTION_STOP)) {
         resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
         unpack_rsc_migration_failure(rsc, node, xml_op, data_set);
 
     } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
         rsc->role = RSC_ROLE_MASTER;
 
     } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
         /*
          * staying in role=master ends up putting the PE/TE into a loop
          * setting role=slave is not dangerous because no master will be
          * promoted until the failed resource has been fully stopped
          */
         rsc->next_role = RSC_ROLE_STOPPED;
         if (action->on_fail == action_fail_block) {
             rsc->role = RSC_ROLE_MASTER;
 
         } else {
             crm_warn("Forcing %s to stop after a failed demote action", rsc->id);
             rsc->role = RSC_ROLE_SLAVE;
         }
 
     } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) {
         crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname);
         resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set);
     }
 
     if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
         /* leave stopped */
         pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
         rsc->role = RSC_ROLE_STOPPED;
 
     } else if (rsc->role < RSC_ROLE_STARTED) {
         pe_rsc_trace(rsc, "Setting %s active", rsc->id);
         set_active(rsc);
     }
 
     pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
                  rsc->id, role2text(rsc->role),
                  node->details->unclean ? "true" : "false",
                  fail2text(action->on_fail), role2text(action->fail_role));
 
     if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
         rsc->next_role = action->fail_role;
     }
 
     if (action->fail_role == RSC_ROLE_STOPPED) {
         int score = -INFINITY;
 
         resource_t *fail_rsc = rsc;
 
         if (fail_rsc->parent) {
             resource_t *parent = uber_parent(fail_rsc);
 
             if ((parent->variant == pe_clone || parent->variant == pe_master)
                 && is_not_set(parent->flags, pe_rsc_unique)) {
                 /* for clone and master resources, if a child fails on an operation
                  * with on-fail = stop, all the resources fail.  Do this by preventing
                  * the parent from coming up again. */
                 fail_rsc = parent;
             }
         }
         crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
         /* make sure it doesnt come up again */
         g_hash_table_destroy(fail_rsc->allowed_nodes);
         fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
         g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
     }
 
     pe_free_action(action);
 }
 
 static int
 determine_op_status(
     resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) 
 {
     int interval = 0;
     int result = PCMK_LRM_OP_DONE;
 
     const char *key = get_op_key(xml_op);
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
 
     bool is_probe = FALSE;
 
     CRM_ASSERT(rsc);
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
     if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         is_probe = TRUE;
     }
 
     if (target_rc >= 0 && target_rc != rc) {
         result = PCMK_LRM_OP_ERROR;
         pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
                      key, node->details->uname,
                      services_ocf_exitcode_str(rc), rc,
                      services_ocf_exitcode_str(target_rc), target_rc);
     }
 
     /* we could clean this up significantly except for old LRMs and CRMs that
      * didnt include target_rc and liked to remap status
      */
     switch (rc) {
         case PCMK_OCF_OK:
             if (is_probe && target_rc == 7) {
                 result = PCMK_LRM_OP_DONE;
                 set_bit(rsc->flags, pe_rsc_unexpectedly_running);
                 pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
                             task, rsc->id, node->details->uname);
 
                 /* legacy code for pre-0.6.5 operations */
             } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) {
                 /* catch status ops that return 0 instead of 8 while they
                  *   are supposed to be in master mode
                  */
                 result = PCMK_LRM_OP_ERROR;
             }
             break;
 
         case PCMK_OCF_NOT_RUNNING:
             if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
                 result = PCMK_LRM_OP_DONE;
                 rsc->role = RSC_ROLE_STOPPED;
 
                 /* clear any previous failure actions */
                 *on_fail = action_fail_ignore;
                 rsc->next_role = RSC_ROLE_UNKNOWN;
 
             } else if (safe_str_neq(task, CRMD_ACTION_STOP)) {
                 result = PCMK_LRM_OP_ERROR;
             }
             break;
 
         case PCMK_OCF_RUNNING_MASTER:
             if (is_probe) {
                 result = PCMK_LRM_OP_DONE;
                 pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
                             task, rsc->id, node->details->uname);
 
             } else if (target_rc == rc) {
                 /* nothing to do */
 
             } else if (target_rc >= 0) {
                 result = PCMK_LRM_OP_ERROR;
 
                 /* legacy code for pre-0.6.5 operations */
             } else if (safe_str_neq(task, CRMD_ACTION_STATUS)
                        || rsc->role != RSC_ROLE_MASTER) {
                 result = PCMK_LRM_OP_ERROR;
                 if (rsc->role != RSC_ROLE_MASTER) {
                     crm_err("%s reported %s in master mode on %s",
                             key, rsc->id, node->details->uname);
                 }
             }
             rsc->role = RSC_ROLE_MASTER;
             break;
 
         case PCMK_OCF_DEGRADED_MASTER:
         case PCMK_OCF_FAILED_MASTER:
             rsc->role = RSC_ROLE_MASTER;
             result = PCMK_LRM_OP_ERROR;
             break;
 
         case PCMK_OCF_NOT_CONFIGURED:
             result = PCMK_LRM_OP_ERROR_FATAL;
             break;
 
         case PCMK_OCF_NOT_INSTALLED:
         case PCMK_OCF_INVALID_PARAM:
         case PCMK_OCF_INSUFFICIENT_PRIV:
         case PCMK_OCF_UNIMPLEMENT_FEATURE:
             if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) {
                 result = PCMK_LRM_OP_NOTSUPPORTED;
                 break;
 
             } else if(pe_can_fence(data_set, node) == FALSE
                && safe_str_eq(task, CRMD_ACTION_STOP)) {
                 /* If a stop fails and we can't fence, there's nothing else we can do */
                 pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
                             rsc->id, task, services_ocf_exitcode_str(rc), rc);
                 clear_bit(rsc->flags, pe_rsc_managed);
                 set_bit(rsc->flags, pe_rsc_block);
             }
             result = PCMK_LRM_OP_ERROR_HARD;
             break;
 
         default:
             if (result == PCMK_LRM_OP_DONE) {
                 crm_info("Treating %s (rc=%d) on %s as an ERROR",
                          key, rc, node->details->uname);
                 result = PCMK_LRM_OP_ERROR;
             }
     }
 
     return result;
 }
 
 static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
 {
     bool expired = FALSE;
     time_t last_failure = 0;
     int clear_failcount = 0;
     int interval = 0;
     int failure_timeout = rsc->failure_timeout;
     const char *key = get_op_key(xml_op);
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
 
     /* clearing recurring monitor operation failures automatically
      * needs to be carefully considered */
     if (safe_str_eq(crm_element_value(xml_op, XML_LRM_ATTR_TASK), "monitor") &&
         safe_str_neq(crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL), "0")) {
 
         /* TODO, in the future we should consider not clearing recurring monitor
          * op failures unless the last action for a resource was a "stop" action.
          * otherwise it is possible that clearing the monitor failure will result
          * in the resource being in an undeterministic state.
          *
          * For now we handle this potential undeterministic condition for remote
          * node connection resources by not clearing a recurring monitor op failure
          * until after the node has been fenced. */
 
         if (is_set(data_set->flags, pe_flag_stonith_enabled) &&
             (rsc->remote_reconnect_interval)) {
 
             node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
             if (remote_node && remote_node->details->remote_was_fenced == 0) {
-            
-                crm_info("Waiting to clear monitor failure for remote node %s until fencing has occured", rsc->id); 
+                if (strstr(ID(xml_op), "last_failure")) {
+                    crm_info("Waiting to clear monitor failure for remote node %s until fencing has occured", rsc->id); 
+                }
                 /* disabling failure timeout for this operation because we believe
                  * fencing of the remote node should occur first. */ 
                 failure_timeout = 0;
             }
         }
     }
 
     if (failure_timeout > 0) {
         int last_run = 0;
 
         if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
             time_t now = get_effective_time(data_set);
 
             if (now > (last_run + failure_timeout)) {
                 expired = TRUE;
             }
         }
     }
 
     if (expired) {
         if (failure_timeout > 0) {
             int fc = get_failcount_full(node, rsc, &last_failure, FALSE, xml_op, data_set);
             if(fc) {
                 if (get_failcount_full(node, rsc, &last_failure, TRUE, xml_op, data_set) == 0) {
                     clear_failcount = 1;
                     crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname);
 
                 } else {
                     expired = FALSE;
                 }
+            } else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) {
+                /* always clear last failure when reconnect interval is set */
+                clear_failcount = 1;
             }
         }
 
     } else if (strstr(ID(xml_op), "last_failure") &&
                ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
 
         op_digest_cache_t *digest_data = NULL;
 
         digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
 
         if (digest_data->rc == RSC_DIGEST_UNKNOWN) {
             crm_trace("rsc op %s on node %s does not have a op digest to compare against", rsc->id,
                       key, node->details->id);
         } else if (digest_data->rc != RSC_DIGEST_MATCH) {
             clear_failcount = 1;
             crm_info
                 ("Clearing failcount for %s on %s, %s failed and now resource parameters have changed.",
                  task, rsc->id, node->details->uname);
         }
     }
 
     if (clear_failcount) {
         action_t *clear_op = NULL;
 
         clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
                                  CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
         add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
     }
 
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
     if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         switch(rc) {
             case PCMK_OCF_OK:
             case PCMK_OCF_NOT_RUNNING:
             case PCMK_OCF_RUNNING_MASTER:
             case PCMK_OCF_DEGRADED:
             case PCMK_OCF_DEGRADED_MASTER:
                 /* Don't expire probes that return these values */ 
                 expired = FALSE;
                 break;
         }
     }
     
     return expired;
 }
 
 int get_target_rc(xmlNode *xml_op)
 {
     int dummy = 0;
     int target_rc = 0;
     char *dummy_string = NULL;
     const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
     if (key == NULL) {
         return -1;
     }
 
     decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
     free(dummy_string);
 
     return target_rc;
 }
 
 static enum action_fail_response
 get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) 
 {
     int result = action_fail_recover;
     action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
 
     result = action->on_fail;
     pe_free_action(action);
 
     return result;
 }
 
 static void
 update_resource_state(resource_t *rsc, node_t * node, xmlNode * xml_op, const char *task, int rc,
                       enum action_fail_response *on_fail, pe_working_set_t * data_set) 
 {
     gboolean clear_past_failure = FALSE;
 
     CRM_ASSERT(rsc);
     if (rc == PCMK_OCF_NOT_RUNNING) {
         clear_past_failure = TRUE;
 
     } else if (rc == PCMK_OCF_NOT_INSTALLED) {
         rsc->role = RSC_ROLE_STOPPED;
 
     } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
         clear_past_failure = TRUE;
         if (rsc->role < RSC_ROLE_STARTED) {
             set_active(rsc);
         }
 
     } else if (safe_str_eq(task, CRMD_ACTION_START)) {
         rsc->role = RSC_ROLE_STARTED;
         clear_past_failure = TRUE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
         rsc->role = RSC_ROLE_STOPPED;
         clear_past_failure = TRUE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
         rsc->role = RSC_ROLE_MASTER;
         clear_past_failure = TRUE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
         /* Demote from Master does not clear an error */
         rsc->role = RSC_ROLE_SLAVE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
         rsc->role = RSC_ROLE_STARTED;
         clear_past_failure = TRUE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
         unpack_rsc_migration(rsc, node, xml_op, data_set);
 
     } else if (rsc->role < RSC_ROLE_STARTED) {
         /* migrate_to and migrate_from will land here */
         pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
         set_active(rsc);
     }
 
     /* clear any previous failure actions */
     if (clear_past_failure) {
         switch (*on_fail) {
             case action_fail_stop:
             case action_fail_fence:
             case action_fail_migrate:
             case action_fail_standby:
                 pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
                              rsc->id, fail2text(*on_fail));
                 break;
 
             case action_fail_block:
             case action_fail_ignore:
             case action_fail_recover:
             case action_fail_restart_container:
                 *on_fail = action_fail_ignore;
                 rsc->next_role = RSC_ROLE_UNKNOWN;
                 break;
             case action_fail_reset_remote:
                 if (rsc->remote_reconnect_interval == 0) {
                     /* when reconnect delay is not in use, the connection is allowed
                      * to start again after the remote node is fenced and completely
                      * stopped. Otherwise, with reconnect delay we wait for the failure
                      * to be cleared entirely before reconnected can be attempted. */ 
                     *on_fail = action_fail_ignore;
                     rsc->next_role = RSC_ROLE_UNKNOWN;
                 }
                 break;
         }
     }
 }
 
 gboolean
 unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
               enum action_fail_response * on_fail, pe_working_set_t * data_set)
 {
     int task_id = 0;
 
     const char *key = NULL;
     const char *task = NULL;
     const char *task_key = NULL;
 
     int rc = 0;
     int status = PCMK_LRM_OP_PENDING-1;
     int target_rc = get_target_rc(xml_op);
     int interval = 0;
 
     gboolean expired = FALSE;
     resource_t *parent = rsc;
     enum action_fail_response failure_strategy = action_fail_recover;
 
     CRM_CHECK(rsc != NULL, return FALSE);
     CRM_CHECK(node != NULL, return FALSE);
     CRM_CHECK(xml_op != NULL, return FALSE);
 
     task_key = get_op_key(xml_op);
 
     task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
 
     crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
     crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
     crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
 
     CRM_CHECK(task != NULL, return FALSE);
     CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
     CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
 
     if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
         /* safe to ignore these */
         return TRUE;
     }
 
     if (is_not_set(rsc->flags, pe_rsc_unique)) {
         parent = uber_parent(rsc);
     }
     
     pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
                  task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
 
     if (node->details->unclean) {
         pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
                      " Further action depends on the value of the stop's on-fail attribue",
                      node->details->uname, rsc->id);
     }
 
     if (status == PCMK_LRM_OP_ERROR) {
         /* Older versions set this if rc != 0 but its up to us to decide */
         status = PCMK_LRM_OP_DONE;
     }
 
     if(status != PCMK_LRM_OP_NOT_INSTALLED) {
         expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
     }
 
     /* Degraded results are informational only, re-map them to their error-free equivalents */
     if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         rc = PCMK_OCF_OK;
 
         /* Add them to the failed list to highlight them for the user */
         if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
             crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
             record_failed_op(xml_op, node, data_set);
         }
 
     } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         rc = PCMK_OCF_RUNNING_MASTER;
 
         /* Add them to the failed list to highlight them for the user */
         if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
             crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER);
             record_failed_op(xml_op, node, data_set);
         }
     }
 
     if (expired && target_rc != rc) {
         const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
 
         pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
                      key, node->details->uname,
                      services_ocf_exitcode_str(rc), rc,
                      services_ocf_exitcode_str(target_rc), target_rc);
 
         if(interval == 0) {
             crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
                        task_key, rc, magic, node->details->uname);
             goto done;
 
         } else if(node->details->online && node->details->unclean == FALSE) {
             crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
                        task_key, rc, magic, node->details->uname);
             /* This is SO horrible, but we don't have access to CancelXmlOp() yet */
             crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
             goto done;
         }
     }
 
     if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
         status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
     }
 
     pe_rsc_trace(rsc, "Handling status: %d", status);
     switch (status) {
         case PCMK_LRM_OP_CANCELLED:
             /* do nothing?? */
             pe_err("Don't know what to do for cancelled ops yet");
             break;
 
         case PCMK_LRM_OP_PENDING:
             if (safe_str_eq(task, CRMD_ACTION_START)) {
                 set_bit(rsc->flags, pe_rsc_start_pending);
                 set_active(rsc);
 
             } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
                 rsc->role = RSC_ROLE_MASTER;
 
             } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
                 /* If a pending migrate_to action is out on a unclean node,
                  * we have to force the stop action on the target. */
                 const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
                 node_t *target = pe_find_node(data_set->nodes, migrate_target);
                 if (target) {
                     stop_action(rsc, target, FALSE);
                 }
             }
 
             if (rsc->pending_task == NULL) {
                 if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) {
                     /* Pending probes are not printed, even if pending
                      * operations are requested. If someone ever requests that
                      * behavior, uncomment this and the corresponding part of
                      * native.c:native_pending_task().
                      */
                     /*rsc->pending_task = strdup("probe");*/
 
                 } else {
                     rsc->pending_task = strdup(task);
                 }
             }
             break;
 
         case PCMK_LRM_OP_DONE:
             pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
             update_resource_state(rsc, node, xml_op, task, rc, on_fail, data_set);
             break;
 
         case PCMK_LRM_OP_NOT_INSTALLED:
             failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
             if (failure_strategy == action_fail_ignore) {
                 crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
                          "Resource agent doesn't exist",
                          task_key, status, rc, node->details->uname);
                 /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
                 *on_fail = action_fail_migrate;
             }
             resource_location(parent, node, -INFINITY, "hard-error", data_set);
             unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set);
             break;
 
         case PCMK_LRM_OP_ERROR:
         case PCMK_LRM_OP_ERROR_HARD:
         case PCMK_LRM_OP_ERROR_FATAL:
         case PCMK_LRM_OP_TIMEOUT:
         case PCMK_LRM_OP_NOTSUPPORTED:
 
             failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
             if ((failure_strategy == action_fail_ignore)
                 || (failure_strategy == action_fail_restart_container
                     && safe_str_eq(task, CRMD_ACTION_STOP))) {
 
                 crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
                          task_key, rc, node->details->uname);
 
                 update_resource_state(rsc, node, xml_op, task, target_rc, on_fail, data_set);
                 crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
                 set_bit(rsc->flags, pe_rsc_failure_ignored);
 
                 record_failed_op(xml_op, node, data_set);
 
                 if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
                     *on_fail = failure_strategy;
                 }
 
             } else {
                 unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set);
 
                 if(status == PCMK_LRM_OP_ERROR_HARD) {
                     do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
                                "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
                                parent->id, node->details->uname,
                                task, services_ocf_exitcode_str(rc), rc);
 
                     resource_location(parent, node, -INFINITY, "hard-error", data_set);
 
                 } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
                     crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
                             parent->id, task, services_ocf_exitcode_str(rc), rc);
 
                     resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
                 }
             }
             break;
     }
 
   done:
     pe_rsc_trace(rsc, "Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role));
     return TRUE;
 }
 
 gboolean
 add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
 {
     const char *cluster_name = NULL;
 
     g_hash_table_insert(node->details->attrs,
                         strdup("#uname"), strdup(node->details->uname));
 
     g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id));
     if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
         data_set->dc_node = node;
         node->details->is_dc = TRUE;
         g_hash_table_insert(node->details->attrs,
                             strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE));
     } else {
         g_hash_table_insert(node->details->attrs,
                             strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE));
     }
 
     cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
     if (cluster_name) {
         g_hash_table_insert(node->details->attrs, strdup("#cluster-name"), strdup(cluster_name));
     }
 
     unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
                                node->details->attrs, NULL, overwrite, data_set->now);
 
     if (g_hash_table_lookup(node->details->attrs, "#site-name") == NULL) {
         const char *site_name = g_hash_table_lookup(node->details->attrs, "site-name");
 
         if (site_name) {
             /* Prefix '#' to the key */
             g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(site_name));
 
         } else if (cluster_name) {
             /* Default to cluster-name if unset */
             g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(cluster_name));
         }
     }
     return TRUE;
 }
 
 static GListPtr
 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
 {
     int counter = -1;
     int stop_index = -1;
     int start_index = -1;
 
     xmlNode *rsc_op = NULL;
 
     GListPtr gIter = NULL;
     GListPtr op_list = NULL;
     GListPtr sorted_op_list = NULL;
 
     /* extract operations */
     op_list = NULL;
     sorted_op_list = NULL;
 
     for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
         if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
             crm_xml_add(rsc_op, "resource", rsc);
             crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
             op_list = g_list_prepend(op_list, rsc_op);
         }
     }
 
     if (op_list == NULL) {
         /* if there are no operations, there is nothing to do */
         return NULL;
     }
 
     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
 
     /* create active recurring operations as optional */
     if (active_filter == FALSE) {
         return sorted_op_list;
     }
 
     op_list = NULL;
 
     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
 
     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
         xmlNode *rsc_op = (xmlNode *) gIter->data;
 
         counter++;
 
         if (start_index < stop_index) {
             crm_trace("Skipping %s: not active", ID(rsc_entry));
             break;
 
         } else if (counter < start_index) {
             crm_trace("Skipping %s: old", ID(rsc_op));
             continue;
         }
         op_list = g_list_append(op_list, rsc_op);
     }
 
     g_list_free(sorted_op_list);
     return op_list;
 }
 
 GListPtr
 find_operations(const char *rsc, const char *node, gboolean active_filter,
                 pe_working_set_t * data_set)
 {
     GListPtr output = NULL;
     GListPtr intermediate = NULL;
 
     xmlNode *tmp = NULL;
     xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
 
     node_t *this_node = NULL;
 
     xmlNode *node_state = NULL;
 
     for (node_state = __xml_first_child(status); node_state != NULL;
          node_state = __xml_next_element(node_state)) {
 
         if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
             const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
 
             if (node != NULL && safe_str_neq(uname, node)) {
                 continue;
             }
 
             this_node = pe_find_node(data_set->nodes, uname);
             if(this_node == NULL) {
                 CRM_LOG_ASSERT(this_node != NULL);
                 continue;
 
             } else if (is_remote_node(this_node)) {
                 determine_remote_online_status(this_node);
             } else {
                 determine_online_status(node_state, this_node, data_set);
             }
 
             if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
                 /* offline nodes run no resources...
                  * unless stonith is enabled in which case we need to
                  *   make sure rsc start events happen after the stonith
                  */
                 xmlNode *lrm_rsc = NULL;
 
                 tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
                 tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
 
                 for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL;
                      lrm_rsc = __xml_next_element(lrm_rsc)) {
                     if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
 
                         const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
 
                         if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
                             continue;
                         }
 
                         intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
                         output = g_list_concat(output, intermediate);
                     }
                 }
             }
         }
     }
 
     return output;
 }
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 4b6fca1c99..68cafd47cd 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1,2659 +1,2687 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 
 #include <crm/crm.h>
 #include <crm/cib.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <glib.h>
 
 #include <crm/pengine/status.h>
 #include <pengine.h>
 #include <allocate.h>
 #include <utils.h>
 
 CRM_TRACE_INIT_DATA(pe_allocate);
 
 void set_alloc_actions(pe_working_set_t * data_set);
 void migrate_reload_madness(pe_working_set_t * data_set);
 
 resource_alloc_functions_t resource_class_alloc_functions[] = {
     {
      native_merge_weights,
      native_color,
      native_create_actions,
      native_create_probe,
      native_internal_constraints,
      native_rsc_colocation_lh,
      native_rsc_colocation_rh,
      native_rsc_location,
      native_action_flags,
      native_update_actions,
      native_expand,
      native_append_meta,
      },
     {
      group_merge_weights,
      group_color,
      group_create_actions,
      native_create_probe,
      group_internal_constraints,
      group_rsc_colocation_lh,
      group_rsc_colocation_rh,
      group_rsc_location,
      group_action_flags,
      group_update_actions,
      group_expand,
      group_append_meta,
      },
     {
      clone_merge_weights,
      clone_color,
      clone_create_actions,
      clone_create_probe,
      clone_internal_constraints,
      clone_rsc_colocation_lh,
      clone_rsc_colocation_rh,
      clone_rsc_location,
      clone_action_flags,
      clone_update_actions,
      clone_expand,
      clone_append_meta,
      },
     {
      master_merge_weights,
      master_color,
      master_create_actions,
      clone_create_probe,
      master_internal_constraints,
      clone_rsc_colocation_lh,
      master_rsc_colocation_rh,
      clone_rsc_location,
      clone_action_flags,
      clone_update_actions,
      clone_expand,
      master_append_meta,
      }
 };
 
 gboolean
 update_action_flags(action_t * action, enum pe_action_flags flags)
 {
     static unsigned long calls = 0;
     gboolean changed = FALSE;
     gboolean clear = is_set(flags, pe_action_clear);
     enum pe_action_flags last = action->flags;
 
     if (clear) {
         pe_clear_action_bit(action, flags);
     } else {
         pe_set_action_bit(action, flags);
     }
 
     if (last != action->flags) {
         calls++;
         changed = TRUE;
         /* Useful for tracking down _who_ changed a specific flag */
         /* CRM_ASSERT(calls != 534); */
         clear_bit(flags, pe_action_clear);
         crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu)",
                   action->uuid, action->node ? action->node->details->uname : "[none]",
                   clear ? "un-" : "", flags, last, action->flags, calls);
     }
 
     return changed;
 }
 
 static gboolean
 check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry,
                      gboolean active_here, pe_working_set_t * data_set)
 {
     int attr_lpc = 0;
     gboolean force_restart = FALSE;
     gboolean delete_resource = FALSE;
     gboolean changed = FALSE;
 
     const char *value = NULL;
     const char *old_value = NULL;
 
     const char *attr_list[] = {
         XML_ATTR_TYPE,
         XML_AGENT_ATTR_CLASS,
         XML_AGENT_ATTR_PROVIDER
     };
 
     for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
         value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
         old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
         if (value == old_value  /* ie. NULL */
             || crm_str_eq(value, old_value, TRUE)) {
             continue;
         }
 
         changed = TRUE;
         trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
         if (active_here) {
             force_restart = TRUE;
             crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
                        rsc->id, node->details->uname, attr_list[attr_lpc],
                        crm_str(old_value), crm_str(value));
         }
     }
     if (force_restart) {
         /* make sure the restart happens */
         stop_action(rsc, node, FALSE);
         set_bit(rsc->flags, pe_rsc_start_pending);
         delete_resource = TRUE;
 
     } else if (changed) {
         delete_resource = TRUE;
     }
     return delete_resource;
 }
 
 static void
 CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node,
             const char *reason, pe_working_set_t * data_set)
 {
     int interval = 0;
     action_t *cancel = NULL;
 
     char *key = NULL;
     const char *task = NULL;
     const char *call_id = NULL;
     const char *interval_s = NULL;
 
     CRM_CHECK(xml_op != NULL, return);
     CRM_CHECK(active_node != NULL, return);
 
     task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
     interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
 
     interval = crm_parse_int(interval_s, "0");
 
     /* we need to reconstruct the key because of the way we used to construct resource IDs */
     key = generate_op_key(rsc->id, task, interval);
 
     crm_info("Action %s on %s will be stopped: %s",
              key, active_node->details->uname, reason ? reason : "unknown");
 
     /* TODO: This looks highly dangerous if we ever try to schedule 'key' too */
     cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set);
 
     free(cancel->task);
     free(cancel->cancel_task);
     cancel->task = strdup(RSC_CANCEL);
     cancel->cancel_task = strdup(task);
 
     add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task);
     add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
     add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s);
 
     custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
     free(key);
     key = NULL;
 }
 
 static gboolean
 check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op,
                         pe_working_set_t * data_set)
 {
     char *key = NULL;
     int interval = 0;
     const char *interval_s = NULL;
     const op_digest_cache_t *digest_data = NULL;
     gboolean did_change = FALSE;
 
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     const char *op_version;
     const char *digest_secure = NULL;
 
     CRM_CHECK(active_node != NULL, return FALSE);
     if (safe_str_eq(task, RSC_STOP)) {
         return FALSE;
     }
 
     interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
     interval = crm_parse_int(interval_s, "0");
 
     if (interval > 0) {
         xmlNode *op_match = NULL;
 
         /* we need to reconstruct the key because of the way we used to construct resource IDs */
         key = generate_op_key(rsc->id, task, interval);
 
         pe_rsc_trace(rsc, "Checking parameters for %s", key);
         op_match = find_rsc_op_entry(rsc, key);
 
         if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
             CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
             free(key);
             return TRUE;
 
         } else if (op_match == NULL) {
             pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
             free(key);
             return TRUE;
         }
         free(key);
         key = NULL;
     }
 
     crm_trace("Testing %s_%s_%d on %s", rsc->id, task, interval, active_node?active_node->details->uname:"N/A");
     if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
         /* Reload based on the start action not a probe */
         task = RSC_START;
 
     } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) {
         /* Reload based on the start action not a migrate */
         task = RSC_START;
     } else if (interval == 0 && safe_str_eq(task, RSC_PROMOTE)) {
         /* Reload based on the start action not a promote */
         task = RSC_START;
     }
 
     op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
     digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);
 
     if(is_set(data_set->flags, pe_flag_sanitized)) {
         digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
     }
 
     if(digest_data->rc != RSC_DIGEST_MATCH
        && digest_secure
        && digest_data->digest_secure_calc
        && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
         fprintf(stdout, "Only 'private' parameters to %s_%s_%d on %s changed: %s\n",
                 rsc->id, task, interval, active_node->details->uname,
                 crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
 
     } else if (digest_data->rc == RSC_DIGEST_RESTART) {
         /* Changes that force a restart */
         const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
 
         did_change = TRUE;
         key = generate_op_key(rsc->id, task, interval);
         crm_log_xml_info(digest_data->params_restart, "params:restart");
         pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s",
                  key, active_node->details->uname,
                  crm_str(digest_restart), digest_data->digest_restart_calc,
                  op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
 
         custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set);
         trigger_unfencing(rsc, NULL, "Device parameters changed", NULL, data_set);
 
     } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
         /* Changes that can potentially be handled by a reload */
         const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
         const char *digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
 
         did_change = TRUE;
         trigger_unfencing(rsc, NULL, "Device parameters changed (reload)", NULL, data_set);
         crm_log_xml_info(digest_data->params_all, "params:reload");
         key = generate_op_key(rsc->id, task, interval);
         pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s",
                  key, active_node->details->uname,
                  crm_str(digest_all), digest_data->digest_all_calc, op_version,
                  crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
 
         if (interval > 0) {
             action_t *op = NULL;
 
 #if 0
             /* Always reload/restart the entire resource */
             op = custom_action(rsc, start_key(rsc), RSC_START, NULL, FALSE, TRUE, data_set);
             update_action_flags(op, pe_action_allow_reload_conversion);
 #else
             /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
             op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
             set_bit(op->flags, pe_action_reschedule);
 #endif
 
         } else if (digest_restart && rsc->isolation_wrapper == NULL && (uber_parent(rsc))->isolation_wrapper == NULL) {
             pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);
 
             /* Allow this resource to reload - unless something else causes a full restart */
             set_bit(rsc->flags, pe_rsc_try_reload);
 
             /* Create these for now, it keeps the action IDs the same in the regression outputs */
             custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
 
         } else {
             pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);
 
             /* Re-send the start/demote/promote op
              * Recurring ops will be detected independantly
              */
             custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set);
         }
     }
 
     return did_change;
 }
 
 extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional,
                           pe_working_set_t * data_set);
 
 static void
 check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     int offset = -1;
     int interval = 0;
     int stop_index = 0;
     int start_index = 0;
 
     const char *task = NULL;
     const char *interval_s = NULL;
 
     xmlNode *rsc_op = NULL;
     GListPtr op_list = NULL;
     GListPtr sorted_op_list = NULL;
     gboolean is_probe = FALSE;
     gboolean did_change = FALSE;
 
     CRM_CHECK(node != NULL, return);
 
     if (is_set(rsc->flags, pe_rsc_orphan)) {
         resource_t *parent = uber_parent(rsc);
         if(parent == NULL
            || parent->variant < pe_clone
            || is_set(parent->flags, pe_rsc_unique)) {
             pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
             DeleteRsc(rsc, node, FALSE, data_set);
         } else {
             pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
         }
         return;
 
     } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
         if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
             DeleteRsc(rsc, node, FALSE, data_set);
         }
         pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
                      rsc->id, node->details->uname);
         return;
     }
 
     pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);
 
     if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
         DeleteRsc(rsc, node, FALSE, data_set);
     }
 
     for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
         if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
             op_list = g_list_prepend(op_list, rsc_op);
         }
     }
 
     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
 
     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
         xmlNode *rsc_op = (xmlNode *) gIter->data;
 
         offset++;
 
         if (start_index < stop_index) {
             /* stopped */
             continue;
         } else if (offset < start_index) {
             /* action occurred prior to a start */
             continue;
         }
 
         is_probe = FALSE;
         did_change = FALSE;
         task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
 
         interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
         interval = crm_parse_int(interval_s, "0");
 
         if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
             is_probe = TRUE;
         }
 
         if (interval > 0 &&
             (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
             CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);
 
         } else if (is_probe || safe_str_eq(task, RSC_START) || safe_str_eq(task, RSC_PROMOTE) || interval > 0
                    || safe_str_eq(task, RSC_MIGRATED)) {
             did_change = check_action_definition(rsc, node, rsc_op, data_set);
         }
 
         if (did_change && get_failcount(node, rsc, NULL, data_set)) {
             char *key = NULL;
             action_t *action_clear = NULL;
 
             key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
             action_clear =
                 custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
             set_bit(action_clear->flags, pe_action_runnable);
         }
     }
 
     g_list_free(sorted_op_list);
 
 }
 
 static GListPtr
 find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones,
               gboolean partial, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     gboolean match = FALSE;
 
     if (id == NULL) {
         return NULL;
 
     } else if (rsc == NULL && data_set) {
 
         for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
         }
 
         return result;
 
     } else if (rsc == NULL) {
         return NULL;
     }
 
     if (partial) {
         if (strstr(rsc->id, id)) {
             match = TRUE;
 
         } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
             match = TRUE;
         }
 
     } else {
         if (strcmp(rsc->id, id) == 0) {
             match = TRUE;
 
         } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
             match = TRUE;
         }
     }
 
     if (match) {
         result = g_list_prepend(result, rsc);
     }
 
     if (rsc->children) {
         gIter = rsc->children;
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
         }
     }
 
     return result;
 }
 
 static void
 check_actions(pe_working_set_t * data_set)
 {
     const char *id = NULL;
     node_t *node = NULL;
     xmlNode *lrm_rscs = NULL;
     xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
 
     xmlNode *node_state = NULL;
 
     for (node_state = __xml_first_child(status); node_state != NULL;
          node_state = __xml_next_element(node_state)) {
         if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
             id = crm_element_value(node_state, XML_ATTR_ID);
             lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
             lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);
 
             node = pe_find_node_id(data_set->nodes, id);
 
             if (node == NULL) {
                 continue;
 
             /* Still need to check actions for a maintenance node to cancel existing monitor operations */
             } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
                 crm_trace("Skipping param check for %s: cant run resources", node->details->uname);
                 continue;
             }
 
             crm_trace("Processing node %s", node->details->uname);
             if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
                 xmlNode *rsc_entry = NULL;
 
                 for (rsc_entry = __xml_first_child(lrm_rscs); rsc_entry != NULL;
                      rsc_entry = __xml_next_element(rsc_entry)) {
                     if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
 
                         if (xml_has_children(rsc_entry)) {
                             GListPtr gIter = NULL;
                             GListPtr result = NULL;
                             const char *rsc_id = ID(rsc_entry);
 
                             CRM_CHECK(rsc_id != NULL, return);
 
                             result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
                             for (gIter = result; gIter != NULL; gIter = gIter->next) {
                                 resource_t *rsc = (resource_t *) gIter->data;
 
                                 if (rsc->variant != pe_native) {
                                     continue;
                                 }
                                 check_actions_for(rsc_entry, rsc, node, data_set);
                             }
                             g_list_free(result);
                         }
                     }
                 }
             }
         }
     }
 }
 
 static gboolean
 apply_placement_constraints(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     crm_trace("Applying constraints...");
 
     for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
         rsc_to_node_t *cons = (rsc_to_node_t *) gIter->data;
 
         cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
     }
 
     return TRUE;
 
 }
 
 static gboolean
 failcount_clear_action_exists(node_t * node, resource_t * rsc)
 {
     gboolean rc = FALSE;
     char *key = crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_');
     GListPtr list = find_actions_exact(rsc->actions, key, node);
 
     if (list) {
         rc = TRUE;
     }
     g_list_free(list);
     free(key);
 
     return rc;
 }
 
 static void
 common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
 {
     int fail_count = 0;
     resource_t *failed = rsc;
 
     if (rsc->children) {
         GListPtr gIter = rsc->children;
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child_rsc = (resource_t *) gIter->data;
 
             common_apply_stickiness(child_rsc, node, data_set);
         }
         return;
     }
 
     if (is_set(rsc->flags, pe_rsc_managed)
         && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) {
         node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
         node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
 
         if (current == NULL) {
 
         } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
             resource_t *sticky_rsc = rsc;
 
             resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
             pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
                          " (node=%s, weight=%d)", sticky_rsc->id,
                          node->details->uname, rsc->stickiness);
         } else {
             GHashTableIter iter;
             node_t *nIter = NULL;
 
             pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
                          " and node %s is not explicitly allowed", rsc->id, node->details->uname);
             g_hash_table_iter_init(&iter, rsc->allowed_nodes);
             while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
                 crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
             }
         }
     }
 
     /* only check failcount here if a failcount clear action
      * has not already been placed for this resource on the node.
      * There is no sense in potentially forcing the rsc from this
      * node if the failcount is being reset anyway. */
     if (failcount_clear_action_exists(node, rsc) == FALSE) {
         fail_count = get_failcount_all(node, rsc, NULL, data_set);
     }
 
     if (fail_count > 0 && rsc->migration_threshold != 0) {
         if (is_not_set(rsc->flags, pe_rsc_unique)) {
             failed = uber_parent(rsc);
         }
         if (rsc->migration_threshold <= fail_count) {
             resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
             crm_warn("Forcing %s away from %s after %d failures (max=%d)",
                      failed->id, node->details->uname, fail_count, rsc->migration_threshold);
         } else {
             crm_info("%s can fail %d more times on %s before being forced off",
                      failed->id, rsc->migration_threshold - fail_count, node->details->uname);
         }
     }
 }
 
 static void
 complex_set_cmds(resource_t * rsc)
 {
     GListPtr gIter = rsc->children;
 
     rsc->cmds = &resource_class_alloc_functions[rsc->variant];
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         complex_set_cmds(child_rsc);
     }
 }
 
 void
 set_alloc_actions(pe_working_set_t * data_set)
 {
 
     GListPtr gIter = data_set->resources;
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         complex_set_cmds(rsc);
     }
 }
 
 static void
 calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
 {
     const char *key = (const char *)gKey;
     const char *value = (const char *)gValue;
     int *system_health = (int *)user_data;
 
     if (!gKey || !gValue || !user_data) {
         return;
     }
 
     /* Does it start with #health? */
     if (0 == strncmp(key, "#health", 7)) {
         int score;
 
         /* Convert the value into an integer */
         score = char2score(value);
 
         /* Add it to the running total */
         *system_health = merge_weights(score, *system_health);
     }
 }
 
 static gboolean
 apply_system_health(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
 
     if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
         /* Prevent any accidental health -> score translation */
         node_score_red = 0;
         node_score_yellow = 0;
         node_score_green = 0;
         return TRUE;
 
     } else if (safe_str_eq(health_strategy, "migrate-on-red")) {
 
         /* Resources on nodes which have health values of red are
          * weighted away from that node.
          */
         node_score_red = -INFINITY;
         node_score_yellow = 0;
         node_score_green = 0;
 
     } else if (safe_str_eq(health_strategy, "only-green")) {
 
         /* Resources on nodes which have health values of red or yellow
          * are forced away from that node.
          */
         node_score_red = -INFINITY;
         node_score_yellow = -INFINITY;
         node_score_green = 0;
 
     } else if (safe_str_eq(health_strategy, "progressive")) {
         /* Same as the above, but use the r/y/g scores provided by the user
          * Defaults are provided by the pe_prefs table
          */
 
     } else if (safe_str_eq(health_strategy, "custom")) {
 
         /* Requires the admin to configure the rsc_location constaints for
          * processing the stored health scores
          */
         /* TODO: Check for the existance of appropriate node health constraints */
         return TRUE;
 
     } else {
         crm_err("Unknown node health strategy: %s", health_strategy);
         return FALSE;
     }
 
     crm_info("Applying automated node health strategy: %s", health_strategy);
 
     for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
         int system_health = 0;
         node_t *node = (node_t *) gIter->data;
 
         /* Search through the node hash table for system health entries. */
         g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
 
         crm_info(" Node %s has an combined system health of %d",
                  node->details->uname, system_health);
 
         /* If the health is non-zero, then create a new rsc2node so that the
          * weight will be added later on.
          */
         if (system_health != 0) {
 
             GListPtr gIter2 = data_set->resources;
 
             for (; gIter2 != NULL; gIter2 = gIter2->next) {
                 resource_t *rsc = (resource_t *) gIter2->data;
 
                 rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
             }
         }
 
     }
 
     return TRUE;
 }
 
 gboolean
 stage0(pe_working_set_t * data_set)
 {
     xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
 
     if (data_set->input == NULL) {
         return FALSE;
     }
 
     if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
         crm_trace("Calculating status");
         cluster_status(data_set);
     }
 
     set_alloc_actions(data_set);
     apply_system_health(data_set);
     unpack_constraints(cib_constraints, data_set);
 
     return TRUE;
 }
 
 static void
 wait_for_probe(resource_t * rsc, const char *action, action_t * probe_complete,
                pe_working_set_t * data_set)
 {
     if (probe_complete == NULL) {
         return;
     }
 
     if (rsc->children) {
         GListPtr gIter = rsc->children;
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             wait_for_probe(child, action, probe_complete, data_set);
         }
 
     } else {
         char *key = NULL;
 
         if (safe_str_eq(action, RSC_STOP) && g_list_length(rsc->running_on) == 1) {
             node_t *node = (node_t *) rsc->running_on->data;
 
             /* Stop actions on nodes that are shutting down do not need to wait for probes to complete
              * Doing so prevents node shutdown in the presence of nodes that are coming up
              * The purpose of waiting is to not stop resources until we know for sure the
              *  intended destination is able to take them
              */
             if (node && node->details->shutdown) {
                 crm_debug("Skipping %s before %s_%s_0 due to %s shutdown",
                           probe_complete->uuid, rsc->id, action, node->details->uname);
                 return;
             }
         }
 
         key = generate_op_key(rsc->id, action, 0);
         custom_action_order(NULL, NULL, probe_complete, rsc, key, NULL,
                             pe_order_optional, data_set);
     }
 }
 
 /*
  * Check nodes for resources started outside of the LRM
  */
 gboolean
 probe_resources(pe_working_set_t * data_set)
 {
     action_t *probe_complete = NULL;
     action_t *probe_node_complete = NULL;
     action_t *probe_cluster_nodes_complete = NULL;
 
     GListPtr gIter = NULL;
     GListPtr gIter2 = NULL;
 
     gIter = data_set->nodes;
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
         const char *probed = g_hash_table_lookup(node->details->attrs, CRM_OP_PROBED);
 
         if (node->details->online == FALSE) {
             continue;
 
         } else if (node->details->unclean) {
             continue;
 
         } else if (is_remote_node(node) && node->details->shutdown) {
             /* Don't try and probe a remote node we're shutting down.
              * It causes constraint conflicts to try and run any sort of action
              * other that 'stop' on resources living within a remote-node when
              * it is being shutdown. */
             continue;
 
         } else if (is_container_remote_node(node)) {
             /* TODO enable container node probes once ordered probing is implemented. */
             continue;
 
         } else if (node->details->rsc_discovery_enabled == FALSE) {
             /* resource discovery is disabled for this node */
             continue;
 
         } else if (probe_complete == NULL) {
             probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set);
             if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
                 probe_cluster_nodes_complete = get_pseudo_op(CRM_OP_NODES_PROBED, data_set);
             }
         }
 
         if (probed != NULL && crm_is_true(probed) == FALSE) {
             action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
                                                CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
 
             add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
             continue;
         }
 
         probe_node_complete = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_PROBED, node->details->uname),
                                             CRM_OP_PROBED, node, FALSE, TRUE, data_set);
         if (crm_is_true(probed)) {
             crm_trace("unset");
             update_action_flags(probe_node_complete, pe_action_optional);
         } else {
             crm_trace("set");
             update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear);
         }
         crm_trace("%s - %d", node->details->uname, probe_node_complete->flags & pe_action_optional);
         probe_node_complete->priority = INFINITY;
         add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
 
         if (node->details->pending) {
             update_action_flags(probe_node_complete, pe_action_runnable | pe_action_clear);
             crm_info("Action %s on %s is unrunnable (pending)",
                      probe_node_complete->uuid, probe_node_complete->node->details->uname);
         }
 
         if (is_remote_node(node)) {
             order_actions(probe_node_complete, probe_complete,
                       pe_order_runnable_left /*|pe_order_implies_then */ );
         } else if (probe_cluster_nodes_complete == NULL) {
             order_actions(probe_node_complete, probe_complete,
                       pe_order_runnable_left /*|pe_order_implies_then */ );
         } else {
             order_actions(probe_node_complete, probe_cluster_nodes_complete,
                       pe_order_runnable_left /*|pe_order_implies_then */ );
         }
 
         gIter2 = data_set->resources;
         for (; gIter2 != NULL; gIter2 = gIter2->next) {
             resource_t *rsc = (resource_t *) gIter2->data;
 
             if (rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set)) {
                 update_action_flags(probe_complete, pe_action_optional | pe_action_clear);
                 update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear);
 
                 if (probe_cluster_nodes_complete
                     && (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc))) {
                     update_action_flags(probe_cluster_nodes_complete, pe_action_optional | pe_action_clear);
                     /* allow remote connection resources and resources
                      * containing remote connection resources to run after all
                      * cluster nodes are probed */
                     wait_for_probe(rsc, RSC_START, probe_cluster_nodes_complete, data_set);
                 } else {
                     wait_for_probe(rsc, RSC_START, probe_complete, data_set);
                 }
             }
         }
     }
 
     gIter = data_set->resources;
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         if (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc)) {
             /* allow remote connection resources and any resources containing
              * remote connection resources to run after cluster nodes are probed.*/
             wait_for_probe(rsc, RSC_STOP, probe_cluster_nodes_complete, data_set);
         } else {
             wait_for_probe(rsc, RSC_STOP, probe_complete, data_set);
         }
     }
 
     return TRUE;
 }
 
 static void
 rsc_discover_filter(resource_t *rsc, node_t *node)
 {
     GListPtr gIter = rsc->children;
     resource_t *top = uber_parent(rsc);
     node_t *match;
 
     if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
         return;
     }
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
         rsc_discover_filter(child_rsc, node);
     }
 
     match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
     if (match && match->rsc_discover_mode != discover_exclusive) {
         match->weight = -INFINITY;
     }
 }
 
 /*
  * Count how many valid nodes we have (so we know the maximum number of
  *  colors we can resolve).
  *
  * Apply node constraints (ie. filter the "allowed_nodes" part of resources
  */
 gboolean
 stage2(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     crm_trace("Applying placement constraints");
 
     gIter = data_set->nodes;
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         if (node == NULL) {
             /* error */
 
         } else if (node->weight >= 0.0  /* global weight */
                    && node->details->online && node->details->type != node_ping) {
             data_set->max_valid_nodes++;
         }
     }
 
     apply_placement_constraints(data_set);
 
     gIter = data_set->nodes;
     for (; gIter != NULL; gIter = gIter->next) {
         GListPtr gIter2 = NULL;
         node_t *node = (node_t *) gIter->data;
 
         gIter2 = data_set->resources;
         for (; gIter2 != NULL; gIter2 = gIter2->next) {
             resource_t *rsc = (resource_t *) gIter2->data;
 
             common_apply_stickiness(rsc, node, data_set);
             rsc_discover_filter(rsc, node);
         }
     }
 
     return TRUE;
 }
 
 /*
  * Create internal resource constraints before allocation
  */
 gboolean
 stage3(pe_working_set_t * data_set)
 {
 
     GListPtr gIter = data_set->resources;
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         rsc->cmds->internal_constraints(rsc, data_set);
     }
 
     return TRUE;
 }
 
 /*
  * Check for orphaned or redefined actions
  */
 gboolean
 stage4(pe_working_set_t * data_set)
 {
     check_actions(data_set);
     return TRUE;
 }
 
 static gint
 sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
 {
     int rc = 0;
     int r1_weight = -INFINITY;
     int r2_weight = -INFINITY;
 
     const char *reason = "existance";
 
     const GListPtr nodes = (GListPtr) data;
     resource_t *resource1 = (resource_t *) convert_const_pointer(a);
     resource_t *resource2 = (resource_t *) convert_const_pointer(b);
 
     node_t *r1_node = NULL;
     node_t *r2_node = NULL;
     GListPtr gIter = NULL;
     GHashTable *r1_nodes = NULL;
     GHashTable *r2_nodes = NULL;
 
     if (a == NULL && b == NULL) {
         goto done;
     }
     if (a == NULL) {
         return 1;
     }
     if (b == NULL) {
         return -1;
     }
 
     reason = "priority";
     r1_weight = resource1->priority;
     r2_weight = resource2->priority;
 
     if (r1_weight > r2_weight) {
         rc = -1;
         goto done;
     }
 
     if (r1_weight < r2_weight) {
         rc = 1;
         goto done;
     }
 
     reason = "no node list";
     if (nodes == NULL) {
         goto done;
     }
 
     r1_nodes =
         rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1,
                           pe_weights_forward | pe_weights_init);
     dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes);
     r2_nodes =
         rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1,
                           pe_weights_forward | pe_weights_init);
     dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes);
 
     /* Current location score */
     reason = "current location";
     r1_weight = -INFINITY;
     r2_weight = -INFINITY;
 
     if (resource1->running_on) {
         r1_node = g_list_nth_data(resource1->running_on, 0);
         r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
         if (r1_node != NULL) {
             r1_weight = r1_node->weight;
         }
     }
     if (resource2->running_on) {
         r2_node = g_list_nth_data(resource2->running_on, 0);
         r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
         if (r2_node != NULL) {
             r2_weight = r2_node->weight;
         }
     }
 
     if (r1_weight > r2_weight) {
         rc = -1;
         goto done;
     }
 
     if (r1_weight < r2_weight) {
         rc = 1;
         goto done;
     }
 
     reason = "score";
     for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         r1_node = NULL;
         r2_node = NULL;
 
         r1_weight = -INFINITY;
         if (r1_nodes) {
             r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
         }
         if (r1_node) {
             r1_weight = r1_node->weight;
         }
 
         r2_weight = -INFINITY;
         if (r2_nodes) {
             r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
         }
         if (r2_node) {
             r2_weight = r2_node->weight;
         }
 
         if (r1_weight > r2_weight) {
             rc = -1;
             goto done;
         }
 
         if (r1_weight < r2_weight) {
             rc = 1;
             goto done;
         }
     }
 
   done:
     crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
               resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
               rc < 0 ? '>' : rc > 0 ? '<' : '=',
               resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);
 
     if (r1_nodes) {
         g_hash_table_destroy(r1_nodes);
     }
     if (r2_nodes) {
         g_hash_table_destroy(r2_nodes);
     }
 
     return rc;
 }
 
 static void
 allocate_resources(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
         /* Force remote connection resources to be allocated first. This
          * also forces any colocation dependencies to be allocated as well */
         for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
             resource_t *rsc = (resource_t *) gIter->data;
             if (rsc->is_remote_node == FALSE) {
                 continue;
             }
             pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
             /* for remote node connection resources, always prefer the partial migration
              * target during resource allocation if the rsc is in the middle of a
              * migration */ 
             rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
         }
     }
 
     /* now do the rest of the resources */
     for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
         if (rsc->is_remote_node == TRUE) {
             continue;
         }
         pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
         rsc->cmds->allocate(rsc, NULL, data_set);
     }
 }
 
 gboolean
 stage5(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     if (safe_str_neq(data_set->placement_strategy, "default")) {
         GListPtr nodes = g_list_copy(data_set->nodes);
 
         nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL);
 
         data_set->resources =
             g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);
 
         g_list_free(nodes);
     }
 
     gIter = data_set->nodes;
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node);
     }
 
     crm_trace("Allocating services");
     /* Take (next) highest resource, assign it and create its actions */
 
     allocate_resources(data_set);
 
     gIter = data_set->nodes;
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node);
     }
 
     if (is_set(data_set->flags, pe_flag_startup_probes)) {
         crm_trace("Calculating needed probes");
         /* This code probably needs optimization
          * ptest -x with 100 nodes, 100 clones and clone-max=100:
 
          With probes:
 
          ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
          ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
          ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
          ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
          ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
          ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
          ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
          ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
          ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
          ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
          ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
          36s
          ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
 
          Without probes:
 
          ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
          ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
          ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
          ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
          ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
          ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
          ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
          */
 
         probe_resources(data_set);
     }
 
     crm_trace("Creating actions");
 
     gIter = data_set->resources;
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         rsc->cmds->create_actions(rsc, data_set);
     }
 
     crm_trace("Creating done");
     return TRUE;
 }
 
 static gboolean
 is_managed(const resource_t * rsc)
 {
     GListPtr gIter = rsc->children;
 
     if (is_set(rsc->flags, pe_rsc_managed)) {
         return TRUE;
     }
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         if (is_managed(child_rsc)) {
             return TRUE;
         }
     }
 
     return FALSE;
 }
 
 static gboolean
 any_managed_resources(pe_working_set_t * data_set)
 {
 
     GListPtr gIter = data_set->resources;
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         if (is_managed(rsc)) {
             return TRUE;
         }
     }
     return FALSE;
 }
 
 /*
  * Create dependencies for stonith and shutdown operations
  */
 gboolean
 stage6(pe_working_set_t * data_set)
 {
     action_t *dc_down = NULL;
     action_t *dc_fence = NULL;
     action_t *stonith_op = NULL;
     action_t *last_stonith = NULL;
     gboolean integrity_lost = FALSE;
     action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
     action_t *done = get_pseudo_op(STONITH_DONE, data_set);
     gboolean need_stonith = TRUE;
     GListPtr gIter = data_set->nodes;
 
     crm_trace("Processing fencing and shutdown cases");
 
     if (any_managed_resources(data_set) == FALSE) {
         crm_notice("Delaying fencing operations until there are resources to manage");
         need_stonith = FALSE;
     }
 
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         /* remote-nodes associated with a container resource (such as a vm) are not fenced */
         if (is_container_remote_node(node)) {
             continue;
         }
 
         stonith_op = NULL;
         if (need_stonith && node->details->unclean && pe_can_fence(data_set, node)) {
             pe_warn("Scheduling Node %s for STONITH", node->details->uname);
 
             stonith_op = pe_fence_op(node, NULL, FALSE, data_set);
 
             stonith_constraints(node, stonith_op, data_set);
 
             if (node->details->is_dc) {
                 dc_down = stonith_op;
                 dc_fence = stonith_op;
 
             } else {
                 if (last_stonith) {
                     order_actions(last_stonith, stonith_op, pe_order_optional);
                 }
                 last_stonith = stonith_op;
             }
 
         } else if (node->details->online && node->details->shutdown &&
                 /* TODO define what a shutdown op means for a baremetal remote node.
                  * For now we do not send shutdown operations for remote nodes, but
                  * if we can come up with a good use for this in the future, we will. */
                     is_remote_node(node) == FALSE) {
 
             action_t *down_op = NULL;
 
             crm_notice("Scheduling Node %s for shutdown", node->details->uname);
 
             down_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, node->details->uname),
                                     CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set);
 
             shutdown_constraints(node, down_op, data_set);
             add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
 
             if (node->details->is_dc) {
                 dc_down = down_op;
             }
         }
 
         if (node->details->unclean && stonith_op == NULL) {
             integrity_lost = TRUE;
             pe_warn("Node %s is unclean!", node->details->uname);
         }
     }
 
     if (integrity_lost) {
         if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
             pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
             pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
 
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
             crm_notice("Cannot fence unclean nodes until quorum is"
                        " attained (or no-quorum-policy is set to ignore)");
         }
     }
 
     if (dc_down != NULL) {
         GListPtr gIter = NULL;
 
         crm_trace("Ordering shutdowns before %s on %s (DC)",
                   dc_down->task, dc_down->node->details->uname);
 
         add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
 
         for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
             action_t *node_stop = (action_t *) gIter->data;
 
             if (safe_str_neq(CRM_OP_SHUTDOWN, node_stop->task)) {
                 continue;
             } else if (node_stop->node->details->is_dc) {
                 continue;
             }
 
             crm_debug("Ordering shutdown on %s before %s on %s",
                       node_stop->node->details->uname,
                       dc_down->task, dc_down->node->details->uname);
 
             order_actions(node_stop, dc_down, pe_order_optional);
         }
 
         if (last_stonith && dc_down != last_stonith) {
             order_actions(last_stonith, dc_down, pe_order_optional);
         }
     }
 
     if (last_stonith) {
         order_actions(last_stonith, done, pe_order_implies_then);
 
     } else if (dc_fence) {
         order_actions(dc_down, done, pe_order_implies_then);
     }
 
     order_actions(done, all_stopped, pe_order_implies_then);
     return TRUE;
 }
 
 /*
  * Determin the sets of independant actions and the correct order for the
  *  actions in each set.
  *
  * Mark dependencies of un-runnable actions un-runnable
  *
  */
 static GListPtr
 find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key)
 {
     GListPtr list = NULL;
 
     list = find_actions(actions, original_key, NULL);
     if (list == NULL) {
         /* we're potentially searching a child of the original resource */
         char *key = NULL;
         char *tmp = NULL;
         char *task = NULL;
         int interval = 0;
 
         if (parse_op_key(original_key, &tmp, &task, &interval)) {
             key = generate_op_key(rsc->id, task, interval);
             /* crm_err("looking up %s instead of %s", key, original_key); */
             /* slist_iter(action, action_t, actions, lpc, */
             /*         crm_err("  - %s", action->uuid)); */
             list = find_actions(actions, key, NULL);
 
         } else {
             crm_err("search key: %s", original_key);
         }
 
         free(key);
         free(tmp);
         free(task);
     }
 
     return list;
 }
 
 static void
 rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * order)
 {
     GListPtr gIter = NULL;
     GListPtr rh_actions = NULL;
     action_t *rh_action = NULL;
     enum pe_ordering type = order->type;
 
     CRM_CHECK(rsc != NULL, return);
     CRM_CHECK(order != NULL, return);
 
     rh_action = order->rh_action;
     crm_trace("Processing RH of ordering constraint %d", order->id);
 
     if (rh_action != NULL) {
         rh_actions = g_list_prepend(NULL, rh_action);
 
     } else if (rsc != NULL) {
         rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
     }
 
     if (rh_actions == NULL) {
         pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
                      " ignoring", rsc->id, order->rh_action_task);
         if (lh_action) {
             pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
         }
         return;
     }
 
     if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
         pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
                      order->rh_action_task);
         clear_bit(type, pe_order_implies_then);
     }
 
     gIter = rh_actions;
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *rh_action_iter = (action_t *) gIter->data;
 
         if (lh_action) {
             order_actions(lh_action, rh_action_iter, type);
 
         } else if (type & pe_order_implies_then) {
             update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear);
             crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
         } else {
             crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
         }
     }
 
     g_list_free(rh_actions);
 }
 
 static void
 rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     GListPtr lh_actions = NULL;
     action_t *lh_action = order->lh_action;
     resource_t *rh_rsc = order->rh_rsc;
 
     crm_trace("Processing LH of ordering constraint %d", order->id);
     CRM_ASSERT(lh_rsc != NULL);
 
     if (lh_action != NULL) {
         lh_actions = g_list_prepend(NULL, lh_action);
 
     } else if (lh_action == NULL) {
         lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
     }
 
     if (lh_actions == NULL && lh_rsc != rh_rsc) {
         char *key = NULL;
         char *rsc_id = NULL;
         char *op_type = NULL;
         int interval = 0;
 
         parse_op_key(order->lh_action_task, &rsc_id, &op_type, &interval);
         key = generate_op_key(lh_rsc->id, op_type, interval);
 
         if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) {
             free(key);
             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
 
         } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) {
             free(key);
             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
 
         } else {
             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
             lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
             lh_actions = g_list_prepend(NULL, lh_action);
         }
 
         free(op_type);
         free(rsc_id);
     }
 
     gIter = lh_actions;
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *lh_action_iter = (action_t *) gIter->data;
 
         if (rh_rsc == NULL && order->rh_action) {
             rh_rsc = order->rh_action->rsc;
         }
         if (rh_rsc) {
             rsc_order_then(lh_action_iter, rh_rsc, order);
 
         } else if (order->rh_action) {
             order_actions(lh_action_iter, order->rh_action, order->type);
         }
     }
 
     g_list_free(lh_actions);
 }
 
 extern gboolean update_action(action_t * action);
 extern void update_colo_start_chain(action_t * action);
 
 static void
 apply_remote_node_ordering(pe_working_set_t *data_set)
 {
     GListPtr gIter = data_set->actions;
 
     if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
         return;
     }
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
         resource_t *remote_rsc = NULL;
         resource_t *container = NULL;
 
+        if (action->rsc == NULL) {
+            continue;
+        }
+
+        /* Special case. */
+        if (action->rsc &&
+            action->rsc->is_remote_node &&
+            safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {
+
+            /* if we are clearing the failcount of an actual remote node connect
+             * resource, then make sure this happens before allowing the connection
+             * to start if we are planning on starting the connection during this
+             * transition */ 
+            custom_action_order(action->rsc,
+                NULL,
+                action,
+                action->rsc,
+                generate_op_key(action->rsc->id, RSC_START, 0),
+                NULL,
+                pe_order_optional,
+                data_set);
+
+                continue;
+        }
+
+        /* detect if the action occurs on a remote node. if so create
+         * ordering constraints that guarantee the action occurs while
+         * the remote node is active (after start, before stop...) things
+         * like that */ 
         if (action->node == NULL ||
             is_remote_node(action->node) == FALSE ||
             action->node->details->remote_rsc == NULL ||
-            action->rsc == NULL ||
             is_set(action->flags, pe_action_pseudo)) {
             continue;
         }
 
         remote_rsc = action->node->details->remote_rsc;
         container = remote_rsc->container;
 
         if (safe_str_eq(action->task, "monitor") ||
             safe_str_eq(action->task, "start") ||
             safe_str_eq(action->task, "promote") ||
             safe_str_eq(action->task, "notify") ||
             safe_str_eq(action->task, CRM_OP_LRM_REFRESH) ||
             safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT) ||
             safe_str_eq(action->task, "delete")) {
 
             custom_action_order(remote_rsc,
                 generate_op_key(remote_rsc->id, RSC_START, 0),
                 NULL,
                 action->rsc,
                 NULL,
                 action,
                 pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                 data_set);
 
         } else if (safe_str_eq(action->task, "demote")) {
 
             /* If the connection is being torn down, we don't want
              * to build a constraint between a resource's demotion and
              * the connection resource starting... because the connection
              * resource can not start. The connection might already be up,
              * but the START action would not be allowed which in turn would
              * block the demotion of any resournces living in the remote-node.
              *
              * In this case, only build the constraint between the demotion and
              * the connection's stop action. This allows the connection and all the
              * resources within the remote-node to be torn down properly. */
             if (remote_rsc->next_role == RSC_ROLE_STOPPED) {
                 custom_action_order(action->rsc,
                     NULL,
                     action,
                     remote_rsc,
                     generate_op_key(remote_rsc->id, RSC_STOP, 0),
                     NULL,
                     pe_order_preserve | pe_order_implies_first,
                     data_set);
             } else {
 
                 custom_action_order(remote_rsc,
                     generate_op_key(remote_rsc->id, RSC_START, 0),
                     NULL,
                     action->rsc,
                     NULL,
                     action,
                     pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                     data_set);
             }
 
         } else if (safe_str_eq(action->task, "stop") &&
                    container &&
                    is_set(container->flags, pe_rsc_failed)) {
 
             /* when the container representing a remote node fails, the stop
              * action for all the resources living in that container is implied
              * by the container stopping.  This is similar to how fencing operations
              * work for cluster nodes. */
             pe_set_action_bit(action, pe_action_pseudo);
             custom_action_order(container,
                 generate_op_key(container->id, RSC_STOP, 0),
                 NULL,
                 action->rsc,
                 NULL,
                 action,
                 pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                 data_set);
         } else if (safe_str_eq(action->task, "stop")) {
             gboolean after_start = FALSE;
 
             /* handle special case with baremetal remote where stop actions need to be
              * ordered after the connection resource starts somewhere else. */
             if (is_baremetal_remote_node(action->node)) {
                 node_t *cluster_node = remote_rsc->running_on ? remote_rsc->running_on->data : NULL;
 
                 /* if the current cluster node a baremetal connection resource
                  * is residing on is unclean or went offline we can't process any
                  * operations on that remote node until after it starts somewhere else. */
                 if (cluster_node == NULL ||
                     cluster_node->details->unclean == TRUE ||
                     cluster_node->details->online == FALSE) {
                     after_start = TRUE;
                 } else if (g_list_length(remote_rsc->running_on) > 1 &&
                            remote_rsc->partial_migration_source &&
                             remote_rsc->partial_migration_target) {
                     /* if we're caught in the middle of migrating a connection resource,
                      * then we have to wait until after the resource migrates before performing
                      * any actions. */
                     after_start = TRUE;
                 }
             }
 
             if (after_start) {
                 custom_action_order(remote_rsc,
                     generate_op_key(remote_rsc->id, RSC_START, 0),
                     NULL,
                     action->rsc,
                     NULL,
                     action,
                     pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                     data_set);
             } else {
                 custom_action_order(action->rsc,
                     NULL,
                     action,
                     remote_rsc,
                     generate_op_key(remote_rsc->id, RSC_STOP, 0),
                     NULL,
                     pe_order_preserve | pe_order_implies_first,
                     data_set);
             }
         }
     }
 }
 
 gboolean
 stage7(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     apply_remote_node_ordering(data_set);
     crm_trace("Applying ordering constraints");
 
     /* Don't ask me why, but apparently they need to be processed in
      * the order they were created in... go figure
      *
      * Also g_list_prepend() has horrendous performance characteristics
      * So we need to use g_list_prepend() and then reverse the list here
      */
     data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
 
     gIter = data_set->ordering_constraints;
     for (; gIter != NULL; gIter = gIter->next) {
         order_constraint_t *order = (order_constraint_t *) gIter->data;
         resource_t *rsc = order->lh_rsc;
 
         crm_trace("Applying ordering constraint: %d", order->id);
 
         if (rsc != NULL) {
             crm_trace("rsc_action-to-*");
             rsc_order_first(rsc, order, data_set);
             continue;
         }
 
         rsc = order->rh_rsc;
         if (rsc != NULL) {
             crm_trace("action-to-rsc_action");
             rsc_order_then(order->lh_action, rsc, order);
 
         } else {
             crm_trace("action-to-action");
             order_actions(order->lh_action, order->rh_action, order->type);
         }
     }
 
     crm_trace("Updating %d actions", g_list_length(data_set->actions));
 
     gIter = data_set->actions;
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         update_colo_start_chain(action);
         update_action(action);
     }
 
     crm_trace("Processing reloads");
 
     gIter = data_set->resources;
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         rsc_reload(rsc, data_set);
         LogActions(rsc, data_set, FALSE);
     }
     return TRUE;
 }
 
 static gint
 sort_notify_entries(gconstpointer a, gconstpointer b)
 {
     int tmp;
     const notify_entry_t *entry_a = a;
     const notify_entry_t *entry_b = b;
 
     if (entry_a == NULL && entry_b == NULL) {
         return 0;
     }
     if (entry_a == NULL) {
         return 1;
     }
     if (entry_b == NULL) {
         return -1;
     }
 
     if (entry_a->rsc == NULL && entry_b->rsc == NULL) {
         return 0;
     }
     if (entry_a->rsc == NULL) {
         return 1;
     }
     if (entry_b->rsc == NULL) {
         return -1;
     }
 
     tmp = strcmp(entry_a->rsc->id, entry_b->rsc->id);
     if (tmp != 0) {
         return tmp;
     }
 
     if (entry_a->node == NULL && entry_b->node == NULL) {
         return 0;
     }
     if (entry_a->node == NULL) {
         return 1;
     }
     if (entry_b->node == NULL) {
         return -1;
     }
 
     return strcmp(entry_a->node->details->id, entry_b->node->details->id);
 }
 
 static char *
 expand_node_list(GListPtr list)
 {
     GListPtr gIter = NULL;
     char *node_list = NULL;
 
     if (list == NULL) {
         return strdup(" ");
     }
 
     for (gIter = list; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         if (node->details->uname) {
             int existing_len = 0;
             int len = 2 + strlen(node->details->uname);
 
             if(node_list) {
                 existing_len = strlen(node_list);
             }
 
             crm_trace("Adding %s (%dc) at offset %d", node->details->uname, len - 2, existing_len);
             node_list = realloc_safe(node_list, len + existing_len);
             sprintf(node_list + existing_len, "%s%s", existing_len == 0 ? "":" ", node->details->uname);
         }
     }
 
     return node_list;
 }
 
 static void
 expand_list(GListPtr list, char **rsc_list, char **node_list)
 {
     GListPtr gIter = NULL;
     const char *uname = NULL;
     const char *rsc_id = NULL;
     const char *last_rsc_id = NULL;
 
     if (rsc_list) {
         *rsc_list = NULL;
     }
 
     if (list == NULL) {
         if (rsc_list) {
             *rsc_list = strdup(" ");
         }
         if (node_list) {
             *node_list = strdup(" ");
         }
         return;
     }
 
     if (node_list) {
         *node_list = NULL;
     }
 
     for (gIter = list; gIter != NULL; gIter = gIter->next) {
         notify_entry_t *entry = (notify_entry_t *) gIter->data;
 
         CRM_LOG_ASSERT(entry != NULL);
         CRM_LOG_ASSERT(entry && entry->rsc != NULL);
 
         if(entry == NULL || entry->rsc == NULL) {
             continue;
         }
 
         /* Uh, why? */
         CRM_LOG_ASSERT(node_list == NULL || entry->node != NULL);
         if(node_list != NULL && entry->node == NULL) {
             continue;
         }
 
         uname = NULL;
         rsc_id = entry->rsc->id;
         CRM_ASSERT(rsc_id != NULL);
 
         /* filter dups */
         if (safe_str_eq(rsc_id, last_rsc_id)) {
             continue;
         }
         last_rsc_id = rsc_id;
 
         if (rsc_list != NULL) {
             int existing_len = 0;
             int len = 2 + strlen(rsc_id);       /* +1 space, +1 EOS */
 
             if (rsc_list && *rsc_list) {
                 existing_len = strlen(*rsc_list);
             }
 
             crm_trace("Adding %s (%dc) at offset %d", rsc_id, len - 2, existing_len);
             *rsc_list = realloc_safe(*rsc_list, len + existing_len);
             sprintf(*rsc_list + existing_len, "%s%s", existing_len == 0 ? "":" ", rsc_id);
         }
 
         if (entry->node != NULL) {
             uname = entry->node->details->uname;
         }
 
         if (node_list != NULL && uname) {
             int existing_len = 0;
             int len = 2 + strlen(uname);
 
             if (node_list && *node_list) {
                 existing_len = strlen(*node_list);
             }
 
             crm_trace("Adding %s (%dc) at offset %d", uname, len - 2, existing_len);
             *node_list = realloc_safe(*node_list, len + existing_len);
             sprintf(*node_list + existing_len, "%s%s", existing_len == 0 ? "":" ", uname);
         }
     }
 
 }
 
 static void
 dup_attr(gpointer key, gpointer value, gpointer user_data)
 {
     add_hash_param(user_data, key, value);
 }
 
 static action_t *
 pe_notify(resource_t * rsc, node_t * node, action_t * op, action_t * confirm,
           notify_data_t * n_data, pe_working_set_t * data_set)
 {
     char *key = NULL;
     action_t *trigger = NULL;
     const char *value = NULL;
     const char *task = NULL;
 
     if (op == NULL || confirm == NULL) {
         pe_rsc_trace(rsc, "Op=%p confirm=%p", op, confirm);
         return NULL;
     }
 
     CRM_CHECK(rsc != NULL, return NULL);
     CRM_CHECK(node != NULL, return NULL);
 
     if (node->details->online == FALSE) {
         pe_rsc_trace(rsc, "Skipping notification for %s: node offline", rsc->id);
         return NULL;
     } else if (is_set(op->flags, pe_action_runnable) == FALSE) {
         pe_rsc_trace(rsc, "Skipping notification for %s: not runnable", op->uuid);
         return NULL;
     }
 
     value = g_hash_table_lookup(op->meta, "notify_type");
     task = g_hash_table_lookup(op->meta, "notify_operation");
 
     pe_rsc_trace(rsc, "Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task);
 
     key = generate_notify_key(rsc->id, value, task);
     trigger = custom_action(rsc, key, op->task, node,
                             is_set(op->flags, pe_action_optional), TRUE, data_set);
     g_hash_table_foreach(op->meta, dup_attr, trigger->meta);
     g_hash_table_foreach(n_data->keys, dup_attr, trigger->meta);
 
     /* pseudo_notify before notify */
     pe_rsc_trace(rsc, "Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id,
                  op->id);
 
     order_actions(op, trigger, pe_order_optional);
     order_actions(trigger, confirm, pe_order_optional);
     return trigger;
 }
 
 static void
 pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_working_set_t * data_set)
 {
     action_t *notify = NULL;
 
     CRM_CHECK(rsc != NULL, return);
 
     if (n_data->post == NULL) {
         return;                 /* Nothing to do */
     }
 
     notify = pe_notify(rsc, node, n_data->post, n_data->post_done, n_data, data_set);
 
     if (notify != NULL) {
         notify->priority = INFINITY;
     }
 
     if (n_data->post_done) {
         GListPtr gIter = rsc->actions;
 
         for (; gIter != NULL; gIter = gIter->next) {
             action_t *mon = (action_t *) gIter->data;
             const char *interval = g_hash_table_lookup(mon->meta, "interval");
 
             if (interval == NULL || safe_str_eq(interval, "0")) {
                 pe_rsc_trace(rsc, "Skipping %s: interval", mon->uuid);
                 continue;
             } else if (safe_str_eq(mon->task, RSC_CANCEL)) {
                 pe_rsc_trace(rsc, "Skipping %s: cancel", mon->uuid);
                 continue;
             }
 
             order_actions(n_data->post_done, mon, pe_order_optional);
         }
     }
 }
 
 notify_data_t *
 create_notification_boundaries(resource_t * rsc, const char *action, action_t * start,
                                action_t * end, pe_working_set_t * data_set)
 {
     /* Create the pseudo ops that preceed and follow the actual notifications */
 
     /*
      * Creates two sequences (conditional on start and end being supplied):
      *   pre_notify -> pre_notify_complete -> start, and
      *   end -> post_notify -> post_notify_complete
      *
      * 'start' and 'end' may be the same event or ${X} and ${X}ed as per clones
      */
     char *key = NULL;
     notify_data_t *n_data = NULL;
 
     if (is_not_set(rsc->flags, pe_rsc_notify)) {
         return NULL;
     }
 
     n_data = calloc(1, sizeof(notify_data_t));
     n_data->action = action;
     n_data->keys =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
 
     if (start) {
         /* create pre-event notification wrappers */
         key = generate_notify_key(rsc->id, "pre", start->task);
         n_data->pre =
             custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(start->flags, pe_action_optional),
                           TRUE, data_set);
 
         update_action_flags(n_data->pre, pe_action_pseudo);
         update_action_flags(n_data->pre, pe_action_runnable);
 
         add_hash_param(n_data->pre->meta, "notify_type", "pre");
         add_hash_param(n_data->pre->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->pre->meta, "notify_key_type", "pre");
         add_hash_param(n_data->pre->meta, "notify_key_operation", start->task);
 
         /* create pre_notify_complete */
         key = generate_notify_key(rsc->id, "confirmed-pre", start->task);
         n_data->pre_done =
             custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(start->flags, pe_action_optional),
                           TRUE, data_set);
 
         update_action_flags(n_data->pre_done, pe_action_pseudo);
         update_action_flags(n_data->pre_done, pe_action_runnable);
 
         add_hash_param(n_data->pre_done->meta, "notify_type", "pre");
         add_hash_param(n_data->pre_done->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->pre_done->meta, "notify_key_type", "confirmed-pre");
         add_hash_param(n_data->pre_done->meta, "notify_key_operation", start->task);
 
         order_actions(n_data->pre_done, start, pe_order_optional);
         order_actions(n_data->pre, n_data->pre_done, pe_order_optional);
     }
 
     if (end) {
         /* create post-event notification wrappers */
         key = generate_notify_key(rsc->id, "post", end->task);
         n_data->post =
             custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(end->flags, pe_action_optional), TRUE,
                           data_set);
 
         n_data->post->priority = INFINITY;
         update_action_flags(n_data->post, pe_action_pseudo);
         if (is_set(end->flags, pe_action_runnable)) {
             update_action_flags(n_data->post, pe_action_runnable);
         } else {
             update_action_flags(n_data->post, pe_action_runnable | pe_action_clear);
         }
 
         add_hash_param(n_data->post->meta, "notify_type", "post");
         add_hash_param(n_data->post->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->post->meta, "notify_key_type", "post");
         add_hash_param(n_data->post->meta, "notify_key_operation", end->task);
 
         /* create post_notify_complete */
         key = generate_notify_key(rsc->id, "confirmed-post", end->task);
         n_data->post_done =
             custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(end->flags, pe_action_optional),
                           TRUE, data_set);
 
         n_data->post_done->priority = INFINITY;
         update_action_flags(n_data->post_done, pe_action_pseudo);
         if (is_set(end->flags, pe_action_runnable)) {
             update_action_flags(n_data->post_done, pe_action_runnable);
         } else {
             update_action_flags(n_data->post_done, pe_action_runnable | pe_action_clear);
         }
 
         add_hash_param(n_data->post_done->meta, "notify_type", "post");
         add_hash_param(n_data->post_done->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->post_done->meta, "notify_key_type", "confirmed-post");
         add_hash_param(n_data->post_done->meta, "notify_key_operation", end->task);
 
         order_actions(end, n_data->post, pe_order_implies_then);
         order_actions(n_data->post, n_data->post_done, pe_order_implies_then);
     }
 
     if (start && end) {
         order_actions(n_data->pre_done, n_data->post, pe_order_optional);
     }
 
     if (safe_str_eq(action, RSC_STOP)) {
         action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
 
         order_actions(n_data->post_done, all_stopped, pe_order_optional);
     }
 
     return n_data;
 }
 
 void
 collect_notification_data(resource_t * rsc, gboolean state, gboolean activity,
                           notify_data_t * n_data)
 {
 
     if(n_data->allowed_nodes == NULL) {
         n_data->allowed_nodes = rsc->allowed_nodes;
     }
 
     if (rsc->children) {
         GListPtr gIter = rsc->children;
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             collect_notification_data(child, state, activity, n_data);
         }
         return;
     }
 
     if (state) {
         notify_entry_t *entry = NULL;
 
         entry = calloc(1, sizeof(notify_entry_t));
         entry->rsc = rsc;
         if (rsc->running_on) {
             /* we only take the first one */
             entry->node = rsc->running_on->data;
         }
 
         pe_rsc_trace(rsc, "%s state: %s", rsc->id, role2text(rsc->role));
 
         switch (rsc->role) {
             case RSC_ROLE_STOPPED:
                 n_data->inactive = g_list_prepend(n_data->inactive, entry);
                 break;
             case RSC_ROLE_STARTED:
                 n_data->active = g_list_prepend(n_data->active, entry);
                 break;
             case RSC_ROLE_SLAVE:
                 n_data->slave = g_list_prepend(n_data->slave, entry);
                 break;
             case RSC_ROLE_MASTER:
                 n_data->master = g_list_prepend(n_data->master, entry);
                 break;
             default:
                 crm_err("Unsupported notify role");
                 free(entry);
                 break;
         }
     }
 
     if (activity) {
         notify_entry_t *entry = NULL;
         enum action_tasks task;
 
         GListPtr gIter = rsc->actions;
 
         for (; gIter != NULL; gIter = gIter->next) {
             action_t *op = (action_t *) gIter->data;
 
             if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) {
 
                 entry = calloc(1, sizeof(notify_entry_t));
                 entry->node = op->node;
                 entry->rsc = rsc;
 
                 task = text2task(op->task);
                 switch (task) {
                     case start_rsc:
                         n_data->start = g_list_prepend(n_data->start, entry);
                         break;
                     case stop_rsc:
                         n_data->stop = g_list_prepend(n_data->stop, entry);
                         break;
                     case action_promote:
                         n_data->promote = g_list_prepend(n_data->promote, entry);
                         break;
                     case action_demote:
                         n_data->demote = g_list_prepend(n_data->demote, entry);
                         break;
                     default:
                         free(entry);
                         break;
                 }
             }
         }
     }
 }
 
 gboolean
 expand_notification_data(notify_data_t * n_data, pe_working_set_t * data_set)
 {
     /* Expand the notification entries into a key=value hashtable
      * This hashtable is later used in action2xml()
      */
     gboolean required = FALSE;
     char *rsc_list = NULL;
     char *node_list = NULL;
     GListPtr nodes = NULL;
 
     if (n_data->stop) {
         n_data->stop = g_list_sort(n_data->stop, sort_notify_entries);
     }
     expand_list(n_data->stop, &rsc_list, &node_list);
     if (rsc_list != NULL && safe_str_neq(" ", rsc_list)) {
         if (safe_str_eq(n_data->action, RSC_STOP)) {
             required = TRUE;
         }
     }
     g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), node_list);
 
     if (n_data->start) {
         n_data->start = g_list_sort(n_data->start, sort_notify_entries);
         if (rsc_list && safe_str_eq(n_data->action, RSC_START)) {
             required = TRUE;
         }
     }
     expand_list(n_data->start, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_start_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_start_uname"), node_list);
 
     if (n_data->demote) {
         n_data->demote = g_list_sort(n_data->demote, sort_notify_entries);
         if (safe_str_eq(n_data->action, RSC_DEMOTE)) {
             required = TRUE;
         }
     }
 
     expand_list(n_data->demote, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_demote_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_demote_uname"), node_list);
 
     if (n_data->promote) {
         n_data->promote = g_list_sort(n_data->promote, sort_notify_entries);
         if (safe_str_eq(n_data->action, RSC_PROMOTE)) {
             required = TRUE;
         }
     }
     expand_list(n_data->promote, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_promote_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_promote_uname"), node_list);
 
     if (n_data->active) {
         n_data->active = g_list_sort(n_data->active, sort_notify_entries);
     }
     expand_list(n_data->active, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_active_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_active_uname"), node_list);
 
     if (n_data->slave) {
         n_data->slave = g_list_sort(n_data->slave, sort_notify_entries);
     }
     expand_list(n_data->slave, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_slave_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_slave_uname"), node_list);
 
     if (n_data->master) {
         n_data->master = g_list_sort(n_data->master, sort_notify_entries);
     }
     expand_list(n_data->master, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_master_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_master_uname"), node_list);
 
     if (n_data->inactive) {
         n_data->inactive = g_list_sort(n_data->inactive, sort_notify_entries);
     }
     expand_list(n_data->inactive, &rsc_list, NULL);
     g_hash_table_insert(n_data->keys, strdup("notify_inactive_resource"), rsc_list);
 
     nodes = g_hash_table_get_values(n_data->allowed_nodes);
     node_list = expand_node_list(nodes);
     g_hash_table_insert(n_data->keys, strdup("notify_available_uname"), node_list);
     g_list_free(nodes);
 
     node_list = expand_node_list(data_set->nodes);
     g_hash_table_insert(n_data->keys, strdup("notify_all_uname"), node_list);
 
     if (required && n_data->pre) {
         update_action_flags(n_data->pre, pe_action_optional | pe_action_clear);
         update_action_flags(n_data->pre_done, pe_action_optional | pe_action_clear);
     }
 
     if (required && n_data->post) {
         update_action_flags(n_data->post, pe_action_optional | pe_action_clear);
         update_action_flags(n_data->post_done, pe_action_optional | pe_action_clear);
     }
     return required;
 }
 
 void
 create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     action_t *stop = NULL;
     action_t *start = NULL;
     enum action_tasks task = text2task(n_data->action);
 
     if (rsc->children) {
         gIter = rsc->children;
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             create_notifications(child, n_data, data_set);
         }
         return;
     }
 
     /* Copy notification details into standard ops */
 
     gIter = rsc->actions;
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *op = (action_t *) gIter->data;
 
         if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) {
             enum action_tasks t = text2task(op->task);
 
             switch (t) {
                 case start_rsc:
                 case stop_rsc:
                 case action_promote:
                 case action_demote:
                     g_hash_table_foreach(n_data->keys, dup_attr, op->meta);
                     break;
                 default:
                     break;
             }
         }
     }
 
     pe_rsc_trace(rsc, "Creating notificaitons for: %s.%s (%s->%s)",
                  n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role));
 
     stop = find_first_action(rsc->actions, NULL, RSC_STOP, NULL);
     start = find_first_action(rsc->actions, NULL, RSC_START, NULL);
 
     /* stop / demote */
     if (rsc->role != RSC_ROLE_STOPPED) {
         if (task == stop_rsc || task == action_demote) {
             gIter = rsc->running_on;
             for (; gIter != NULL; gIter = gIter->next) {
                 node_t *current_node = (node_t *) gIter->data;
 
                 /* if this stop action is a pseudo action as a result of the current
                  * node being fenced, this stop action is implied by the fencing 
                  * action. There's no reason to send the fenced node a stop notification */ 
                 if (stop &&
                     is_set(stop->flags, pe_action_pseudo) &&
                     current_node->details->unclean) {
 
                     continue;
                 }
 
                 pe_notify(rsc, current_node, n_data->pre, n_data->pre_done, n_data, data_set);
                 if (task == action_demote || stop == NULL
                     || is_set(stop->flags, pe_action_optional)) {
                     pe_post_notify(rsc, current_node, n_data, data_set);
                 }
             }
         }
     }
 
     /* start / promote */
     if (rsc->next_role != RSC_ROLE_STOPPED) {
         if (rsc->allocated_to == NULL) {
             pe_proc_err("Next role '%s' but %s is not allocated", role2text(rsc->next_role),
                         rsc->id);
 
         } else if (task == start_rsc || task == action_promote) {
             if (task != start_rsc || start == NULL || is_set(start->flags, pe_action_optional)) {
                 pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set);
             }
             pe_post_notify(rsc, rsc->allocated_to, n_data, data_set);
         }
     }
 }
 
 void
 free_notification_data(notify_data_t * n_data)
 {
     if (n_data == NULL) {
         return;
     }
 
     g_list_free_full(n_data->stop, free);
     g_list_free_full(n_data->start, free);
     g_list_free_full(n_data->demote, free);
     g_list_free_full(n_data->promote, free);
     g_list_free_full(n_data->master, free);
     g_list_free_full(n_data->slave, free);
     g_list_free_full(n_data->active, free);
     g_list_free_full(n_data->inactive, free);
     g_hash_table_destroy(n_data->keys);
     free(n_data);
 }
 
 int transition_id = -1;
 
 /*
  * Create a dependency graph to send to the transitioner (via the CRMd)
  */
 gboolean
 stage8(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     const char *value = NULL;
 
     transition_id++;
     crm_trace("Creating transition graph %d.", transition_id);
 
     data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
 
     value = pe_pref(data_set->config_hash, "cluster-delay");
     crm_xml_add(data_set->graph, "cluster-delay", value);
 
     value = pe_pref(data_set->config_hash, "stonith-timeout");
     crm_xml_add(data_set->graph, "stonith-timeout", value);
 
     crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
 
     if (is_set(data_set->flags, pe_flag_start_failure_fatal)) {
         crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
     } else {
         crm_xml_add(data_set->graph, "failed-start-offset", "1");
     }
 
     value = pe_pref(data_set->config_hash, "batch-limit");
     crm_xml_add(data_set->graph, "batch-limit", value);
 
     crm_xml_add_int(data_set->graph, "transition_id", transition_id);
 
     value = pe_pref(data_set->config_hash, "migration-limit");
     if (crm_int_helper(value, NULL) > 0) {
         crm_xml_add(data_set->graph, "migration-limit", value);
     }
 
 /* errors...
    slist_iter(action, action_t, action_list, lpc,
    if(action->optional == FALSE && action->runnable == FALSE) {
    print_action("Ignoring", action, TRUE);
    }
    );
 */
 
     gIter = data_set->resources;
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
         rsc->cmds->expand(rsc, data_set);
     }
 
     crm_log_xml_trace(data_set->graph, "created resource-driven action list");
 
     /* catch any non-resource specific actions */
     crm_trace("processing non-resource actions");
 
     gIter = data_set->actions;
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         if (action->rsc
             && action->node
             && action->node->details->shutdown
             && is_not_set(action->rsc->flags, pe_rsc_maintenance)
             && is_not_set(action->flags, pe_action_optional)
             && is_not_set(action->flags, pe_action_runnable)
             && crm_str_eq(action->task, RSC_STOP, TRUE)
             ) {
             /* Eventually we should just ignore the 'fence' case
              * But for now its the best way to detect (in CTS) when
              * CIB resource updates are being lost
              */
             if (is_set(data_set->flags, pe_flag_have_quorum)
                 || data_set->no_quorum_policy == no_quorum_ignore) {
                 crm_crit("Cannot %s node '%s' because of %s:%s%s",
                          action->node->details->unclean ? "fence" : "shut down",
                          action->node->details->uname, action->rsc->id,
                          is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
                          is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "");
             }
         }
 
         graph_element_from_action(action, data_set);
     }
 
     crm_log_xml_trace(data_set->graph, "created generic action list");
     crm_trace("Created transition graph %d.", transition_id);
 
     return TRUE;
 }
 
 void
 cleanup_alloc_calculations(pe_working_set_t * data_set)
 {
     if (data_set == NULL) {
         return;
     }
 
     crm_trace("deleting %d order cons: %p",
               g_list_length(data_set->ordering_constraints), data_set->ordering_constraints);
     pe_free_ordering(data_set->ordering_constraints);
     data_set->ordering_constraints = NULL;
 
     crm_trace("deleting %d node cons: %p",
               g_list_length(data_set->placement_constraints), data_set->placement_constraints);
     pe_free_rsc_to_node(data_set->placement_constraints);
     data_set->placement_constraints = NULL;
 
     crm_trace("deleting %d inter-resource cons: %p",
               g_list_length(data_set->colocation_constraints), data_set->colocation_constraints);
     g_list_free_full(data_set->colocation_constraints, free);
     data_set->colocation_constraints = NULL;
 
     crm_trace("deleting %d ticket deps: %p",
               g_list_length(data_set->ticket_constraints), data_set->ticket_constraints);
     g_list_free_full(data_set->ticket_constraints, free);
     data_set->ticket_constraints = NULL;
 
     cleanup_calculations(data_set);
 }
diff --git a/valgrind-pcmk.suppressions b/valgrind-pcmk.suppressions
index e7caa55dbb..2e382dffbb 100644
--- a/valgrind-pcmk.suppressions
+++ b/valgrind-pcmk.suppressions
@@ -1,297 +1,306 @@
 # Valgrind suppressions for PE testing
 {
    Valgrind bug
    Memcheck:Addr8
    fun:__strspn_sse42
    fun:crm_get_msec
 }
 
 {
    Bash leak
    Memcheck:Leak
    fun:malloc
    fun:xmalloc
    fun:*
    fun:yyparse
    fun:parse_command
    fun:read_command
    fun:reader_loop
    fun:main
 }
 
+{
+   Another bash leak
+   Memcheck:Leak
+   fun:malloc
+   fun:xmalloc
+   fun:set_default_locale
+   fun:main
+}
+
 {
    Ignore option parsing
    Memcheck:Leak
    fun:realloc
    fun:crm_get_option_long
    fun:main
 }
 
 {
    dlopen internals
    Memcheck:Leak
    fun:calloc
    fun:_dlerror_run
    fun:dlopen*
    fun:_log_so_walk_callback
    fun:dl_iterate_phdr
    fun:qb_log_init
 }
 
 {
    Cman - Points to uninitialized bytes
    Memcheck:Param
    socketcall.sendmsg(msg)
    fun:__sendmsg_nocancel
    obj:*/libcman.so.3.0
    obj:*/libcman.so.3.0
 }
 
 {
    Cman - Jump or move depends on uninitialized values
    Memcheck:Cond
    obj:*/libcman.so.3.0
    obj:*/libcman.so.3.0
 }
 
 {
    quarks - hashtable
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libglib-*
    fun:g_slice_alloc
    fun:g_hash_table_new_full
    fun:g_quark_from_static_string
 }
 
 {
    quarks - hashtable 2
    Memcheck:Leak
    fun:malloc
    fun:g_malloc
    fun:g_slice_alloc
    fun:g_hash_table_new_full
    fun:g_quark_from_static_string
 }
 
 {
    quarks - hashtable 3
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    fun:g_hash_table_new_full
    fun:g_quark_from_static_string
 }
 
 {
    quarks - hashtable 4
    Memcheck:Leak
    fun:malloc
    fun:realloc
    fun:g_realloc
    fun:g_quark_from_static_string
 }
 
 {
    glib mainloop internals - default
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    fun:g_main_context_new
    fun:g_main_context_default
    fun:g_main_loop_new
    fun:main
 }
 
 {
    glib mainloop internals - default
    Memcheck:Leak
    fun:malloc
    fun:g_malloc
    fun:g_slice_alloc
    fun:*
    fun:g_main_context_new
    fun:g_main_context_default
    fun:g_main_loop_new
    fun:main
 }
 
 {
    glib mainloop internals - default
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libglib-2.*
    fun:g_slice_alloc
    fun:g_ptr_array_sized_new
    fun:g_main_context_new
    fun:g_main_context_default
    fun:g_main_loop_new
 }
 
 {
    glib mainloop internals - run
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    fun:g_thread_self
    fun:g_main_loop_run
 }
 
 {
    glib mainloop internals - run
    Memcheck:Leak
    fun:malloc
    fun:g_malloc
    obj:*/libglib-2.*
    fun:g_main_loop_run
 }
 
 {
    glib mainloop internals - run
    Memcheck:Leak
    fun:malloc
    fun:realloc
    fun:g_realloc
    obj:*/libglib-2.*
    fun:g_ptr_array_add
    fun:g_main_context_check
    obj:*/libglib-2.*
    fun:g_main_loop_run
 }
 
 {
    glib mainloop internals - run
    Memcheck:Leak
    fun:malloc
    fun:g_malloc
    fun:g_slice_alloc
    fun:g_slice_alloc0
    obj:*/libglib-2.*
    fun:g_main_context_dispatch
    obj:*/libglib-2.*
    fun:g_main_loop_run
 }
 
 {
    glib mainloop internals - run
    Memcheck:Leak
    fun:malloc
    fun:realloc
    fun:g_realloc
    obj:*/libglib-2.*
    fun:g_array_set_size
    fun:g_static_private_set
    obj:*/libglib-2.*
    fun:g_main_context_dispatch
    obj:*/libglib-2.*
    fun:g_main_loop_run
 }
 
 {
    glib mainloop internals - run
    Memcheck:Leak
    fun:malloc
    fun:g_malloc
    fun:g_slice_alloc
    fun:g_array_sized_new
    fun:g_static_private_set
    obj:*/libglib-2.*
    fun:g_main_context_dispatch
    obj:*/libglib-2.*
    fun:g_main_loop_run
 }
 
 {
    glib types
    Memcheck:Leak
    fun:malloc
    fun:realloc
    fun:g_realloc
    obj:*/libgobject-*
    fun:g_type_register_static
 }
 
 {
    glib types 2
    Memcheck:Leak
    fun:realloc
    fun:g_realloc
    obj:*/libgobject-*
    fun:g_type_register_static
    fun:g_param_type_register_static
 }
 
 {
    glib types 3
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libgobject-*
    fun:g_type_register_fundamental
 }
 
 {
    glib types 4
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libgobject-*
    obj:*/libgobject-*
    fun:g_type_register_fundamental
 }
 
 {
    glib - the return
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libgobject-*
    obj:*/libgobject-*
    fun:_dl_init
 }
 
 {
    glib - seriously?
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libgobject-*
    obj:*/libgobject-*
    obj:*/libgobject-*
    fun:_dl_init
 }
 
 {
    glib - this isnt funny anymore
    Memcheck:Leak
    fun:malloc
    fun:realloc
    fun:g_realloc
    obj:*/libgobject-*
    fun:g_type_register_fundamental
 }
 
 {
    glib - why do you hate me?
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libgobject-*
    obj:*/libgobject-*
    fun:call_init.part.0
    fun:_dl_init
 }
 
 {
    dear glib - you suck at memory management
    Memcheck:Leak
    fun:calloc
    fun:g_malloc0
    obj:*/libgobject-*
    obj:*/libgobject-*
    obj:*/libgobject-*
    fun:call_init.part.0
    fun:_dl_init
-}
\ No newline at end of file
+}