diff --git a/cib/io.c b/cib/io.c
index e2873a8967..4e2b24affe 100644
--- a/cib/io.c
+++ b/cib/io.c
@@ -1,492 +1,490 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <stdio.h>
 #include <unistd.h>
 #include <string.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <dirent.h>
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
 
 #include <crm/crm.h>
 
 #include <cibio.h>
 #include <crm/cib.h>
 #include <crm/common/util.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 #include <crm/cib/internal.h>
 #include <crm/cluster.h>
 
 extern const char *cib_root;
 
 crm_trigger_t *cib_writer = NULL;
 gboolean initialized = FALSE;
 
 extern int cib_status;
 
 int write_cib_contents(gpointer p);
 
 static void
 cib_rename(const char *old)
 {
     int new_fd;
     char *new = crm_strdup_printf("%s/cib.auto.XXXXXX", cib_root);
 
     crm_err("Archiving unusable file %s as %s", old, new);
     umask(S_IWGRP | S_IWOTH | S_IROTH);
     if ((new_fd = mkstemp(new) < 0) || (rename(old, new) < 0)) {
         crm_perror(LOG_ERR, "Couldn't rename %s as %s", old, new);
         crm_err("Disabling disk writes and continuing");
         cib_writes_enabled = FALSE;
     }
     if (new_fd > 0) {
         close(new_fd);
     }
     free(new);
 }
 
 /*
  * It is the callers responsibility to free the output of this function
  */
 
 static xmlNode *
 retrieveCib(const char *filename, const char *sigfile)
 {
     xmlNode *root = NULL;
 
     crm_info("Reading cluster configuration file %s (digest: %s)",
              filename, sigfile);
     switch (cib_file_read_and_verify(filename, sigfile, &root)) {
         case -pcmk_err_cib_corrupt:
             crm_warn("Continuing but %s will NOT be used.", filename);
             break;
 
         case -pcmk_err_cib_modified:
             /* Archive the original files so the contents are not lost */
             crm_warn("Continuing but %s will NOT be used.", filename);
             cib_rename(filename);
             cib_rename(sigfile);
             break;
     }
     return root;
 }
 
 /*
  * for OSs without support for direntry->d_type, like Solaris
  */
 #ifndef DT_UNKNOWN
 # define DT_UNKNOWN     0
 # define DT_FIFO        1
 # define DT_CHR         2
 # define DT_DIR         4
 # define DT_BLK         6
 # define DT_REG         8
 # define DT_LNK         10
 # define DT_SOCK        12
 # define DT_WHT         14
 #endif /*DT_UNKNOWN*/
 
 static int cib_archive_filter(const struct dirent * a)
 {
     int rc = 0;
     /* Looking for regular files (d_type = 8) starting with 'cib-' and not ending in .sig */
     struct stat s;
     char *a_path = crm_strdup_printf("%s/%s", cib_root, a->d_name);
 
     if(stat(a_path, &s) != 0) {
         rc = errno;
         crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_strerror(rc), rc);
         rc = 0;
 
     } else if ((s.st_mode & S_IFREG) != S_IFREG) {
         unsigned char dtype;
 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
         dtype = a->d_type;
 #else
         switch (s.st_mode & S_IFMT) {
             case S_IFREG:  dtype = DT_REG;      break;
             case S_IFDIR:  dtype = DT_DIR;      break;
             case S_IFCHR:  dtype = DT_CHR;      break;
             case S_IFBLK:  dtype = DT_BLK;      break;
             case S_IFLNK:  dtype = DT_LNK;      break;
             case S_IFIFO:  dtype = DT_FIFO;     break;
             case S_IFSOCK: dtype = DT_SOCK;     break;
             default:       dtype = DT_UNKNOWN;  break;
         }
 #endif
          crm_trace("%s - wrong type (%d)", a->d_name, dtype);
 
     } else if(strstr(a->d_name, "cib-") != a->d_name) {
         crm_trace("%s - wrong prefix", a->d_name);
 
     } else if(strstr(a->d_name, ".sig") != NULL) {
         crm_trace("%s - wrong suffix", a->d_name);
 
     } else {
         crm_debug("%s - candidate", a->d_name);
         rc = 1;
     }
 
     free(a_path);
     return rc;
 }
 
 static int cib_archive_sort(const struct dirent ** a, const struct dirent **b)
 {
     /* Order by creation date - most recently created file first */
     int rc = 0;
     struct stat buf;
 
     time_t a_age = 0;
     time_t b_age = 0;
 
     char *a_path = crm_strdup_printf("%s/%s", cib_root, a[0]->d_name);
     char *b_path = crm_strdup_printf("%s/%s", cib_root, b[0]->d_name);
 
     if(stat(a_path, &buf) == 0) {
         a_age = buf.st_ctime;
     }
     if(stat(b_path, &buf) == 0) {
         b_age = buf.st_ctime;
     }
 
     free(a_path);
     free(b_path);
 
     if(a_age > b_age) {
         rc = 1;
     } else if(a_age < b_age) {
         rc = -1;
     }
 
     crm_trace("%s (%u) vs. %s (%u) : %d", a[0]->d_name, a_age, b[0]->d_name, b_age, rc);
     return rc;
 }
 
 xmlNode *
 readCibXmlFile(const char *dir, const char *file, gboolean discard_status)
 {
     struct dirent **namelist = NULL;
 
     int lpc = 0;
     char *sigfile = NULL;
     char *filename = NULL;
     const char *name = NULL;
     const char *value = NULL;
     const char *validation = NULL;
     const char *use_valgrind = getenv("PCMK_valgrind_enabled");
 
     xmlNode *root = NULL;
     xmlNode *status = NULL;
 
     if (!crm_is_writable(dir, file, CRM_DAEMON_USER, NULL, FALSE)) {
         cib_status = -EACCES;
         return NULL;
     }
 
     filename = crm_concat(dir, file, '/');
     sigfile = crm_concat(filename, "sig", '.');
 
     cib_status = pcmk_ok;
     root = retrieveCib(filename, sigfile);
     free(filename);
     free(sigfile);
 
     if (root == NULL) {
         crm_warn("Primary configuration corrupt or unusable, trying backups in %s", cib_root);
         lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort);
         if (lpc < 0) {
             crm_perror(LOG_NOTICE, "scandir(%s) failed", cib_root);
         }
     }
 
     while (root == NULL && lpc > 1) {
         crm_debug("Testing %d candidates", lpc);
 
         lpc--;
 
         filename = crm_strdup_printf("%s/%s", cib_root, namelist[lpc]->d_name);
         sigfile = crm_concat(filename, "sig", '.');
 
         crm_info("Reading cluster configuration file %s (digest: %s)",
                  filename, sigfile);
         if (cib_file_read_and_verify(filename, sigfile, &root) < 0) {
             crm_warn("Continuing but %s will NOT be used.", filename);
         } else {
             crm_notice("Continuing with last valid configuration archive: %s", filename);
         }
 
         free(namelist[lpc]);
         free(filename);
         free(sigfile);
     }
     free(namelist);
 
     if (root == NULL) {
         root = createEmptyCib(0);
         crm_warn("Continuing with an empty configuration.");
     }
 
     if (cib_writes_enabled && use_valgrind) {
         if (crm_is_true(use_valgrind) || strstr(use_valgrind, "cib")) {
             cib_writes_enabled = FALSE;
-            crm_err("*********************************************************");
             crm_err("*** Disabling disk writes to avoid confusing Valgrind ***");
-            crm_err("*********************************************************");
         }
     }
 
     status = find_xml_node(root, XML_CIB_TAG_STATUS, FALSE);
     if (discard_status && status != NULL) {
         /* strip out the status section if there is one */
         free_xml(status);
         status = NULL;
     }
     if (status == NULL) {
         create_xml_node(root, XML_CIB_TAG_STATUS);
     }
 
     /* Do this before DTD validation happens */
 
     /* fill in some defaults */
     name = XML_ATTR_GENERATION_ADMIN;
     value = crm_element_value(root, name);
     if (value == NULL) {
         crm_warn("No value for %s was specified in the configuration.", name);
         crm_warn("The reccomended course of action is to shutdown,"
                  " run crm_verify and fix any errors it reports.");
         crm_warn("We will default to zero and continue but may get"
                  " confused about which configuration to use if"
                  " multiple nodes are powered up at the same time.");
         crm_xml_add_int(root, name, 0);
     }
 
     name = XML_ATTR_GENERATION;
     value = crm_element_value(root, name);
     if (value == NULL) {
         crm_xml_add_int(root, name, 0);
     }
 
     name = XML_ATTR_NUMUPDATES;
     value = crm_element_value(root, name);
     if (value == NULL) {
         crm_xml_add_int(root, name, 0);
     }
 
     /* unset these and require the DC/CCM to update as needed */
     xml_remove_prop(root, XML_ATTR_DC_UUID);
 
     if (discard_status) {
         crm_log_xml_trace(root, "[on-disk]");
     }
 
     validation = crm_element_value(root, XML_ATTR_VALIDATION);
     if (validate_xml(root, NULL, TRUE) == FALSE) {
         crm_err("CIB does not validate with %s", crm_str(validation));
         cib_status = -pcmk_err_schema_validation;
 
     } else if (validation == NULL) {
         int version = 0;
 
         update_validation(&root, &version, 0, FALSE, FALSE);
         if (version > 0) {
             crm_notice("Enabling %s validation on"
                        " the existing (sane) configuration", get_schema_name(version));
         } else {
             crm_err("CIB does not validate with any known DTD or schema");
             cib_status = -pcmk_err_schema_validation;
         }
     }
 
     return root;
 }
 
 /*
  * The caller should never free the return value
  */
 xmlNode *
 get_the_CIB(void)
 {
     return the_cib;
 }
 
 gboolean
 uninitializeCib(void)
 {
     xmlNode *tmp_cib = the_cib;
 
     if (tmp_cib == NULL) {
         crm_debug("The CIB has already been deallocated.");
         return FALSE;
     }
 
     initialized = FALSE;
     the_cib = NULL;
 
     crm_debug("Deallocating the CIB.");
 
     free_xml(tmp_cib);
 
     crm_debug("The CIB has been deallocated.");
 
     return TRUE;
 }
 
 /*
  * This method will not free the old CIB pointer or the new one.
  * We rely on the caller to have saved a pointer to the old CIB
  *   and to free the old/bad one depending on what is appropriate.
  */
 gboolean
 initializeCib(xmlNode * new_cib)
 {
     if (new_cib == NULL) {
         return FALSE;
     }
 
     the_cib = new_cib;
     initialized = TRUE;
     return TRUE;
 }
 
 /*
  * This method will free the old CIB pointer on success and the new one
  * on failure.
  */
 int
 activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op)
 {
     xmlNode *saved_cib = the_cib;
 
     CRM_ASSERT(new_cib != saved_cib);
     if (initializeCib(new_cib) == FALSE) {
         free_xml(new_cib);
         crm_err("Ignoring invalid or NULL CIB");
 
         if (saved_cib != NULL) {
             crm_warn("Reverting to last known CIB");
             if (initializeCib(saved_cib) == FALSE) {
                 /* oh we are so dead  */
                 crm_crit("Couldn't re-initialize the old CIB!");
                 exit(1);
             }
 
         } else {
             crm_crit("Could not write out new CIB and no saved" " version to revert to");
         }
         return -ENODATA;
     }
 
     free_xml(saved_cib);
     if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) {
         crm_debug("Triggering CIB write for %s op", op);
         mainloop_set_trigger(cib_writer);
     }
 
     return pcmk_ok;
 }
 
 static void
 cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
 {
     if (signo) {
         crm_notice("Disk write process terminated with signal %d (pid=%d, core=%d)", signo, pid,
                    core);
 
     } else  {
         do_crm_log(exitcode == 0 ? LOG_TRACE : LOG_ERR, "Disk write process exited (pid=%d, rc=%d)",
                    pid, exitcode);
     }
 
     if (exitcode != 0 && cib_writes_enabled) {
         crm_err("Disabling disk writes after write failure");
         cib_writes_enabled = FALSE;
     }
 
     mainloop_trigger_complete(cib_writer);
 }
 
 int
 write_cib_contents(gpointer p)
 {
     int exit_rc = pcmk_ok;
     xmlNode *cib_local = NULL;
 
     /* Make a copy of the CIB to write (possibly in a forked child) */
     if (p) {
         /* Synchronous write out */
         cib_local = copy_xml(p);
 
     } else {
         int pid = 0;
         int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0);
 
         /* Turn it off before the fork() to avoid:
          * - 2 processes writing to the same shared mem
          * - the child needing to disable it
          *   (which would close it from underneath the parent)
          * This way, the shared mem files are already closed
          */
         qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE);
 
         pid = fork();
         if (pid < 0) {
             crm_perror(LOG_ERR, "Disabling disk writes after fork failure");
             cib_writes_enabled = FALSE;
             return FALSE;
         }
 
         if (pid) {
             /* Parent */
             mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete);
             if (bb_state == QB_LOG_STATE_ENABLED) {
                 /* Re-enable now that it it safe */
                 qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE);
             }
 
             return -1;          /* -1 means 'still work to do' */
         }
 
         /* A-synchronous write out after a fork() */
 
         /* In theory we can scribble on "the_cib" here and not affect the parent
          * But lets be safe anyway
          */
         cib_local = copy_xml(the_cib);
     }
 
     /* Write the CIB */
     exit_rc = cib_file_write_with_digest(cib_local, cib_root, "cib.xml");
 
     /* A nonzero exit code will cause further writes to be disabled */
     free_xml(cib_local);
     if (p == NULL) {
         /* Use _exit() because exit() could affect the parent adversely */
         _exit(exit_rc);
     }
     return exit_rc;
 }
diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
index 81a53c579b..78432dff52 100644
--- a/crmd/crmd_lrm.h
+++ b/crmd/crmd_lrm.h
@@ -1,164 +1,166 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crmd_messages.h>
 
 extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level);
 extern void lrm_clear_last_failure(const char *rsc_id, const char *node_name);
 void lrm_op_callback(lrmd_event_data_t * op);
 
 typedef struct resource_history_s {
     char *id;
     uint32_t last_callid;
     lrmd_rsc_info_t rsc;
     lrmd_event_data_t *last;
     lrmd_event_data_t *failed;
     GList *recurring_op_list;
 
     /* Resources must be stopped using the same
      * parameters they were started with.  This hashtable
      * holds the parameters that should be used for the next stop
      * cmd on this resource. */
     GHashTable *stop_params;
 } rsc_history_t;
 
+void history_free(gpointer data);
+
 /* TDOD - Replace this with lrmd_event_data_t */
 struct recurring_op_s {
     int call_id;
     int interval;
     gboolean remove;
     gboolean cancelled;
     unsigned int start_time;
     char *rsc_id;
     char *op_type;
     char *op_key;
     char *user_data;
     GHashTable *params;
 };
 
 typedef struct lrm_state_s {
     const char *node_name;
     /* reserved for lrm_state.c usage only */
     void *conn;
     /* reserved for remote_lrmd_ra.c usage only */
     void *remote_ra_data;
 
     GHashTable *resource_history;
     GHashTable *pending_ops;
     GHashTable *deletion_ops;
     GHashTable *rsc_info_cache;
 
     int num_lrm_register_fails;
 } lrm_state_t;
 
 struct pending_deletion_op_s {
     char *rsc;
     ha_msg_input_t *input;
 };
 
 xmlNode *do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace);
 
 /*!
  * \brief Is this the local ipc connection to the lrmd
  */
 gboolean
 lrm_state_is_local(lrm_state_t *lrm_state);
 
 /*!
  * \brief Clear all state information from a single state entry.
  * \note This does not close the lrmd connection
  */
 void lrm_state_reset_tables(lrm_state_t * lrm_state);
 GList *lrm_state_get_list(void);
 
 /*!
  * \brief Initiate internal state tables
  */
 gboolean lrm_state_init_local(void);
 
 /*!
  * \brief Destroy all state entries and internal state tables
  */
 void lrm_state_destroy_all(void);
 
 /*!
  * \brief Create lrmd connection entry.
  */
 lrm_state_t *lrm_state_create(const char *node_name);
 
 /*!
  * \brief Destroy lrmd connection keyed of node name
  */
 void lrm_state_destroy(const char *node_name);
 
 /*!
  * \brief Find lrm_state data by node name
  */
 lrm_state_t *lrm_state_find(const char *node_name);
 
 /*!
  * \brief Either find or create a new entry
  */
 lrm_state_t *lrm_state_find_or_create(const char *node_name);
 
 /*!
  * The functions below are wrappers for the lrmd api calls the crmd
  * uses.  These wrapper functions allow us to treat the crmd's remote
  * lrmd connection resources the same as regular resources.  Internally
  * Regular resources go to the lrmd, and remote connection resources are
  * handled locally in the crmd.
  */
 void lrm_state_disconnect(lrm_state_t * lrm_state);
 int lrm_state_ipc_connect(lrm_state_t * lrm_state);
 int lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
                                    int timeout);
 int lrm_state_is_connected(lrm_state_t * lrm_state);
 int lrm_state_poke_connection(lrm_state_t * lrm_state);
 
 int lrm_state_get_metadata(lrm_state_t * lrm_state,
                            const char *class,
                            const char *provider,
                            const char *agent, char **output, enum lrmd_call_options options);
 int lrm_state_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval);
 int lrm_state_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
                    int timeout, /* ms */
                    int start_delay,     /* ms */
                    lrmd_key_value_t * params);
 lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state,
                                         const char *rsc_id, enum lrmd_call_options options);
 int lrm_state_register_rsc(lrm_state_t * lrm_state,
                            const char *rsc_id,
                            const char *class,
                            const char *provider, const char *agent, enum lrmd_call_options options);
 int lrm_state_unregister_rsc(lrm_state_t * lrm_state,
                              const char *rsc_id, enum lrmd_call_options options);
 
 /*! These functions are used to manage the remote lrmd connection resources */
 void remote_lrm_op_callback(lrmd_event_data_t * op);
 gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id);
 lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id);
 int remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval);
 int remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
                    int timeout, /* ms */
                    int start_delay,     /* ms */
                    lrmd_key_value_t * params);
 void remote_ra_cleanup(lrm_state_t * lrm_state);
 
 xmlNode *simple_remote_node_status(const char *node_name, xmlNode *parent, const char *source);
 
 gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending);
diff --git a/crmd/lrm.c b/crmd/lrm.c
index 062f769340..418e7cf33c 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -1,2390 +1,2434 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
 #include <crm/crm.h>
 #include <crm/services.h>
 
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 
 #define START_DELAY_THRESHOLD 5 * 60 * 1000
 #define MAX_LRM_REG_FAILS 30
 
 struct delete_event_s {
     int rc;
     const char *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
 static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                              const char *user_name);
 
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
                                        const char *rsc_id, const char *operation);
 static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
                           xmlNode * msg, xmlNode * request);
 
 void send_direct_ack(const char *to_host, const char *to_sys,
                      lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
 
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
                                          int log_level);
 static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
 
 static void
 lrm_connection_destroy(void)
 {
     if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
         crm_crit("LRM Connection failed");
         register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
         clear_bit(fsa_input_register, R_LRM_CONNECTED);
 
     } else {
         crm_info("LRM Connection disconnected");
     }
 
 }
 
 static char *
 make_stop_id(const char *rsc, int call_id)
 {
     char *op_id = NULL;
 
     op_id = calloc(1, strlen(rsc) + 34);
     if (op_id != NULL) {
         snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id);
     }
     return op_id;
 }
 
 static void
 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
 {
     if (strstr(key, CRM_META "_") == NULL) {
         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
     }
 }
 
 static void
 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
 {
     if (strstr(key, CRM_META "_") != NULL) {
         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
     }
 }
 
+/*
+ * \internal
+ * \brief Remove a recurring operation from a resource's history
+ *
+ * \param[in,out] history  Resource history to modify
+ * \param[in]     op       Operation to remove
+ *
+ * \return TRUE if the operation was found and removed, FALSE otherwise
+ */
+static gboolean
+history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
+{
+    GList *iter;
+
+    for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
+        lrmd_event_data_t *existing = iter->data;
+
+        if ((op->interval == existing->interval)
+            && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
+            && safe_str_eq(op->op_type, existing->op_type)) {
+
+            history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
+            lrmd_free_event(existing);
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+/*
+ * \internal
+ * \brief Free all recurring operations in resource history
+ *
+ * \param[in,out] history  Resource history to modify
+ */
+static void
+history_free_recurring_ops(rsc_history_t *history)
+{
+    GList *iter;
+
+    for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
+        lrmd_free_event(iter->data);
+    }
+    g_list_free(history->recurring_op_list);
+    history->recurring_op_list = NULL;
+}
+
+/*
+ * \internal
+ * \brief Free resource history
+ *
+ * \param[in,out] history  Resource history to free
+ */
+void
+history_free(gpointer data)
+{
+    rsc_history_t *history = (rsc_history_t*)data;
+
+    if (history->stop_params) {
+        g_hash_table_destroy(history->stop_params);
+    }
+
+    /* Don't need to free history->rsc.id because it's set to history->id */
+    free(history->rsc.type);
+    free(history->rsc.class);
+    free(history->rsc.provider);
+
+    lrmd_free_event(history->failed);
+    lrmd_free_event(history->last);
+    free(history->id);
+    history_free_recurring_ops(history);
+    free(history);
+}
+
 static void
 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
     int target_rc = 0;
     rsc_history_t *entry = NULL;
 
     if (op->rsc_deleted) {
         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
         delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
         return;
     }
 
     if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
         return;
     }
 
     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 
     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
     if (entry == NULL && rsc) {
         entry = calloc(1, sizeof(rsc_history_t));
         entry->id = strdup(op->rsc_id);
         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 
         entry->rsc.id = entry->id;
         entry->rsc.type = strdup(rsc->type);
         entry->rsc.class = strdup(rsc->class);
         if (rsc->provider) {
             entry->rsc.provider = strdup(rsc->provider);
         } else {
             entry->rsc.provider = NULL;
         }
 
     } else if (entry == NULL) {
         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
         return;
     }
 
     entry->last_callid = op->call_id;
     target_rc = rsc_op_expected_rc(op);
     if (op->op_status == PCMK_LRM_OP_CANCELLED) {
         if (op->interval > 0) {
-            GList *gIter, *gIterNext;
-
             crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type,
                       op->interval);
-
-            for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIterNext) {
-                lrmd_event_data_t *existing = gIter->data;
-
-                gIterNext = gIter->next;
-
-                if (crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
-                    && safe_str_eq(op->op_type, existing->op_type)
-                    && op->interval == existing->interval) {
-                    lrmd_free_event(existing);
-                    entry->recurring_op_list = g_list_delete_link(entry->recurring_op_list, gIter);
-                }
-            }
+            history_remove_recurring_op(entry, op);
             return;
-
         } else {
             crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval,
                       op->rc, op->op_status);
         }
 
     } else if (did_rsc_op_fail(op, target_rc)) {
         /* We must store failed monitors here
          * - otherwise the block below will cause them to be forgetten them when a stop happens
          */
         if (entry->failed) {
             lrmd_free_event(entry->failed);
         }
         entry->failed = lrmd_copy_event(op);
 
     } else if (op->interval == 0) {
         if (entry->last) {
             lrmd_free_event(entry->last);
         }
         entry->last = lrmd_copy_event(op);
 
         if (op->params &&
             (safe_str_eq(CRMD_ACTION_START, op->op_type) ||
              safe_str_eq("reload", op->op_type) ||
              safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
 
             if (entry->stop_params) {
                 g_hash_table_destroy(entry->stop_params);
             }
             entry->stop_params = g_hash_table_new_full(crm_str_hash,
                                                        g_str_equal, g_hash_destroy_str,
                                                        g_hash_destroy_str);
 
             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
         }
     }
 
     if (op->interval > 0) {
-        GListPtr iter = NULL;
-
-        for(iter = entry->recurring_op_list; iter; iter = iter->next) {
-            lrmd_event_data_t *o = iter->data;
-
-            /* op->rsc_id is implied */
-            if(op->interval == o->interval && strcmp(op->op_type, o->op_type) == 0) {
-                crm_trace("Removing existing recurring op entry: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
-                entry->recurring_op_list = g_list_remove(entry->recurring_op_list, o);
-                break;
-            }
-        }
+        /* Ensure there are no duplicates */
+        history_remove_recurring_op(entry, op);
 
         crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 
     } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
-        GList *gIter = entry->recurring_op_list;
-
         crm_trace("Dropping %d recurring ops because of: %s_%s_%d",
-                  g_list_length(gIter), op->rsc_id, op->op_type, op->interval);
-        for (; gIter != NULL; gIter = gIter->next) {
-            lrmd_free_event(gIter->data);
-        }
-        g_list_free(entry->recurring_op_list);
-        entry->recurring_op_list = NULL;
+                  g_list_length(entry->recurring_op_list), op->rsc_id,
+                  op->op_type, op->interval);
+        history_free_recurring_ops(entry);
     }
 }
 
 void
 lrm_op_callback(lrmd_event_data_t * op)
 {
     const char *nodename = NULL;
     lrm_state_t *lrm_state = NULL;
 
     CRM_CHECK(op != NULL, return);
 
     /* determine the node name for this connection. */
     nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname;
 
     if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) {
         /* if this is the local lrmd ipc connection, set the right bits in the
          * crmd when the connection goes down */
         lrm_connection_destroy();
         return;
     } else if (op->type != lrmd_event_exec_complete) {
         /* we only need to process execution results */
         return;
     }
 
     lrm_state = lrm_state_find(nodename);
     CRM_ASSERT(lrm_state != NULL);
 
     process_lrm_event(lrm_state, op, NULL);
 }
 
 /*	 A_LRM_CONNECT	*/
 void
 do_lrm_control(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     /* This only pertains to local lrmd connections.  Remote connections are handled as
      * resources within the pengine.  Connecting and disconnecting from remote lrmd instances
      * handled differently than the local. */
 
     lrm_state_t *lrm_state = NULL;
 
     if(fsa_our_uname == NULL) {
         return; /* Nothing to do */
     }
     lrm_state = lrm_state_find_or_create(fsa_our_uname);
     if (lrm_state == NULL) {
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
         return;
     }
 
     if (action & A_LRM_DISCONNECT) {
         if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
             if (action == A_LRM_DISCONNECT) {
                 crmd_fsa_stall(FALSE);
                 return;
             }
         }
 
         clear_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("Disconnecting from the LRM");
         lrm_state_disconnect(lrm_state);
         lrm_state_reset_tables(lrm_state);
         crm_notice("Disconnected from the LRM");
     }
 
     if (action & A_LRM_CONNECT) {
         int ret = pcmk_ok;
 
         crm_debug("Connecting to the LRM");
         ret = lrm_state_ipc_connect(lrm_state);
 
         if (ret != pcmk_ok) {
             if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
                 crm_warn("Failed to sign on to the LRM %d"
                          " (%d max) times", lrm_state->num_lrm_register_fails, MAX_LRM_REG_FAILS);
 
                 crm_timer_start(wait_timer);
                 crmd_fsa_stall(FALSE);
                 return;
             }
         }
 
         if (ret != pcmk_ok) {
             crm_err("Failed to sign on to the LRM %d" " (max) times",
                     lrm_state->num_lrm_register_fails);
             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
             return;
         }
 
         set_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("LRM connection established");
     }
 
     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
         crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
     }
 }
 
 static gboolean
 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
 {
     int counter = 0;
     gboolean rc = TRUE;
     const char *when = "lrm disconnect";
 
     GHashTableIter gIter;
     const char *key = NULL;
     rsc_history_t *entry = NULL;
     struct recurring_op_s *pending = NULL;
 
     crm_debug("Checking for active resources before exit");
 
     if (cur_state == S_TERMINATE) {
         log_level = LOG_ERR;
         when = "shutdown";
 
     } else if (is_set(fsa_input_register, R_SHUTDOWN)) {
         when = "shutdown... waiting";
     }
 
     if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
         guint removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_actions, lrm_state);
 
         crm_notice("Stopped %u recurring operations at %s (%u ops remaining)",
                    removed, when, g_hash_table_size(lrm_state->pending_ops));
     }
 
     if (lrm_state->pending_ops) {
         g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
             /* Ignore recurring actions in the shutdown calculations */
             if (pending->interval == 0) {
                 counter++;
             }
         }
     }
 
     if (counter > 0) {
         do_crm_log(log_level, "%d pending LRM operations at %s", counter, when);
 
         if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
             g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
             }
 
         } else {
             rc = FALSE;
         }
         return rc;
     }
 
     if (lrm_state->resource_history == NULL) {
         return rc;
     }
 
     if (cur_state == S_TERMINATE || is_set(fsa_input_register, R_SHUTDOWN)) {
         /* At this point we're not waiting, we're just shutting down */
         when = "shutdown";
     }
 
     counter = 0;
     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
             continue;
         }
 
         counter++;
         crm_trace("Found %s active", entry->id);
         if (lrm_state->pending_ops) {
             GHashTableIter hIter;
 
             g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
                 if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
                     crm_notice("%sction %s (%s) incomplete at %s",
                                pending->interval == 0 ? "A" : "Recurring a",
                                key, pending->op_key, when);
                 }
             }
         }
     }
 
     if (counter) {
         crm_err("%d resources were active at %s.", counter, when);
     }
 
     return rc;
 }
 
 GHashTable *metadata_hash = NULL;
 
 static char *
 get_rsc_metadata(const char *type, const char *rclass, const char *provider, bool force)
 {
     int rc = pcmk_ok;
     int len = 0;
     char *key = NULL;
     char *metadata = NULL;
 
     /* Always use a local connection for this operation */
     lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname);
 
     CRM_CHECK(type != NULL, return NULL);
     CRM_CHECK(rclass != NULL, return NULL);
     CRM_CHECK(lrm_state != NULL, return NULL);
 
     if (provider == NULL) {
         provider = "heartbeat";
     }
 
     if (metadata_hash == NULL) {
         metadata_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
     }
 
     len = strlen(type) + strlen(rclass) + strlen(provider) + 4;
     key = malloc(len);
     if(key == NULL) {
         return NULL;
     }
 
     snprintf(key, len, "%s::%s:%s", type, rclass, provider);
     if(force == FALSE) {
         crm_trace("Retreiving cached metadata for %s", key);
         metadata = g_hash_table_lookup(metadata_hash, key);
     }
 
     if(metadata == NULL) {
         rc = lrm_state_get_metadata(lrm_state, rclass, provider, type, &metadata, 0);
         crm_trace("Retreived live metadata for %s: %s (%d)", key, pcmk_strerror(rc), rc);
         if(rc == pcmk_ok) {
             CRM_LOG_ASSERT(metadata != NULL);
             g_hash_table_insert(metadata_hash, key, metadata);
             key = NULL;
         } else {
             CRM_LOG_ASSERT(metadata == NULL);
             metadata = NULL;
         }
     }
 
     if (metadata == NULL) {
         crm_warn("No metadata found for %s: %s (%d)", key, pcmk_strerror(rc), rc);
     }
 
     free(key);
     return metadata;
 }
 
 static char *
 build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result,
                      const char *criteria, bool target, bool invert_for_xml)
 {
     int len = 0;
     int max = 0;
     char *list = NULL;
 
     xmlNode *param = NULL;
     xmlNode *params = NULL;
 
     const char *secure_terms[] = {
         "password",
         "passwd",
         "user",
     };
 
     if(safe_str_eq("private", criteria)) {
         /* It will take time for the agents to be updated
          * Check for some common terms
          */
         max = DIMOF(secure_terms);
     }
 
     params = find_xml_node(metadata, "parameters", TRUE);
     for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) {
         if (crm_str_eq((const char *)param->name, "parameter", TRUE)) {
             bool accept = FALSE;
             const char *name = crm_element_value(param, "name");
             const char *value = crm_element_value(param, criteria);
 
             if(max && value) {
                 /* Turn off the compatibility logic once an agent has been updated to know about 'private' */
                 max = 0;
             }
 
             if (name == NULL) {
                 crm_err("Invalid parameter in %s metadata", op->rsc_id);
 
             } else if(target == crm_is_true(value)) {
                 accept = TRUE;
 
             } else if(max) {
                 int lpc = 0;
                 bool found = FALSE;
 
                 for(lpc = 0; found == FALSE && lpc < max; lpc++) {
                     if(safe_str_eq(secure_terms[lpc], name)) {
                         found = TRUE;
                     }
                 }
 
                 if(found == target) {
                     accept = TRUE;
                 }
             }
 
             if(accept) {
                 int start = len;
 
                 crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria);
 
                 len += strlen(name) + 2;
                 list = realloc_safe(list, len + 1);
                 sprintf(list + start, " %s ", name);
 
             } else {
                 crm_trace("Rejecting %s for %s", name, criteria);
             }
 
             if(invert_for_xml) {
                 crm_trace("Inverting %s match for %s xml", name, criteria);
                 accept = !accept;
             }
 
             if(result && accept) {
                 value = g_hash_table_lookup(op->params, name);
                 if(value != NULL) {
                     crm_trace("Adding attr to the xml result", name, target?"":"not ", criteria);
                     crm_xml_add(result, name, value);
                 }
             }
         }
     }
 
     return list;
 }
 
 static bool
 resource_supports_action(xmlNode *metadata, const char *name) 
 {
     const char *value = NULL;
 
     xmlNode *action = NULL;
     xmlNode *actions = NULL;
 
     actions = find_xml_node(metadata, "actions", TRUE);
     for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) {
         if (crm_str_eq((const char *)action->name, "action", TRUE)) {
             value = crm_element_value(action, "name");
             if (safe_str_eq(name, value)) {
                 return TRUE;
             }
         }
     }
 
     return FALSE;
 }
 
 static void
 append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version)
 {
     char *list = NULL;
     char *digest = NULL;
     xmlNode *restart = NULL;
 
     CRM_LOG_ASSERT(op->params != NULL);
 
     if (op->interval > 0) {
         /* monitors are not reloadable */
         return;
     }
 
     if(resource_supports_action(metadata, "reload")) {
         restart = create_xml_node(NULL, XML_TAG_PARAMS);
         /* Any parameters with unique="1" should be added into the "op-force-restart" list. */
         list = build_parameter_list(op, metadata, restart, "unique", TRUE, FALSE);
 
     } else {
         /* Resource does not support reloads */
         return;
     }
 
     digest = calculate_operation_digest(restart, version);
     /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
      * no matter if it actually supports any parameters with unique="1"). */
     crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: "");
     crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
 
     crm_trace("%s: %s, %s", op->rsc_id, digest, list);
     crm_log_xml_trace(restart, "restart digest source");
 
     free_xml(restart);
     free(digest);
     free(list);
 }
 
 static void
 append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version)
 {
     char *list = NULL;
     char *digest = NULL;
     xmlNode *secure = NULL;
 
     CRM_LOG_ASSERT(op->params != NULL);
 
     /*
      * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
      * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
      * the insecure ones
      */
     secure = create_xml_node(NULL, XML_TAG_PARAMS);
     list = build_parameter_list(op, metadata, secure, "private", TRUE, TRUE);
 
     if (list != NULL) {
         digest = calculate_operation_digest(secure, version);
         crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
         crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
 
         crm_trace("%s: %s, %s", op->rsc_id, digest, list);
         crm_log_xml_trace(secure, "secure digest source");
     } else {
         crm_trace("%s: no secure parameters", op->rsc_id);
     }
 
     free_xml(secure);
     free(digest);
     free(list);
 }
 
 static gboolean
 build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
                        const char *src)
 {
     int target_rc = 0;
     xmlNode *xml_op = NULL;
     xmlNode *metadata = NULL;
     const char *m_string = NULL;
     const char *caller_version = NULL;
 
     if (op == NULL) {
         return FALSE;
     }
 
     target_rc = rsc_op_expected_rc(op);
 
     /* there is a small risk in formerly mixed clusters that it will
      * be sub-optimal.
      *
      * however with our upgrade policy, the update we send should
      * still be completely supported anyway
      */
     caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
     CRM_LOG_ASSERT(caller_version != NULL);
 
     if(caller_version == NULL) {
         caller_version = CRM_FEATURE_SET;
     }
 
     crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
     xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
     if (xml_op == NULL) {
         return TRUE;
     }
 
     if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) {
         /* Stopped resources don't need the digest logic */
         crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type);
         return TRUE;
     }
 
     m_string = get_rsc_metadata(rsc->type, rsc->class, rsc->provider, safe_str_eq(op->op_type, RSC_START));
     if(m_string == NULL) {
         crm_err("No metadata for %s::%s:%s", rsc->provider, rsc->class, rsc->type);
         return TRUE;
     }
 
     metadata = string2xml(m_string);
     if(metadata == NULL) {
         crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type);
         return TRUE;
     }
 
     crm_trace("Includind additional digests for %s::%s:%s", rsc->provider, rsc->class, rsc->type);
     append_restart_list(op, metadata, xml_op, caller_version);
     append_secure_list(op, metadata, xml_op, caller_version);
 
     free_xml(metadata);
     return TRUE;
 }
 
 static gboolean
 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
 {
     rsc_history_t *entry = NULL;
 
     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
     if (entry == NULL || entry->last == NULL) {
         return FALSE;
     }
 
     crm_trace("Processing %s: %s.%d=%d",
               rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc);
     if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
         return FALSE;
 
     } else if (entry->last->rc == PCMK_OCF_OK
                && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
         /* a stricter check is too complex...
          * leave that to the PE
          */
         return FALSE;
 
     } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
         return FALSE;
 
     } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) {
         /* Badly configured resources can't be reliably stopped */
         return FALSE;
     }
 
     return TRUE;
 }
 
 static gboolean
 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
 {
     GHashTableIter iter;
     rsc_history_t *entry = NULL;
 
     g_hash_table_iter_init(&iter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
 
         GList *gIter = NULL;
         xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
 
         crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
         crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
 
         if (entry->last && entry->last->params) {
             const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
             if (container) {
                 crm_trace("Resource %s is a part of container resource %s", entry->id, container);
                 crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
             }
         }
         build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__);
         build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__);
         for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
             build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__);
         }
     }
 
     return FALSE;
 }
 
 xmlNode *
 do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace)
 {
     xmlNode *xml_state = NULL;
     xmlNode *xml_data = NULL;
     xmlNode *rsc_list = NULL;
     const char *uuid = NULL;
 
     if (safe_str_eq(lrm_state->node_name, fsa_our_uname)) {
         crm_node_t *peer = crm_get_peer(0, lrm_state->node_name);
         xml_state = do_update_node_cib(peer, node_update_cluster|node_update_peer, NULL, __FUNCTION__);
         /* The next two lines shouldn't be necessary for newer DCs */
         crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
         crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
         uuid = fsa_our_uuid;
 
     } else {
         xml_state = create_xml_node(NULL, XML_CIB_TAG_STATE);
         crm_xml_add(xml_state, XML_NODE_IS_REMOTE, "true");
         crm_xml_add(xml_state, XML_ATTR_ID, lrm_state->node_name);
         crm_xml_add(xml_state, XML_ATTR_UNAME, lrm_state->node_name);
         uuid = lrm_state->node_name;
     }
 
     xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
     crm_xml_add(xml_data, XML_ATTR_ID, uuid);
     rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
 
     /* Build a list of active (not always running) resources */
     build_active_RAs(lrm_state, rsc_list);
 
     crm_log_xml_trace(xml_state, "Current state of the LRM");
 
     return xml_state;
 }
 
 xmlNode *
 do_lrm_query(gboolean is_replace, const char *node_name)
 {
     lrm_state_t *lrm_state = lrm_state_find(node_name);
 
     if (!lrm_state) {
         crm_err("Could not query lrm state for lrmd node %s", node_name);
         return NULL;
     }
     return do_lrm_query_internal(lrm_state, is_replace);
 }
 
 static void
 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
 {
     lrmd_event_data_t *op = NULL;
     const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
 
     crm_info("Notifying %s on %s that %s was%s deleted",
              from_sys, from_host, rsc_id, rc == pcmk_ok ? "" : " not");
 
     op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
     CRM_ASSERT(op != NULL);
 
     if (rc == pcmk_ok) {
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_OK;
     } else {
         op->op_status = PCMK_LRM_OP_ERROR;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
     }
 
     send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
     lrmd_free_event(op);
 
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
         /* this isn't expected - trigger a new transition */
         time_t now = time(NULL);
         char *now_s = crm_itoa(now);
 
         crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id);
 
         update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                              "last-lrm-refresh", now_s, FALSE, NULL, NULL);
 
         free(now_s);
     }
 }
 
 static gboolean
 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     struct delete_event_s *event = user_data;
     struct pending_deletion_op_s *op = value;
 
     if (crm_str_eq(event->rsc, op->rsc, TRUE)) {
         notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
         return TRUE;
     }
     return FALSE;
 }
 
 static gboolean
 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
 {
     const char *rsc = user_data;
     struct recurring_op_s *pending = value;
 
     if (crm_str_eq(rsc, pending->rsc_id, TRUE)) {
         crm_info("Removing op %s:%d for deleted resource %s",
                  pending->op_key, pending->call_id, rsc);
         return TRUE;
     }
     return FALSE;
 }
 
 /*
  * Remove the rsc from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
 
 static int
 delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                   const char *user_name)
 {
     char *rsc_xpath = NULL;
     int max = 0;
     int rc = pcmk_ok;
 
     CRM_CHECK(rsc_id != NULL, return -ENXIO);
 
     max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1;
     rsc_xpath = calloc(1, max);
     snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id);
 
     rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath,
                          NULL, NULL, call_options | cib_xpath, user_name);
 
     free(rsc_xpath);
     return rc;
 }
 
 static void
 delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
                  GHashTableIter * rsc_gIter, int rc, const char *user_name)
 {
     struct delete_event_s event;
 
     CRM_CHECK(rsc_id != NULL, return);
 
     if (rc == pcmk_ok) {
         char *rsc_id_copy = strdup(rsc_id);
 
         if (rsc_gIter)
             g_hash_table_iter_remove(rsc_gIter);
         else
             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
         crm_debug("sync: Sending delete op for %s", rsc_id_copy);
         delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);
 
         g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
         free(rsc_id_copy);
     }
 
     if (input) {
         notify_deleted(lrm_state, input, rsc_id, rc);
     }
 
     event.rc = rc;
     event.rsc = rsc_id;
     event.lrm_state = lrm_state;
     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
 }
 
 /*
  * Remove the op from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 
 #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']"
 #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']"
 
 static void
 delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id,
                 const char *key, int call_id)
 {
     xmlNode *xml_top = NULL;
 
     if (op != NULL) {
         xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
         crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
         crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
 
         if (op->interval > 0) {
             char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
 
             /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
             crm_xml_add(xml_top, XML_ATTR_ID, op_id);
             free(op_id);
         }
 
         crm_debug("async: Sending delete op for %s_%s_%d (call=%d)",
                   op->rsc_id, op->op_type, op->interval, op->call_id);
 
         fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override);
 
     } else if (rsc_id != NULL && key != NULL) {
         int max = 0;
         char *op_xpath = NULL;
 
         if (call_id > 0) {
             max =
                 strlen(op_call_template) + strlen(rsc_id) + strlen(lrm_state->node_name) +
                 strlen(key) + 10;
             op_xpath = calloc(1, max);
             snprintf(op_xpath, max, op_call_template, lrm_state->node_name, rsc_id, key, call_id);
 
         } else {
             max =
                 strlen(op_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) +
                 1;
             op_xpath = calloc(1, max);
             snprintf(op_xpath, max, op_template, lrm_state->node_name, rsc_id, key);
         }
 
         crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id);
         fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath);
 
         free(op_xpath);
 
     } else {
         crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key);
         return;
     }
 
     crm_log_xml_trace(xml_top, "op:cancel");
     free_xml(xml_top);
 }
 
 void
 lrm_clear_last_failure(const char *rsc_id, const char *node_name)
 {
     char *attr = NULL;
     GHashTableIter iter;
     GList *lrm_state_list = lrm_state_get_list();
     GList *state_entry;
     rsc_history_t *entry = NULL;
 
     attr = generate_op_key(rsc_id, "last_failure", 0);
 
     /* This clears last failure for every lrm state that has this rsc.*/
     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
         lrm_state_t *lrm_state = state_entry->data;
 
         if (node_name != NULL) {
             if (strcmp(node_name, lrm_state->node_name) != 0) {
                 /* filter by node_name if node_name is present */
                 continue;
             }
         }
 
         delete_op_entry(lrm_state, NULL, rsc_id, attr, 0);
 
         if (!lrm_state->resource_history) {
             continue;
         }
 
         g_hash_table_iter_init(&iter, lrm_state->resource_history);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
             if (crm_str_eq(rsc_id, entry->id, TRUE)) {
                 lrmd_free_event(entry->failed);
                 entry->failed = NULL;
             }
         }
     }
     free(attr);
     g_list_free(lrm_state_list);
 }
 
 /* Returns: gboolean - cancellation is in progress */
 static gboolean
 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
 {
     int rc = pcmk_ok;
     char *local_key = NULL;
     struct recurring_op_s *pending = NULL;
 
     CRM_CHECK(op != 0, return FALSE);
     CRM_CHECK(rsc_id != NULL, return FALSE);
     if (key == NULL) {
         local_key = make_stop_id(rsc_id, op);
         key = local_key;
     }
     pending = g_hash_table_lookup(lrm_state->pending_ops, key);
 
     if (pending) {
         if (remove && pending->remove == FALSE) {
             pending->remove = TRUE;
             crm_debug("Scheduling %s for removal", key);
         }
 
         if (pending->cancelled) {
             crm_debug("Operation %s already cancelled", key);
             free(local_key);
             return FALSE;
         }
 
         pending->cancelled = TRUE;
 
     } else {
         crm_info("No pending op found for %s", key);
         free(local_key);
         return FALSE;
     }
 
     crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
     rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval);
     if (rc == pcmk_ok) {
         crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
         free(local_key);
         return TRUE;
     }
 
     crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
     /* The caller needs to make sure the entry is
      * removed from the pending_ops list
      *
      * Usually by returning TRUE inside the worker function
      * supplied to g_hash_table_foreach_remove()
      *
      * Not removing the entry from pending_ops will block
      * the node from shutting down
      */
     free(local_key);
     return FALSE;
 }
 
 struct cancel_data {
     gboolean done;
     gboolean remove;
     const char *key;
     lrmd_rsc_info_t *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean
 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     struct cancel_data *data = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (crm_str_eq(op->op_key, data->key, TRUE)) {
         data->done = TRUE;
         remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
     }
     return remove;
 }
 
 static gboolean
 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
 {
     guint removed = 0;
     struct cancel_data data;
 
     CRM_CHECK(rsc != NULL, return FALSE);
     CRM_CHECK(key != NULL, return FALSE);
 
     data.key = key;
     data.rsc = rsc;
     data.done = FALSE;
     data.remove = remove;
     data.lrm_state = lrm_state;
 
     removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data);
     crm_trace("Removed %u op cache entries, new size: %u",
               removed, g_hash_table_size(lrm_state->pending_ops));
     return data.done;
 }
 
 static lrmd_rsc_info_t *
 get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create)
 {
     lrmd_rsc_info_t *rsc = NULL;
     const char *id = ID(resource);
     const char *type = crm_element_value(resource, XML_ATTR_TYPE);
     const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
     const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
     const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
 
     crm_trace("Retrieving %s from the LRM.", id);
     CRM_CHECK(id != NULL, return NULL);
 
     rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
     if (!rsc && long_id) {
         rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0);
     }
 
     if (!rsc && do_create) {
         CRM_CHECK(class != NULL, return NULL);
         CRM_CHECK(type != NULL, return NULL);
 
         crm_trace("Adding rsc %s before operation", id);
 
         lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring);
 
         rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
         if (!rsc) {
             fsa_data_t *msg_data = NULL;
 
             crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name);
             /* only register this as a internal error if this involves the local
              * lrmd. Otherwise we're likely dealing with an unresponsive remote-node
              * which is not a FSA failure. */
             if (lrm_state_is_local(lrm_state) == TRUE) {
                 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
             }
         }
     }
 
     return rsc;
 }
 
 static void
 delete_resource(lrm_state_t * lrm_state,
                 const char *id,
                 lrmd_rsc_info_t * rsc,
                 GHashTableIter * gIter,
                 const char *sys,
                 const char *host,
                 const char *user,
                 ha_msg_input_t * request,
                 gboolean unregister)
 {
     int rc = pcmk_ok;
 
     crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
 
     if (rsc && unregister) {
         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
     }
 
     if (rc == pcmk_ok) {
         crm_trace("Resource '%s' deleted", id);
     } else if (rc == -EINPROGRESS) {
         crm_info("Deletion of resource '%s' pending", id);
         if (request) {
             struct pending_deletion_op_s *op = NULL;
             char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
 
             op = calloc(1, sizeof(struct pending_deletion_op_s));
             op->rsc = strdup(rsc->id);
             op->input = copy_ha_msg_input(request);
             g_hash_table_insert(lrm_state->deletion_ops, ref, op);
         }
         return;
     } else {
         crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
                  id, sys, user ? user : "internal", host, rc);
     }
 
     delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
 }
 
 static int
 get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
 {
     int call_id = 999999999;
     rsc_history_t *entry = NULL;
 
     if(lrm_state) {
         entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
     }
 
     /* Make sure the call id is greater than the last successful operation,
      * otherwise the failure will not result in a possible recovery of the resource
      * as it could appear the failure occurred before the successful start */
     if (entry) {
         call_id = entry->last_callid + 1;
     }
 
     if (call_id < 0) {
         call_id = 1;
     }
     return call_id;
 }
 
 static void
 force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node)
 {
         GHashTableIter gIter;
         rsc_history_t *entry = NULL;
 
 
         crm_info("clearing resource history on node %s", lrm_state->node_name);
         g_hash_table_iter_init(&gIter, lrm_state->resource_history);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
             /* only unregister the resource during a reprobe if it is not a remote connection
              * resource. otherwise unregistering the connection will terminate remote-node
              * membership */
             gboolean unregister = TRUE;
 
             if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
                 lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
                 if (remote_lrm_state) {
                     /* when forcing a reprobe, make sure to clear remote node before
                      * clearing the remote node's connection resource */ 
                     force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE);
                 }
                 unregister = FALSE;
             }
 
             delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
                             user_name, NULL, unregister);
         }
 
         /* Now delete the copy in the CIB */
         erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
 
         /* And finally, _delete_ the value in attrd
          * Setting it to FALSE results in the PE sending us back here again
          */
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
 }
 
 static void
 synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) 
 {
     lrmd_event_data_t *op = NULL;
     const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
     const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
     xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
 
     if(xml_rsc == NULL) {
         /* Do something else?  driect_ack? */
         crm_info("Skipping %s=%d on %s (%p): no resource",
                  crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state);
         return;
 
     } else if(operation == NULL) {
         /* This probably came from crm_resource -C, nothing to do */
         crm_info("Skipping %s=%d on %s (%p): no operation",
                  crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state);
         return;
     }
 
     op = construct_op(lrm_state, action, ID(xml_rsc), operation);
     CRM_ASSERT(op != NULL);
 
     op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
     if(safe_str_eq(operation, RSC_NOTIFY)) {
         /* Notifications can't fail yet */
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_OK;
 
     } else {
         op->op_status = PCMK_LRM_OP_ERROR;
         op->rc = rc;
     }
     op->t_run = time(NULL);
     op->t_rcchange = op->t_run;
 
     crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state);
 
     if(lrm_state) {
         process_lrm_event(lrm_state, op, NULL);
 
     } else {
         lrmd_rsc_info_t rsc;
 
         rsc.id = strdup(op->rsc_id);
         rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE);
         rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS);
         rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER);
 
         do_update_resource(target_node, &rsc, op);
 
         free(rsc.id);
         free(rsc.type);
         free(rsc.class);
         free(rsc.provider);
     }
     lrmd_free_event(op);
 }
 
 
 /*	 A_LRM_INVOKE	*/
 void
 do_lrm_invoke(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     gboolean create_rsc = TRUE;
     lrm_state_t *lrm_state = NULL;
     const char *crm_op = NULL;
     const char *from_sys = NULL;
     const char *from_host = NULL;
     const char *operation = NULL;
     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
     const char *user_name = NULL;
     const char *target_node = NULL;
     gboolean is_remote_node = FALSE;
     gboolean crm_rsc_delete = FALSE;
 
     if (input->xml != NULL) {
         /* Remote node operations are routed here to their remote connections */
         target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
     }
     if (target_node == NULL) {
         target_node = fsa_our_uname;
     } else if (safe_str_neq(target_node, fsa_our_uname)) {
         is_remote_node = TRUE;
     }
 
     lrm_state = lrm_state_find(target_node);
 
     if (lrm_state == NULL && is_remote_node) {
         crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.",
             target_node, fsa_our_uname);
 
         /* The action must be recorded here and in the CIB as failed */
         synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
         return;
     }
 
     CRM_ASSERT(lrm_state != NULL);
 
 #if ENABLE_ACL
     user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
     crm_trace("LRM command from user '%s'", user_name);
 #endif
 
     crm_op = crm_element_value(input->msg, F_CRM_TASK);
     from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
         from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
     }
 
     crm_trace("LRM command from: %s", from_sys);
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
         /* remember this delete op came from crm_resource */
         crm_rsc_delete = TRUE;
         operation = CRMD_ACTION_DELETE;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         operation = CRM_OP_LRM_REFRESH;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
         lrmd_event_data_t *op = NULL;
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* The lrmd can not fail a resource, it does not understand the
          * concept of success or failure in relation to a resource, it simply
          * executes operations and reports the results. We determine what a failure is.
          * Becaues of this, if we want to fail a resource we have to fake what we
          * understand a failure to look like.
          *
          * To do this we create a fake lrmd operation event for the resource
          * we want to fail.  We then pass that event to the lrmd client callback
          * so it will be processed as if it actually came from the lrmd. */
         op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon");
         CRM_ASSERT(op != NULL);
 
         free((char *)op->user_data);
         op->user_data = NULL;
         op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
         op->interval = 0;
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
         op->t_run = time(NULL);
         op->t_rcchange = op->t_run;
 
 #if ENABLE_ACL
         if (user_name && is_privileged(user_name) == FALSE) {
             crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
             return;
         }
 #endif
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
         if (rsc) {
             crm_info("Failing resource %s...", rsc->id);
             process_lrm_event(lrm_state, op, NULL);
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
             lrmd_free_rsc_info(rsc);
         } else {
             crm_info("Cannot find/create resource in order to fail it...");
             crm_log_xml_warn(input->msg, "bad input");
         }
 
         send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
         lrmd_free_event(op);
         return;
 
     } else if (input->xml != NULL) {
         operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
     }
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         int rc = pcmk_ok;
         xmlNode *fragment = do_lrm_query_internal(lrm_state, TRUE);
 
         fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
         crm_info("Forced a local LRM refresh: call=%d", rc);
 
         if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, fragment,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
         free_xml(fragment);
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
         xmlNode *data = do_lrm_query_internal(lrm_state, FALSE);
         xmlNode *reply = create_reply(input->msg, data);
 
         if (relay_message(reply, TRUE) == FALSE) {
             crm_err("Unable to route reply");
             crm_log_xml_err(reply, "reply");
         }
         free_xml(reply);
         free_xml(data);
 
     } else if (safe_str_eq(operation, CRM_OP_PROBED)) {
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node);
 
     } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
         crm_notice("Forcing the status of all resources to be redetected");
 
         force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);
 
         if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0
            && strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, NULL,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
     } else if (operation != NULL) {
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *params = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* only the first 16 chars are used by the LRM */
         params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
 
         if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
             create_rsc = FALSE;
         }
 
         if(lrm_state_is_connected(lrm_state) == FALSE) {
             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED);
             return;
         }
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
         if (rsc == NULL && create_rsc) {
             crm_err("Invalid resource definition for %s", ID(xml_rsc));
             crm_log_xml_warn(input->msg, "bad input");
 
             /* if the operation couldn't complete because we can't register
              * the resource, return a generic error */
             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED);
 
         } else if (rsc == NULL) {
             lrmd_event_data_t *op = NULL;
 
             crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml));
             delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name);
 
             op = construct_op(lrm_state, input->xml, ID(xml_rsc), operation);
 
             /* Deleting something that does not exist is a success */
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
             CRM_ASSERT(op != NULL);
 
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
 
         } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
             char *op_key = NULL;
             char *meta_key = NULL;
             int call = 0;
             const char *call_id = NULL;
             const char *op_task = NULL;
             const char *op_interval = NULL;
             gboolean in_progress = FALSE;
 
             CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL);
             op_interval = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
             op_task = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
             call_id = crm_element_value(params, meta_key);
             free(meta_key);
 
             CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
             CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
 
             op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0"));
 
             crm_debug("PE requested op %s (call=%s) be cancelled",
                       op_key, call_id ? call_id : "NA");
             call = crm_parse_int(call_id, "0");
             if (call == 0) {
                 /* the normal case when the PE cancels a recurring op */
                 in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
 
             } else {
                 /* the normal case when the PE cancels an orphan op */
                 in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
             }
 
             if (in_progress == FALSE) {
                 lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc->id, op_task);
 
                 crm_info("Nothing known about operation %d for %s", call, op_key);
                 delete_op_entry(lrm_state, NULL, rsc->id, op_key, call);
 
                 CRM_ASSERT(op != NULL);
 
                 op->rc = PCMK_OCF_OK;
                 op->op_status = PCMK_LRM_OP_DONE;
                 send_direct_ack(from_host, from_sys, rsc, op, rsc->id);
                 lrmd_free_event(op);
 
                 /* needed?? surely not otherwise the cancel_op_(_key) wouldn't
                  * have failed in the first place
                  */
                 g_hash_table_remove(lrm_state->pending_ops, op_key);
             }
 
             free(op_key);
 
         } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) {
             gboolean unregister = TRUE;
 
 #if ENABLE_ACL
             int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name);
             if (cib_rc != pcmk_ok) {
                 lrmd_event_data_t *op = NULL;
 
                 crm_err
                     ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s",
                      rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc,
                      pcmk_strerror(cib_rc));
 
                 op = construct_op(lrm_state, input->xml, rsc->id, operation);
                 op->op_status = PCMK_LRM_OP_ERROR;
 
                 if (cib_rc == -EACCES) {
                     op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
                 } else {
                     op->rc = PCMK_OCF_UNKNOWN_ERROR;
                 }
                 send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
                 lrmd_free_event(op);
                 return;
             }
 #endif
             if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
                 unregister = FALSE;
             }
 
             delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister);
 
         } else if (rsc != NULL) {
             do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
         }
 
         lrmd_free_rsc_info(rsc);
 
     } else {
         crm_err("Operation was neither a lrm_query, nor a rsc op.  %s", crm_str(crm_op));
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     }
 }
 
 static lrmd_event_data_t *
 construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
 {
     lrmd_event_data_t *op = NULL;
     const char *op_delay = NULL;
     const char *op_timeout = NULL;
     const char *op_interval = NULL;
     GHashTable *params = NULL;
 
     const char *transition = NULL;
 
     CRM_ASSERT(rsc_id != NULL);
 
     op = calloc(1, sizeof(lrmd_event_data_t));
     op->type = lrmd_event_exec_complete;
     op->op_type = strdup(operation);
     op->op_status = PCMK_LRM_OP_PENDING;
     op->rc = -1;
     op->rsc_id = strdup(rsc_id);
     op->interval = 0;
     op->timeout = 0;
     op->start_delay = 0;
 
     if (rsc_op == NULL) {
         CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
         op->user_data = NULL;
         /* the stop_all_resources() case
          * by definition there is no DC (or they'd be shutting
          *   us down).
          * So we should put our version here.
          */
         op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
 
         g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
 
         crm_trace("Constructed %s op for %s", operation, rsc_id);
         return op;
     }
 
     params = xml2list(rsc_op);
     g_hash_table_remove(params, CRM_META "_op_target_rc");
 
     op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
     op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
     op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL);
 
     op->interval = crm_parse_int(op_interval, "0");
     op->timeout = crm_parse_int(op_timeout, "0");
     op->start_delay = crm_parse_int(op_delay, "0");
 
     if (safe_str_neq(operation, RSC_STOP)) {
         op->params = params;
 
     } else {
         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 
         /* If we do not have stop parameters cached, use
          * whatever we are given */
         if (!entry || !entry->stop_params) {
             op->params = params;
         } else {
             /* Copy the cached parameter list so that we stop the resource
              * with the old attributes, not the new ones */
             op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                                g_hash_destroy_str, g_hash_destroy_str);
 
             g_hash_table_foreach(params, copy_meta_keys, op->params);
             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
             g_hash_table_destroy(params);
             params = NULL;
         }
     }
 
     /* sanity */
     if (op->interval < 0) {
         op->interval = 0;
     }
     if (op->timeout <= 0) {
         op->timeout = op->interval;
     }
     if (op->start_delay < 0) {
         op->start_delay = 0;
     }
 
     transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
     CRM_CHECK(transition != NULL, return op);
 
     op->user_data = strdup(transition);
 
     if (op->interval != 0) {
         if (safe_str_eq(operation, CRMD_ACTION_START)
             || safe_str_eq(operation, CRMD_ACTION_STOP)) {
             crm_err("Start and Stop actions cannot have an interval: %d", op->interval);
             op->interval = 0;
         }
     }
 
     crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval);
 
     return op;
 }
 
 void
 send_direct_ack(const char *to_host, const char *to_sys,
                 lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
 {
     xmlNode *reply = NULL;
     xmlNode *update, *iter;
     crm_node_t *peer = NULL;
 
     CRM_CHECK(op != NULL, return);
     if (op->rsc_id == NULL) {
         CRM_ASSERT(rsc_id != NULL);
         op->rsc_id = strdup(rsc_id);
     }
     if (to_sys == NULL) {
         to_sys = CRM_SYSTEM_TENGINE;
     }
 
     peer = crm_get_peer(0, fsa_our_uname);
     update = do_update_node_cib(peer, node_update_none, NULL, __FUNCTION__);
 
     iter = create_xml_node(update, XML_CIB_TAG_LRM);
     crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
 
     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(iter, rsc, op, __FUNCTION__);
     reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
 
     crm_log_xml_trace(update, "ACK Update");
 
     crm_debug("ACK'ing resource op %s_%s_%d from %s: %s",
               op->rsc_id, op->op_type, op->interval, op->user_data,
               crm_element_value(reply, XML_ATTR_REFERENCE));
 
     if (relay_message(reply, TRUE) == FALSE) {
         crm_log_xml_err(reply, "Unable to route reply");
     }
 
     free_xml(update);
     free_xml(reply);
 }
 
 gboolean
 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
 {
     gboolean res = TRUE;
     GList *lrm_state_list = lrm_state_get_list();
     GList *state_entry;
 
     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
         lrm_state_t *lrm_state = state_entry->data;
 
         if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
             /* keep iterating through all even when false is returned */
             res = FALSE;
         }
     }
 
     set_bit(fsa_input_register, R_SENT_RSC_STOP);
     g_list_free(lrm_state_list); lrm_state_list = NULL;
     return res;
 }
 
 struct stop_recurring_action_s {
     lrmd_rsc_info_t *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean
 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     struct stop_recurring_action_s *event = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
         crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
         remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
     }
 
     return remove;
 }
 
 static gboolean
 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     lrm_state_t *lrm_state = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (op->interval != 0) {
         crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
         remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
     }
 
     return remove;
 }
 
 static void
 do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
               xmlNode * request)
 {
     int call_id = 0;
     char *op_id = NULL;
     lrmd_event_data_t *op = NULL;
     lrmd_key_value_t *params = NULL;
     fsa_data_t *msg_data = NULL;
     const char *transition = NULL;
     gboolean stop_recurring = FALSE;
 
     CRM_CHECK(rsc != NULL, return);
     CRM_CHECK(operation != NULL, return);
 
     if (msg != NULL) {
         transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
         if (transition == NULL) {
             crm_log_xml_err(msg, "Missing transition number");
         }
     }
 
     op = construct_op(lrm_state, msg, rsc->id, operation);
     CRM_CHECK(op != NULL, return);
 
     if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
         && op->interval == 0
         && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {
 
         /* pcmk remote connections are a special use case.
          * We never ever want to stop monitoring a connection resource until
          * the entire migration has completed. If the connection is ever unexpected
          * severed, even during a migration, this is an event we must detect.*/
         stop_recurring = FALSE;
 
     } else if (op->interval == 0
         && strcmp(operation, CRMD_ACTION_STATUS) != 0
         && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
 
         /* stop any previous monitor operations before changing the resource state */
         stop_recurring = TRUE;
     }
 
     if (stop_recurring == TRUE) {
         guint removed = 0;
         struct stop_recurring_action_s data;
 
         data.rsc = rsc;
         data.lrm_state = lrm_state;
         removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
 
         crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d",
                   removed, rsc->id, operation, op->interval);
     }
 
     /* now do the op */
     crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval);
 
     if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) {
         if (safe_str_neq(operation, "fail")
             && safe_str_neq(operation, CRMD_ACTION_STOP)) {
             crm_info("Discarding attempt to perform action %s on %s in state %s",
                      operation, rsc->id, fsa_state2string(fsa_state));
             op->rc = CRM_DIRECT_NACK_RC;
             op->op_status = PCMK_LRM_OP_ERROR;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
             lrmd_free_event(op);
             free(op_id);
             return;
         }
     }
 
     op_id = generate_op_key(rsc->id, op->op_type, op->interval);
 
     if (op->interval > 0) {
         /* cancel it so we can then restart it without conflict */
         cancel_op_key(lrm_state, rsc, op_id, FALSE);
     }
 
     if (op->params) {
         char *key = NULL;
         char *value = NULL;
         GHashTableIter iter;
 
         g_hash_table_iter_init(&iter, op->params);
         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
             params = lrmd_key_value_add(params, key, value);
         }
     }
 
     call_id = lrm_state_exec(lrm_state,
                              rsc->id,
                              op->op_type,
                              op->user_data, op->interval, op->timeout, op->start_delay, params);
 
     if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
         crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 
     } else if (call_id <= 0) {
 
         crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id);
         op->call_id = get_fake_call_id(lrm_state, rsc->id);
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
         op->t_run = time(NULL);
         op->t_rcchange = op->t_run;
         process_lrm_event(lrm_state, op, NULL);
 
     } else {
         /* record all operations so we can wait
          * for them to complete during shutdown
          */
         char *call_id_s = make_stop_id(rsc->id, call_id);
         struct recurring_op_s *pending = NULL;
 
         pending = calloc(1, sizeof(struct recurring_op_s));
         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
 
         pending->call_id = call_id;
         pending->interval = op->interval;
         pending->op_type = strdup(operation);
         pending->op_key = strdup(op_id);
         pending->rsc_id = strdup(rsc->id);
         pending->start_time = time(NULL);
         pending->user_data = strdup(op->user_data);
         g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
 
         if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) {
             char *uuid = NULL;
             int dummy = 0, target_rc = 0;
 
             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
 
             decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
             free(uuid);
 
             op->rc = target_rc;
             op->op_status = PCMK_LRM_OP_DONE;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
         }
 
         pending->params = op->params;
         op->params = NULL;
     }
 
     free(op_id);
     lrmd_free_event(op);
     return;
 }
 
 int last_resource_update = 0;
 
 static void
 cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     switch (rc) {
         case pcmk_ok:
         case -pcmk_err_diff_failed:
         case -pcmk_err_diff_resync:
             crm_trace("Resource update %d complete: rc=%d", call_id, rc);
             break;
         default:
             crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
     }
 
     if (call_id == last_resource_update) {
         last_resource_update = 0;
         trigger_fsa(fsa_source);
     }
 }
 
 static void
 remote_node_init_status(const char *node_name, int call_opt)
 {
     int call_id = 0;
     xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
     xmlNode *state;
 
     state = simple_remote_node_status(node_name, update,__FUNCTION__);
     crm_xml_add(state, XML_NODE_IS_FENCED, "0");
 
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
     if (call_id != pcmk_ok) {
         crm_debug("Failed to init status section for remote-node %s", node_name);
     }
     free_xml(update);
 }
 
 static void
 remote_node_clear_status(const char *node_name, int call_opt)
 {
     if (node_name == NULL) {
         return;
     }
     remote_node_init_status(node_name, call_opt);
     erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
     erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
 }
 
 static int
 do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
 /*
   <status>
   <nodes_status id=uname>
   <lrm>
   <lrm_resources>
   <lrm_resource id=...>
   </...>
 */
     int rc = pcmk_ok;
     xmlNode *update, *iter = NULL;
     int call_opt = cib_quorum_override;
     const char *uuid = NULL;
 
     CRM_CHECK(op != NULL, return 0);
 
     if (fsa_state == S_ELECTION || fsa_state == S_PENDING) {
         crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state));
         call_opt |= cib_scope_local;
     }
 
     iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
     update = iter;
     iter = create_xml_node(iter, XML_CIB_TAG_STATE);
 
     if (safe_str_eq(node_name, fsa_our_uname)) {
         uuid = fsa_our_uuid;
 
     } else {
         /* remote nodes uuid and uname are equal */
         uuid = node_name;
         crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
     }
 
     CRM_LOG_ASSERT(uuid != NULL);
     if(uuid == NULL) {
         rc = -EINVAL;
         goto done;
     }
 
     crm_xml_add(iter, XML_ATTR_UUID,  uuid);
     crm_xml_add(iter, XML_ATTR_UNAME, node_name);
     crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
 
     iter = create_xml_node(iter, XML_CIB_TAG_LRM);
     crm_xml_add(iter, XML_ATTR_ID, uuid);
 
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(iter, rsc, op, __FUNCTION__);
 
     if (rsc) {
         const char *container = NULL;
 
         crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
         crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
         crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
 
         if (op->params) {
             container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
         }
         if (container) {
             crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
             crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
         }
 
         CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id));
         CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id));
 
         /* check to see if we need to initialize remote-node related status sections */
         if (safe_str_eq(op->op_type, "start") && op->rc == 0 && op->op_status == PCMK_LRM_OP_DONE) {
             const char *remote_node = g_hash_table_lookup(op->params, CRM_META"_remote_node");
 
             if (remote_node) {
                 /* A container for a remote-node has started, initalize remote-node's status */
                 crm_info("Initalizing lrm status for container remote-node %s. Container successfully started.", remote_node);
                 remote_node_clear_status(remote_node, call_opt);
             } else if (container == FALSE && safe_str_eq(rsc->type, "remote") && safe_str_eq(rsc->provider, "pacemaker")) {
                 /* baremetal remote node connection resource has started, initalize remote-node's status */
                 crm_info("Initializing lrm status for baremetal remote-node %s", rsc->id);
                 remote_node_clear_status(rsc->id, call_opt);
             }
         }
 
     } else {
         crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id);
         send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
         goto cleanup;
     }
 
     crm_log_xml_trace(update, __FUNCTION__);
 
     /* make it an asyncronous call and be done with it
      *
      * Best case:
      *   the resource state will be discovered during
      *   the next signup or election.
      *
      * Bad case:
      *   we are shutting down and there is no DC at the time,
      *   but then why were we shutting down then anyway?
      *   (probably because of an internal error)
      *
      * Worst case:
      *   we get shot for having resources "running" when the really weren't
      *
      * the alternative however means blocking here for too long, which
      * isnt acceptable
      */
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
 
     if (rc > 0) {
         last_resource_update = rc;
     }
   done:
     /* the return code is a call number, not an error code */
     crm_trace("Sent resource state update message: %d for %s=%d on %s", rc,
               op->op_type, op->interval, op->rsc_id);
     fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
 
   cleanup:
     free_xml(update);
     return rc;
 }
 
 void
 do_lrm_event(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
 {
     CRM_CHECK(FALSE, return);
 }
 
 gboolean
 process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending)
 {
     char *op_id = NULL;
     char *op_key = NULL;
 
     int update_id = 0;
     gboolean remove = FALSE;
     gboolean removed = FALSE;
     lrmd_rsc_info_t *rsc = NULL;
 
     CRM_CHECK(op != NULL, return FALSE);
     CRM_CHECK(op->rsc_id != NULL, return FALSE);
 
     op_id = make_stop_id(op->rsc_id, op->call_id);
     op_key = generate_op_key(op->rsc_id, op->op_type, op->interval);
     rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
     if(pending == NULL) {
         remove = TRUE;
         pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
     }
 
     if (op->op_status == PCMK_LRM_OP_ERROR) {
         switch(op->rc) {
             case PCMK_OCF_NOT_RUNNING:
             case PCMK_OCF_RUNNING_MASTER:
             case PCMK_OCF_DEGRADED:
             case PCMK_OCF_DEGRADED_MASTER:
                 /* Leave it up to the TE/PE to decide if this is an error */
                 op->op_status = PCMK_LRM_OP_DONE;
                 break;
             default:
                 /* Nothing to do */
                 break;
         }
     }
 
     if (op->op_status != PCMK_LRM_OP_CANCELLED) {
         if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
             /* Keep notify ops out of the CIB */
             send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
         } else {
             update_id = do_update_resource(lrm_state->node_name, rsc, op);
         }
     } else if (op->interval == 0) {
         /* This will occur when "crm resource cleanup" is called while actions are in-flight */
         crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else if (pending == NULL) {
         /* We don't need to do anything for cancelled ops
          * that are not in our pending op list. There are no
          * transition actions waiting on these operations. */
 
     } else if (op->user_data == NULL) {
         /* At this point we have a pending entry, but no transition
          * key present in the user_data field. report this */
         crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
 
     } else if (pending->remove) {
         /* The tengine canceled this op, we have been waiting for the cancel to finish. */
         delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id);
 
     } else if (pending && op->rsc_deleted) {
         /* The tengine initiated this op, but it was cancelled outside of the
          * tengine's control during a resource cleanup/re-probe request. The tengine
          * must be alerted that this operation completed, otherwise the tengine
          * will continue waiting for this update to occur until it is timed out.
          * We don't want this update going to the cib though, so use a direct ack. */
         crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else {
         /* Before a stop is called, no need to direct ack */
         crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
     }
 
     if(remove == FALSE) {
         /* The caller will do this afterwards, but keep the logging consistent */
         removed = TRUE;
 
     } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) {
         removed = TRUE;
         crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
                   op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops));
 
     } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) {
         removed = TRUE;
         g_hash_table_remove(lrm_state->pending_ops, op_id);
     }
 
     switch (op->op_status) {
         case PCMK_LRM_OP_CANCELLED:
             crm_info("Operation %s: %s (node=%s, call=%d, confirmed=%s)",
                      op_key, services_lrm_status_str(op->op_status), lrm_state->node_name,
                      op->call_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_DONE:
             do_crm_log(op->interval?LOG_INFO:LOG_NOTICE,
                        "Operation %s: %s (node=%s, call=%d, rc=%d, cib-update=%d, confirmed=%s)",
                        op_key, services_ocf_exitcode_str(op->rc), lrm_state->node_name,
                        op->call_id, op->rc, update_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_TIMEOUT:
             crm_err("Operation %s: %s (node=%s, call=%d, timeout=%dms)",
                     op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, op->call_id, op->timeout);
             break;
 
         default:
             crm_err("Operation %s (node=%s, call=%d, status=%d, cib-update=%d, confirmed=%s) %s",
                     op_key, lrm_state->node_name, op->call_id, op->op_status, update_id, removed ? "true" : "false",
                     services_lrm_status_str(op->op_status));
     }
 
     if (op->output) {
         char *prefix =
             crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id);
 
         if (op->rc) {
             crm_log_output(LOG_NOTICE, prefix, op->output);
         } else {
             crm_log_output(LOG_DEBUG, prefix, op->output);
         }
         free(prefix);
     }
 
     if (op->rsc_deleted) {
         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
         delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
     }
 
     /* If a shutdown was escalated while operations were pending,
      * then the FSA will be stalled right now... allow it to continue
      */
     mainloop_set_trigger(fsa_source);
     update_history_cache(lrm_state, rsc, op);
 
     lrmd_free_rsc_info(rsc);
     free(op_key);
     free(op_id);
 
     return TRUE;
 }
diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c
index 374c8069c0..162ad035d4 100644
--- a/crmd/lrm_state.c
+++ b/crmd/lrm_state.c
@@ -1,758 +1,740 @@
 /* 
  * Copyright (C) 2012 David Vossel <dvossel@redhat.com>
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  * 
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 
 GHashTable *lrm_state_table = NULL;
 extern GHashTable *proxy_table;
 int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
 void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
 
-static void
-history_cache_destroy(gpointer data)
-{
-    rsc_history_t *entry = data;
-
-    if (entry->stop_params) {
-        g_hash_table_destroy(entry->stop_params);
-    }
-
-    free(entry->rsc.type);
-    free(entry->rsc.class);
-    free(entry->rsc.provider);
-
-    lrmd_free_event(entry->failed);
-    lrmd_free_event(entry->last);
-    free(entry->id);
-    free(entry);
-}
 static void
 free_rsc_info(gpointer value)
 {
     lrmd_rsc_info_t *rsc_info = value;
 
     lrmd_free_rsc_info(rsc_info);
 }
 
 static void
 free_deletion_op(gpointer value)
 {
     struct pending_deletion_op_s *op = value;
 
     free(op->rsc);
     delete_ha_msg_input(op->input);
     free(op);
 }
 
 static void
 free_recurring_op(gpointer value)
 {
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     free(op->user_data);
     free(op->rsc_id);
     free(op->op_type);
     free(op->op_key);
     if (op->params) {
         g_hash_table_destroy(op->params);
     }
     free(op);
 }
 
 static gboolean
 fail_pending_op(gpointer key, gpointer value, gpointer user_data)
 {
     lrmd_event_data_t event = { 0, };
     lrm_state_t *lrm_state = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     crm_trace("Pre-emptively failing %s_%s_%d on %s (call=%s, %s)",
               op->rsc_id, op->op_type, op->interval,
               lrm_state->node_name, key, op->user_data);
 
     event.type = lrmd_event_exec_complete;
     event.rsc_id = op->rsc_id;
     event.op_type = op->op_type;
     event.user_data = op->user_data;
     event.timeout = 0;
     event.interval = op->interval;
     event.rc = PCMK_OCF_CONNECTION_DIED;
     event.op_status = PCMK_LRM_OP_ERROR;
     event.t_run = op->start_time;
     event.t_rcchange = op->start_time;
     event.t_rcchange = op->start_time;
 
     event.call_id = op->call_id;
     event.remote_nodename = lrm_state->node_name;
     event.params = op->params;
 
     process_lrm_event(lrm_state, &event, op);
     return TRUE;
 }
 
 gboolean
 lrm_state_is_local(lrm_state_t *lrm_state)
 {
     if (lrm_state == NULL || fsa_our_uname == NULL) {
         return FALSE;
     }
 
     if (strcmp(lrm_state->node_name, fsa_our_uname) != 0) {
         return FALSE;
     }
 
     return TRUE;
 
 }
 
 lrm_state_t *
 lrm_state_create(const char *node_name)
 {
     lrm_state_t *state = NULL;
 
     if (!node_name) {
         crm_err("No node name given for lrm state object");
         return NULL;
     }
 
     state = calloc(1, sizeof(lrm_state_t));
     if (!state) {
         return NULL;
     }
 
     state->node_name = strdup(node_name);
 
     state->rsc_info_cache = g_hash_table_new_full(crm_str_hash,
                                                 g_str_equal, NULL, free_rsc_info);
 
     state->deletion_ops = g_hash_table_new_full(crm_str_hash,
                                                 g_str_equal, g_hash_destroy_str, free_deletion_op);
 
     state->pending_ops = g_hash_table_new_full(crm_str_hash,
                                                g_str_equal, g_hash_destroy_str, free_recurring_op);
 
     state->resource_history = g_hash_table_new_full(crm_str_hash,
-                                                    g_str_equal, NULL, history_cache_destroy);
+                                                    g_str_equal, NULL, history_free);
 
     g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
     return state;
 
 }
 
 void
 lrm_state_destroy(const char *node_name)
 {
     g_hash_table_remove(lrm_state_table, node_name);
 }
 
 static gboolean
 remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
 {
     remote_proxy_t *proxy = value;
     const char *node_name = user_data;
 
     if (safe_str_eq(node_name, proxy->node_name)) {
         return TRUE;
     }
 
     return FALSE;
 }
 
 static void
 internal_lrm_state_destroy(gpointer data)
 {
     lrm_state_t *lrm_state = data;
 
     if (!lrm_state) {
         return;
     }
 
     crm_trace("Destroying proxy table %s with %d members", lrm_state->node_name, g_hash_table_size(proxy_table));
     g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name);
     remote_ra_cleanup(lrm_state);
     lrmd_api_delete(lrm_state->conn);
 
     if (lrm_state->rsc_info_cache) {
         crm_trace("Destroying rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache));
         g_hash_table_destroy(lrm_state->rsc_info_cache);
     }
     if (lrm_state->resource_history) {
         crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history));
         g_hash_table_destroy(lrm_state->resource_history);
     }
     if (lrm_state->deletion_ops) {
         crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops));
         g_hash_table_destroy(lrm_state->deletion_ops);
     }
     if (lrm_state->pending_ops) {
         crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops));
         g_hash_table_destroy(lrm_state->pending_ops);
     }
 
     free((char *)lrm_state->node_name);
     free(lrm_state);
 }
 
 void
 lrm_state_reset_tables(lrm_state_t * lrm_state)
 {
     if (lrm_state->resource_history) {
         crm_trace("Re-setting history op cache with %d members",
                   g_hash_table_size(lrm_state->resource_history));
         g_hash_table_remove_all(lrm_state->resource_history);
     }
     if (lrm_state->deletion_ops) {
         crm_trace("Re-setting deletion op cache with %d members",
                   g_hash_table_size(lrm_state->deletion_ops));
         g_hash_table_remove_all(lrm_state->deletion_ops);
     }
     if (lrm_state->pending_ops) {
         crm_trace("Re-setting pending op cache with %d members",
                   g_hash_table_size(lrm_state->pending_ops));
         g_hash_table_remove_all(lrm_state->pending_ops);
     }
     if (lrm_state->rsc_info_cache) {
         crm_trace("Re-setting rsc info cache with %d members",
                   g_hash_table_size(lrm_state->rsc_info_cache));
         g_hash_table_remove_all(lrm_state->rsc_info_cache);
     }
 }
 
 gboolean
 lrm_state_init_local(void)
 {
     if (lrm_state_table) {
         return TRUE;
     }
 
     lrm_state_table =
         g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, internal_lrm_state_destroy);
     if (!lrm_state_table) {
         return FALSE;
     }
 
     proxy_table =
         g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free);
     if (!proxy_table) {
          g_hash_table_destroy(lrm_state_table);
         return FALSE;
     }
 
     return TRUE;
 }
 
 void
 lrm_state_destroy_all(void)
 {
     if (lrm_state_table) {
         crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table));
         g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL;
     }
     if(proxy_table) {
         crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table));
         g_hash_table_destroy(proxy_table); proxy_table = NULL;
     }
 }
 
 lrm_state_t *
 lrm_state_find(const char *node_name)
 {
     if (!node_name) {
         return NULL;
     }
     return g_hash_table_lookup(lrm_state_table, node_name);
 }
 
 lrm_state_t *
 lrm_state_find_or_create(const char *node_name)
 {
     lrm_state_t *lrm_state;
 
     lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
     if (!lrm_state) {
         lrm_state = lrm_state_create(node_name);
     }
 
     return lrm_state;
 }
 
 GList *
 lrm_state_get_list(void)
 {
     return g_hash_table_get_values(lrm_state_table);
 }
 
 void
 lrm_state_disconnect(lrm_state_t * lrm_state)
 {
     int removed = 0;
 
     if (!lrm_state->conn) {
         return;
     }
     crm_trace("Disconnecting %s", lrm_state->node_name);
     ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn);
 
     if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
         removed = g_hash_table_foreach_remove(lrm_state->pending_ops, fail_pending_op, lrm_state);
         crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name);
     }
 
     lrmd_api_delete(lrm_state->conn);
     lrm_state->conn = NULL;
 }
 
 int
 lrm_state_is_connected(lrm_state_t * lrm_state)
 {
     if (!lrm_state->conn) {
         return FALSE;
     }
     return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn);
 }
 
 int
 lrm_state_poke_connection(lrm_state_t * lrm_state)
 {
 
     if (!lrm_state->conn) {
         return -1;
     }
     return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn);
 }
 
 int
 lrm_state_ipc_connect(lrm_state_t * lrm_state)
 {
     int ret;
 
     if (!lrm_state->conn) {
         lrm_state->conn = lrmd_api_new();
         ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, lrm_op_callback);
     }
 
     ret = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL);
 
     if (ret != pcmk_ok) {
         lrm_state->num_lrm_register_fails++;
     } else {
         lrm_state->num_lrm_register_fails = 0;
     }
 
     return ret;
 }
 
 static int
 remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
 {
     /* Async responses from cib and friends back to clients via pacemaker_remoted */
     xmlNode *xml = NULL;
     remote_proxy_t *proxy = userdata;
     lrm_state_t *lrm_state = lrm_state_find(proxy->node_name);
     uint32_t flags;
 
     if (lrm_state == NULL) {
         return 0;
     }
 
     xml = string2xml(buffer);
     if (xml == NULL) {
         crm_warn("Received a NULL msg from IPC service.");
         return 1;
     }
 
     flags = crm_ipc_buffer_flags(proxy->ipc);
     if (flags & crm_ipc_proxied_relay_response) {
         crm_trace("Passing response back to %.8s on %s: %.200s - request id: %d", proxy->session_id, proxy->node_name, buffer, proxy->last_request_id);
         remote_proxy_relay_response(lrm_state->conn, proxy->session_id, xml, proxy->last_request_id);
         proxy->last_request_id = 0;
 
     } else {
         crm_trace("Passing event back to %.8s on %s: %.200s", proxy->session_id, proxy->node_name, buffer);
         remote_proxy_relay_event(lrm_state->conn, proxy->session_id, xml);
     }
     free_xml(xml);
     return 1;
 }
 
 static void
 remote_proxy_disconnected(void *userdata)
 {
     remote_proxy_t *proxy = userdata;
     lrm_state_t *lrm_state = lrm_state_find(proxy->node_name);
 
     crm_trace("Destroying %s (%p)", lrm_state->node_name, userdata);
 
     proxy->source = NULL;
     proxy->ipc = NULL;
 
     if (lrm_state && lrm_state->conn) {
         remote_proxy_notify_destroy(lrm_state->conn, proxy->session_id);
     }
     g_hash_table_remove(proxy_table, proxy->session_id);
 }
 
 static remote_proxy_t *
 remote_proxy_new(const char *node_name, const char *session_id, const char *channel)
 {
     static struct ipc_client_callbacks proxy_callbacks = {
         .dispatch = remote_proxy_dispatch_internal,
         .destroy = remote_proxy_disconnected
     };
     remote_proxy_t *proxy = calloc(1, sizeof(remote_proxy_t));
 
     proxy->node_name = strdup(node_name);
     proxy->session_id = strdup(session_id);
 
     if (safe_str_eq(channel, CRM_SYSTEM_CRMD)) {
         proxy->is_local = TRUE;
     } else {
         proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 0, proxy, &proxy_callbacks);
         proxy->ipc = mainloop_get_ipc_client(proxy->source);
 
         if (proxy->source == NULL) {
             remote_proxy_free(proxy);
             return NULL;
         }
     }
 
     crm_trace("created proxy session ID %s", proxy->session_id);
     g_hash_table_insert(proxy_table, proxy->session_id, proxy);
 
     return proxy;
 }
 
 gboolean
 crmd_is_proxy_session(const char *session)
 {
     return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE;
 }
 
 void
 crmd_proxy_send(const char *session, xmlNode *msg)
 {
     remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
     lrm_state_t *lrm_state = NULL;
 
     if (!proxy) {
         return;
     }
     crm_log_xml_trace(msg, "to-proxy");
     lrm_state = lrm_state_find(proxy->node_name);
     if (lrm_state) {
         crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name);
         remote_proxy_relay_event(lrm_state->conn, session, msg);
     }
 }
 
 static void
 crmd_proxy_dispatch(const char *session, xmlNode *msg)
 {
 
     crm_log_xml_trace(msg, "CRMd-PROXY[inbound]");
 
     crm_xml_add(msg, F_CRM_SYS_FROM, session);
     if (crmd_authorize_message(msg, NULL, session)) {
         route_message(C_IPC_MESSAGE, msg);
     }
 
     trigger_fsa(fsa_source);
 }
 
 static void
 remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
 {
     lrm_state_t *lrm_state = userdata;
     const char *op = crm_element_value(msg, F_LRMD_IPC_OP);
     const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION);
     int msg_id = 0;
 
     /* sessions are raw ipc connections to IPC,
      * all we do is proxy requests/responses exactly
      * like they are given to us at the ipc level. */
 
     CRM_CHECK(op != NULL, return);
     CRM_CHECK(session != NULL, return);
 
     crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
 
     /* This is msg from remote ipc client going to real ipc server */
     if (safe_str_eq(op, "new")) {
         const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER);
 
         CRM_CHECK(channel != NULL, return);
 
         if (remote_proxy_new(lrm_state->node_name, session, channel) == NULL) {
             remote_proxy_notify_destroy(lrmd, session);
         }
         crm_info("new remote proxy client established to %s, session id %s", channel, session);
     } else if (safe_str_eq(op, "destroy")) {
         remote_proxy_end_session(session);
 
     } else if (safe_str_eq(op, "request")) {
         int flags = 0;
         xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
         const char *name = crm_element_value(msg, F_LRMD_IPC_CLIENT);
         remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
 
         CRM_CHECK(request != NULL, return);
 
         if (proxy == NULL) {
             /* proxy connection no longer exists */
             remote_proxy_notify_destroy(lrmd, session);
             return;
         } else if ((proxy->is_local == FALSE) && (crm_ipc_connected(proxy->ipc) == FALSE)) {
             remote_proxy_end_session(session);
             return;
         }
         proxy->last_request_id = 0;
         crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
         crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
 
 #if ENABLE_ACL
         CRM_ASSERT(lrm_state->node_name);
         crm_acl_get_set_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
 #endif
 
         if (proxy->is_local) {
             /* this is for the crmd, which we are, so don't try
              * and connect/send to ourselves over ipc. instead
              * do it directly. */
             crmd_proxy_dispatch(session, request);
             if (flags & crm_ipc_client_response) {
                 xmlNode *op_reply = create_xml_node(NULL, "ack");
 
                 crm_xml_add(op_reply, "function", __FUNCTION__);
                 crm_xml_add_int(op_reply, "line", __LINE__);
                 remote_proxy_relay_response(lrmd, session, op_reply, msg_id);
                 free_xml(op_reply);
             }
 
         } else if(is_set(flags, crm_ipc_proxied)) {
             int rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL);
 
             if(rc < 0) {
                 xmlNode *op_reply = create_xml_node(NULL, "nack");
 
                 crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)",
                          op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc);
 
                 /* Send a n'ack so the caller doesn't block */
                 crm_xml_add(op_reply, "function", __FUNCTION__);
                 crm_xml_add_int(op_reply, "line", __LINE__);
                 crm_xml_add_int(op_reply, "rc", rc);
                 remote_proxy_relay_response(lrmd, session, op_reply, msg_id);
                 free_xml(op_reply);
 
             } else {
                 crm_trace("Relayed %s request %d from %s to %s for %s",
                           op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name);
                 proxy->last_request_id = msg_id;
             }
 
         } else {
             int rc = pcmk_ok;
             xmlNode *op_reply = NULL;
             /* For backwards compatibility with pacemaker_remoted <= 1.1.10 */
 
             crm_trace("Relaying %s request %d from %s to %s for %s",
                       op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name);
 
             rc = crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply);
             if(rc < 0) {
                 crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)",
                          op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc);
             } else {
                 crm_trace("Relayed %s request %d from %s to %s for %s",
                           op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name);
             }
 
             if(op_reply) {
                 remote_proxy_relay_response(lrmd, session, op_reply, msg_id);
                 free_xml(op_reply);
             }
         }
     } else {
         crm_err("Unknown proxy operation: %s", op);
     }
 }
 
 int
 lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
                                int timeout_ms)
 {
     int ret;
 
     if (!lrm_state->conn) {
         lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server, port);
         if (!lrm_state->conn) {
             return -1;
         }
         ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback);
         lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, remote_proxy_cb);
     }
 
     crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms);
     ret =
         ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name,
                                                           timeout_ms);
 
     if (ret != pcmk_ok) {
         lrm_state->num_lrm_register_fails++;
     } else {
         lrm_state->num_lrm_register_fails = 0;
     }
 
     return ret;
 }
 
 int
 lrm_state_get_metadata(lrm_state_t * lrm_state,
                        const char *class,
                        const char *provider,
                        const char *agent, char **output, enum lrmd_call_options options)
 {
     if (!lrm_state->conn) {
         return -ENOTCONN;
     }
 
     /* Optimize this... only retrieve metadata from local lrmd connection. Perhaps consider
      * caching result. */
     return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata(lrm_state->conn, class, provider, agent,
                                                             output, options);
 }
 
 int
 lrm_state_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval)
 {
     if (!lrm_state->conn) {
         return -ENOTCONN;
     }
 
     /* Optimize this, cancel requires a synced request/response to the server.
      * Figure out a way to make this async. */
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         return remote_ra_cancel(lrm_state, rsc_id, action, interval);
     }
     return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, action, interval);
 }
 
 lrmd_rsc_info_t *
 lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
 {
     lrmd_rsc_info_t *rsc = NULL;
 
     if (!lrm_state->conn) {
         return NULL;
     }
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         return remote_ra_get_rsc_info(lrm_state, rsc_id);
     }
 
     rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
     if (rsc == NULL) {
         /* only contact the lrmd if we don't already have a cached rsc info */
         rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options);
         if (rsc == NULL) {
 		    return NULL;
         }
         /* cache the result */
         g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
     }
 
     return lrmd_copy_rsc_info(rsc);
 
 }
 
 int
 lrm_state_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval,     /* ms */
                int timeout,     /* ms */
                int start_delay, /* ms */
                lrmd_key_value_t * params)
 {
 
     if (!lrm_state->conn) {
         lrmd_key_value_freeall(params);
         return -ENOTCONN;
     }
 
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         return remote_ra_exec(lrm_state,
                               rsc_id, action, userdata, interval, timeout, start_delay, params);
     }
 
     return ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn,
                                                     rsc_id,
                                                     action,
                                                     userdata,
                                                     interval,
                                                     timeout,
                                                     start_delay,
                                                     lrmd_opt_notify_changes_only, params);
 }
 
 int
 lrm_state_register_rsc(lrm_state_t * lrm_state,
                        const char *rsc_id,
                        const char *class,
                        const char *provider, const char *agent, enum lrmd_call_options options)
 {
     if (!lrm_state->conn) {
         return -ENOTCONN;
     }
 
     /* optimize this... this function is a synced round trip from client to daemon.
      * The crmd/lrm.c code path should be re-factored to allow the register of resources
      * to be performed async. The lrmd client api needs to make an async version
      * of register available. */
     if (is_remote_lrmd_ra(agent, provider, NULL)) {
         return lrm_state_find_or_create(rsc_id) ? pcmk_ok : -1;
     }
 
     return ((lrmd_t *) lrm_state->conn)->cmds->register_rsc(lrm_state->conn, rsc_id, class,
                                                             provider, agent, options);
 }
 
 int
 lrm_state_unregister_rsc(lrm_state_t * lrm_state,
                          const char *rsc_id, enum lrmd_call_options options)
 {
     if (!lrm_state->conn) {
         return -ENOTCONN;
     }
 
     /* optimize this... this function is a synced round trip from client to daemon.
      * The crmd/lrm.c code path that uses this function should always treat it as an
      * async operation. The lrmd client api needs to make an async version unreg available. */
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         lrm_state_destroy(rsc_id);
         return pcmk_ok;
     }
 
     g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
 
     return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
 }
diff --git a/cts/CM_ais.py b/cts/CM_ais.py
index 44f91cdd96..a34f9b1940 100644
--- a/cts/CM_ais.py
+++ b/cts/CM_ais.py
@@ -1,153 +1,154 @@
 '''CTS: Cluster Testing System: AIS dependent modules...
 '''
 
 __copyright__ = '''
 Copyright (C) 2007 Andrew Beekhof <andrew@suse.de>
 
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 from cts.CTSvars import *
 from cts.CM_lha  import crm_lha
 from cts.CTS     import Process
 from cts.patterns    import PatternSelector
 
 #######################################################################
 #
 #  LinuxHA v2 dependent modules
 #
 #######################################################################
 
 
 class crm_ais(crm_lha):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, Environment, randseed=None, name=None):
         if not name: name="crm-ais"
         crm_lha.__init__(self, Environment, randseed=randseed, name=name)
 
         self.fullcomplist = {}
         self.templates = PatternSelector(self.name)
 
     def NodeUUID(self, node):
         return node
 
-    def ais_components(self):
+    def ais_components(self, extra={}):
 
         complist = []
         if not len(self.fullcomplist.keys()):
             for c in ["cib", "lrmd", "crmd", "attrd" ]:
-               self.fullcomplist[c] = Process(
-                   self, c, 
-                   pats = self.templates.get_component(self.name, c),
-                   badnews_ignore = self.templates.get_component(self.name, "%s-ignore"%c),
-                   common_ignore = self.templates.get_component(self.name, "common-ignore"))
-
-               self.fullcomplist["pengine"] = Process(
-                   self, "pengine", 
-                   dc_pats = self.templates.get_component(self.name, "pengine"),
-                   badnews_ignore = self.templates.get_component(self.name, "pengine-ignore"),
-                   common_ignore = self.templates.get_component(self.name, "common-ignore"))
-
-               self.fullcomplist["stonith-ng"] = Process(
-                   self, "stonith-ng", process="stonithd", 
-                   pats = self.templates.get_component(self.name, "stonith"),
-                   badnews_ignore = self.templates.get_component(self.name, "stonith-ignore"),
-                   common_ignore = self.templates.get_component(self.name, "common-ignore"))
-
+                self.fullcomplist[c] = Process(
+                    self, c, 
+                    pats = self.templates.get_component(self.name, c),
+                    badnews_ignore = self.templates.get_component(self.name, "%s-ignore" % c),
+                    common_ignore = self.templates.get_component(self.name, "common-ignore"))
+
+            # pengine uses dc_pats instead of pats
+            self.fullcomplist["pengine"] = Process(
+                self, "pengine", 
+                dc_pats = self.templates.get_component(self.name, "pengine"),
+                badnews_ignore = self.templates.get_component(self.name, "pengine-ignore"),
+                common_ignore = self.templates.get_component(self.name, "common-ignore"))
+
+            # stonith-ng's process name is different from its component name
+            self.fullcomplist["stonith-ng"] = Process(
+                self, "stonith-ng", process="stonithd", 
+                pats = self.templates.get_component(self.name, "stonith"),
+                badnews_ignore = self.templates.get_component(self.name, "stonith-ignore"),
+                common_ignore = self.templates.get_component(self.name, "common-ignore"))
+
+            # add (or replace) any extra components passed in
+            self.fullcomplist.update(extra)
+
+        # Processes running under valgrind can't be shot with "killall -9 processname",
+        # so don't include them in the returned list
         vgrind = self.Env["valgrind-procs"].split()
         for key in self.fullcomplist.keys():
             if self.Env["valgrind-tests"]:
-               if key in vgrind:
-               # Processes running under valgrind can't be shot with "killall -9 processname"
+                if key in vgrind:
                     self.log("Filtering %s from the component list as it is being profiled by valgrind" % key)
                     continue
             if key == "stonith-ng" and not self.Env["DoFencing"]:
                 continue
-
             complist.append(self.fullcomplist[key])
 
-        #self.complist = [ fullcomplist["pengine"] ]
         return complist
 
 
 class crm_cs_v0(crm_ais):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
 
     crm clusters running against version 0 of our plugin
     '''
     def __init__(self, Environment, randseed=None, name=None):
         if not name: name="crm-plugin-v0"
         crm_ais.__init__(self, Environment, randseed=randseed, name=name)
 
     def Components(self):
-        self.ais_components()
-        c = "corosync"
-
-        self.fullcomplist[c] = Process(
-            self, c, 
-            pats = self.templates.get_component(self.name, c),
-            badnews_ignore = self.templates.get_component(self.name, "%s-ignore"%c),
+        extra = {}
+        extra["corosync"] = Process(
+            self, "corosync", 
+            pats = self.templates.get_component(self.name, "corosync"),
+            badnews_ignore = self.templates.get_component(self.name, "corosync-ignore"),
             common_ignore = self.templates.get_component(self.name, "common-ignore")
         )
-
-        return self.ais_components()
+        return self.ais_components(extra=extra)
 
 
 class crm_cs_v1(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
 
     crm clusters running on top of version 1 of our plugin
     '''
     def __init__(self, Environment, randseed=None, name=None):
         if not name: name="crm-plugin-v1"
         crm_cs_v0.__init__(self, Environment, randseed=randseed, name=name)
 
 
 class crm_mcp(crm_cs_v0):
     '''
     The crm version 4 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of native corosync (no plugins)
     '''
     def __init__(self, Environment, randseed=None, name=None):
         if not name: name="crm-mcp"
         crm_cs_v0.__init__(self, Environment, randseed=randseed, name=name)
 
         if self.Env["have_systemd"]:
             self.update({
                 # When systemd is in use, we can look for this instead
                 "Pat:We_stopped"   : "%s.*Corosync Cluster Engine exiting normally",
             })
 
 
 class crm_cman(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, Environment, randseed=None, name=None):
         if not name: name="crm-cman"
         crm_cs_v0.__init__(self, Environment, randseed=randseed, name=name)
diff --git a/cts/environment.py b/cts/environment.py
index a3399c308f..61d421187b 100644
--- a/cts/environment.py
+++ b/cts/environment.py
@@ -1,678 +1,682 @@
 '''
 Classes related to producing and searching logs
 '''
 
 __copyright__='''
 Copyright (C) 2014 Andrew Beekhof <andrew@beekhof.net>
 Licensed under the GNU GPL.
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 import sys, time, os, socket, random
 
 from cts.remote import *
 
 class Environment:
 
     def __init__(self, args):
         self.data = {}
         self.Nodes = []
 
         self["DeadTime"] = 300
         self["StartTime"] = 300
         self["StableTime"] = 30
         self["tests"] = []
         self["IPagent"] = "IPaddr2"
         self["DoStandby"] = 1
         self["DoFencing"] = 1
         self["XmitLoss"] = "0.0"
         self["RecvLoss"] = "0.0"
         self["ClobberCIB"] = 0
         self["CIBfilename"] = None
         self["CIBResource"] = 0
         self["DoBSC"]    = 0
         self["use_logd"] = 0
         self["oprofile"] = []
         self["warn-inactive"] = 0
         self["ListTests"] = 0
         self["benchmark"] = 0
         self["LogWatcher"] = "any"
         self["SyslogFacility"] = "daemon"
         self["LogFileName"] = "/var/log/messages"
         self["Schema"] = "pacemaker-2.0"
         self["Stack"] = "corosync"
         self["stonith-type"] = "external/ssh"
         self["stonith-params"] = "hostlist=all,livedangerously=yes"
         self["loop-minutes"] = 60
         self["valgrind-prefix"] = None
-        self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng"
+        self["valgrind-procs"] = "attrd cib crmd lrmd pengine stonith-ng"
         self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
 
         self["experimental-tests"] = 0
         self["container-tests"] = 0
         self["valgrind-tests"] = 0
         self["unsafe-tests"] = 1
         self["loop-tests"] = 1
         self["scenario"] = "random"
         self["stats"] = 0
         self["docker"] = 0
 
         self.RandomGen = random.Random()
         self.logger = LogFactory()
 
         self.SeedRandom()
         self.rsh = RemoteFactory().getInstance()
 
         self.target = "localhost"
 
         self.parse_args(args)
         self.discover()
         self.validate()
 
     def SeedRandom(self, seed=None):
         if not seed:
             seed = int(time.time())
 
         self["RandSeed"] = seed
         self.RandomGen.seed(str(seed))
 
     def dump(self):
         keys = []
         for key in self.data.keys():
             keys.append(key)
 
         keys.sort()
         for key in keys:
             self.logger.debug("Environment["+key+"]:\t"+str(self[key]))
 
     def keys(self):
         return self.data.keys()
 
     def has_key(self, key):
         if key == "nodes":
             return True
 
         return self.data.has_key(key)
 
     def __getitem__(self, key):
         if key == "nodes":
             return self.Nodes
 
         elif key == "Name":
             return self.get_stack_short()
 
         elif self.data.has_key(key):
             return self.data[key]
 
         else:
             return None
 
     def __setitem__(self, key, value):
         if key == "Stack":
             self.set_stack(value)
 
         elif key == "node-limit":
             self.data[key] = value
             self.filter_nodes()
 
         elif key == "nodes":
             self.Nodes = []
             for node in value:
                 # I don't think I need the IP address, etc. but this validates
                 # the node name against /etc/hosts and/or DNS, so it's a
                 # GoodThing(tm).
                 try:
                     n = node.strip()
                     if self.data["docker"] == 0:
                         socket.gethostbyname_ex(n)
 
                     self.Nodes.append(n) 
                 except:
                     self.logger.log(node+" not found in DNS... aborting")
                     raise
 
             self.filter_nodes()
 
         else:
             self.data[key] = value
 
     def RandomNode(self):
         '''Choose a random node from the cluster'''
         return self.RandomGen.choice(self["nodes"])
 
     def set_stack(self, name):
         # Normalize stack names
         if name == "heartbeat" or name == "lha":
             self.data["Stack"] = "heartbeat"
 
         elif name == "openais" or name == "ais"  or name == "whitetank":
             self.data["Stack"] = "corosync (plugin v0)"
 
         elif name == "corosync" or name == "cs" or name == "mcp":
             self.data["Stack"] = "corosync 2.x"
 
         elif name == "cman":
             self.data["Stack"] = "corosync (cman)"
 
         elif name == "v1":
             self.data["Stack"] = "corosync (plugin v1)"
 
         elif name == "v0":
             self.data["Stack"] = "corosync (plugin v0)"
 
         else:
             print "Unknown stack: "+name
             sys.exit(1)
 
     def get_stack_short(self):
         # Create the Cluster Manager object
         if not self.data.has_key("Stack"):
             return "unknown"
 
         elif self.data["Stack"] == "heartbeat":
             return "crm-lha"
 
         elif self.data["Stack"] == "corosync 2.x":
             if self["docker"]:
                 return "crm-mcp-docker"
             else:
                 return "crm-mcp"
 
         elif self.data["Stack"] == "corosync (cman)":
             return "crm-cman"
         
         elif self.data["Stack"] == "corosync (plugin v1)":
             return "crm-plugin-v1"
         
         elif self.data["Stack"] == "corosync (plugin v0)":
             return "crm-plugin-v0"
 
         else:
             LogFactory().log("Unknown stack: "+self.data["stack"])
             sys.exit(1)
 
     def detect_syslog(self):
         # Detect syslog variant
         if not self.has_key("syslogd"):
             if self["have_systemd"]:
                 # Systemd
                 self["syslogd"] = self.rsh(self.target, "systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", stdout=1).strip()
             else:
                 # SYS-V
                 self["syslogd"] = self.rsh(self.target, "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1", stdout=1).strip()
 
             if not self.has_key("syslogd") or not self["syslogd"]:
                 # default
                 self["syslogd"] = "rsyslog"
 
     def detect_at_boot(self):
         # Detect if the cluster starts at boot
         if not self.has_key("at-boot"):
             atboot = 0
 
             if self["have_systemd"]:
             # Systemd
                 atboot = atboot or not self.rsh(self.target, "systemctl is-enabled heartbeat.service")
                 atboot = atboot or not self.rsh(self.target, "systemctl is-enabled corosync.service")
                 atboot = atboot or not self.rsh(self.target, "systemctl is-enabled pacemaker.service")
             else:
                 # SYS-V
                 atboot = atboot or not self.rsh(self.target, "chkconfig --list | grep -e corosync.*on -e heartbeat.*on -e pacemaker.*on")
 
             self["at-boot"] = atboot
 
     def detect_ip_offset(self):
         # Try to determin an offset for IPaddr resources
         if self["CIBResource"] and not self.has_key("IPBase"):
             network=self.rsh(self.target, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip()
             self["IPBase"] = self.rsh(self.target, "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network, stdout=1).strip()
             if not self["IPBase"]:
                 self["IPBase"] = " fe80::1234:56:7890:1000"
                 self.logger.log("Could not determine an offset for IPaddr resources.  Perhaps nmap is not installed on the nodes.")
                 self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
 
             elif int(self["IPBase"].split('.')[3]) >= 240:
                 self.logger.log("Could not determine an offset for IPaddr resources. Upper bound is too high: %s %s"
                                 % (self["IPBase"], self["IPBase"].split('.')[3]))
                 self["IPBase"] = " fe80::1234:56:7890:1000"
                 self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
 
     def filter_nodes(self):
         if self["node-limit"] > 0:
             if len(self["nodes"]) > self["node-limit"]:
                 self.logger.log("Limiting the number of nodes configured=%d (max=%d)"
                                 %(len(self["nodes"]), self["node-limit"]))
                 while len(self["nodes"]) > self["node-limit"]:
                     self["nodes"].pop(len(self["nodes"])-1)
 
     def validate(self):
         if len(self["nodes"]) < 1:
             print "No nodes specified!"
             sys.exit(1)
 
     def discover(self):
         self.target = random.Random().choice(self["nodes"])
 
         master = socket.gethostname()
 
         # Use the IP where possible to avoid name lookup failures
         for ip in socket.gethostbyname_ex(master)[2]:
             if ip != "127.0.0.1":
                 master = ip
                 break;
         self["cts-master"] = master
 
         if not self.has_key("have_systemd"):
             self["have_systemd"] = not self.rsh(self.target, "systemctl list-units")
         
         self.detect_syslog()
         self.detect_at_boot()
         self.detect_ip_offset()
 
         self.validate()
 
     def parse_args(self, args):
         skipthis=None
 
         if not args:
             args=sys.argv[1:]
 
         for i in range(0, len(args)):
             if skipthis:
                 skipthis=None
                 continue
 
             elif args[i] == "-l" or args[i] == "--limit-nodes":
                 skipthis=1
                 self["node-limit"] = int(args[i+1])
 
             elif args[i] == "-r" or args[i] == "--populate-resources":
                 self["CIBResource"] = 1
                 self["ClobberCIB"] = 1
 
             elif args[i] == "--outputfile":
                 skipthis=1
                 self["OutputFile"] = args[i+1]
                 LogFactory().add_file(self["OutputFile"])
 
             elif args[i] == "-L" or args[i] == "--logfile":
                 skipthis=1
                 self["LogWatcher"] = "remote"
                 self["LogAuditDisabled"] = 1
                 self["LogFileName"] = args[i+1]
 
             elif args[i] == "--ip" or args[i] == "--test-ip-base":
                 skipthis=1
                 self["IPBase"] = args[i+1]
                 self["CIBResource"] = 1
                 self["ClobberCIB"] = 1
 
             elif args[i] == "--oprofile":
                 skipthis=1
                 self["oprofile"] = args[i+1].split(' ')
 
             elif args[i] == "--trunc":
                 self["TruncateLog"]=1
 
             elif args[i] == "--list-tests" or args[i] == "--list" :
                 self["ListTests"]=1
 
             elif args[i] == "--benchmark":
                 self["benchmark"]=1
 
             elif args[i] == "--bsc":
                 self["DoBSC"] = 1
                 self["scenario"] = "basic-sanity"
 
             elif args[i] == "--qarsh":
                 RemoteFactory().enable_qarsh()
 
             elif args[i] == "--docker":
                 self["docker"] = 1
                 RemoteFactory().enable_docker()
 
             elif args[i] == "--stonith" or args[i] == "--fencing":
                 skipthis=1
                 if args[i+1] == "1" or args[i+1] == "yes":
                     self["DoFencing"]=1
                 elif args[i+1] == "0" or args[i+1] == "no":
                     self["DoFencing"]=0
                 elif args[i+1] == "phd":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_phd_kvm"
                     self["stonith-params"] = "pcmk_arg_map=domain:uname,delay=0"
                 elif args[i+1] == "rhcs" or args[i+1] == "xvm" or args[i+1] == "virt":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_xvm"
                     self["stonith-params"] = "pcmk_arg_map=domain:uname,delay=0"
                 elif args[i+1] == "docker":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_docker_cts"
                 elif args[i+1] == "scsi":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_scsi"
                     self["stonith-params"] = "delay=0"
                 elif args[i+1] == "ssh" or args[i+1] == "lha":
                     self["DoStonith"]=1
                     self["stonith-type"] = "external/ssh"
                     self["stonith-params"] = "hostlist=all,livedangerously=yes"
                 elif args[i+1] == "north":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=north-apc,login=apc,passwd=apc,pcmk_host_map=north-01:2;north-02:3;north-03:4;north-04:5;north-05:6;north-06:7;north-07:9;north-08:10;north-09:11;north-10:12;north-11:13;north-12:14;north-13:15;north-14:18;north-15:17;north-16:19;"
                 elif args[i+1] == "south":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=south-apc,login=apc,passwd=apc,pcmk_host_map=south-01:2;south-02:3;south-03:4;south-04:5;south-05:6;south-06:7;south-07:9;south-08:10;south-09:11;south-10:12;south-11:13;south-12:14;south-13:15;south-14:18;south-15:17;south-16:19;"
                 elif args[i+1] == "east":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
                 elif args[i+1] == "west":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=west-apc,login=apc,passwd=apc,pcmk_host_map=west-01:2;west-02:3;west-03:4;west-04:5;west-05:6;west-06:7;west-07:9;west-08:10;west-09:11;west-10:12;west-11:13;west-12:14;west-13:15;west-14:18;west-15:17;west-16:19;"
                 elif args[i+1] == "openstack":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_openstack"
                     
                     print "Obtaining OpenStack credentials from the current environment"
                     self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % (
                         os.environ['OS_REGION_NAME'],
                         os.environ['OS_TENANT_NAME'],
                         os.environ['OS_AUTH_URL'],
                         os.environ['OS_USERNAME'],
                         os.environ['OS_PASSWORD']
                     )
                     
                 elif args[i+1] == "rhevm":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_rhevm"
                     
                     print "Obtaining RHEV-M credentials from the current environment"
                     self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
                         os.environ['RHEVM_USERNAME'],
                         os.environ['RHEVM_PASSWORD'],
                         os.environ['RHEVM_SERVER'],
                         os.environ['RHEVM_PORT'],
                     )
                     
                 else:
                     self.usage(args[i+1])
 
             elif args[i] == "--stonith-type":
                 self["stonith-type"] = args[i+1]
                 skipthis=1
 
             elif args[i] == "--stonith-args":
                 self["stonith-params"] = args[i+1]
                 skipthis=1
 
             elif args[i] == "--standby":
                 skipthis=1
                 if args[i+1] == "1" or args[i+1] == "yes":
                     self["DoStandby"] = 1
                 elif args[i+1] == "0" or args[i+1] == "no":
                     self["DoStandby"] = 0
                 else:
                     self.usage(args[i+1])
 
             elif args[i] == "--clobber-cib" or args[i] == "-c":
                 self["ClobberCIB"] = 1
                 
             elif args[i] == "--cib-filename":
                 skipthis=1
                 self["CIBfilename"] = args[i+1]
 
             elif args[i] == "--xmit-loss":
                 try:
                     float(args[i+1])
                 except ValueError:
                     print ("--xmit-loss parameter should be float")
                     self.usage(args[i+1])
                 skipthis=1
                 self["XmitLoss"] = args[i+1]
 
             elif args[i] == "--recv-loss":
                 try:
                     float(args[i+1])
                 except ValueError:
                     print ("--recv-loss parameter should be float")
                     self.usage(args[i+1])
                 skipthis=1
                 self["RecvLoss"] = args[i+1]
 
             elif args[i] == "--choose":
                 skipthis=1
                 self["tests"].append(args[i+1])
                 self["scenario"] = "sequence"
 
             elif args[i] == "--nodes":
                 skipthis=1
                 self["nodes"] = args[i+1].split(' ')
 
             elif args[i] == "-g" or args[i] == "--group" or args[i] == "--dsh-group":
                 skipthis=1
                 self["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args[i+1])
                 LogFactory().add_file(self["OutputFile"], "CTS")
 
                 dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args[i+1])
 
                 # Hacks to make my life easier
                 if args[i+1] == "r6":
                     self["Stack"] = "cman"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_xvm"
                     self["stonith-params"] = "delay=0"
                     self["IPBase"] = " fe80::1234:56:7890:4000"
 
                 elif args[i+1] == "virt1":
                     self["Stack"] = "corosync"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_xvm"
                     self["stonith-params"] = "delay=0"
                     self["IPBase"] = " fe80::1234:56:7890:1000"
 
                 elif args[i+1] == "east16" or args[i+1] == "nsew":
                     self["Stack"] = "corosync"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
                     self["IPBase"] = " fe80::1234:56:7890:2000"
 
                     if args[i+1] == "east16":
                         # Requires newer python than available via nsew
                         self["IPagent"] = "Dummy"
 
                 elif args[i+1] == "corosync8":
                     self["Stack"] = "corosync"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_rhevm"
 
                     print "Obtaining RHEV-M credentials from the current environment"
                     self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
                         os.environ['RHEVM_USERNAME'],
                         os.environ['RHEVM_PASSWORD'],
                         os.environ['RHEVM_SERVER'],
                         os.environ['RHEVM_PORT'],
                    )
                     self["IPBase"] = " fe80::1234:56:7890:3000"
 
                 if os.path.isfile(dsh_file):
                     self["nodes"] = []
                     f = open(dsh_file, 'r')
                     for line in f:
                         l = line.strip().rstrip()
                         if not l.startswith('#'):
                             self["nodes"].append(l)
                     f.close()
 
                 else:
                     print("Unknown DSH group: %s" % args[i+1])
 
             elif args[i] == "--syslog-facility" or args[i] == "--facility":
                 skipthis=1
                 self["SyslogFacility"] = args[i+1]
                 
             elif args[i] == "--seed":
                 skipthis=1
                 self.SeedRandom(args[i+1])
 
             elif args[i] == "--warn-inactive":
                 self["warn-inactive"] = 1
 
             elif args[i] == "--schema":
                 skipthis=1
                 self["Schema"] = args[i+1]
 
             elif args[i] == "--ais":
                 self["Stack"] = "openais"
 
             elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot":
                 skipthis=1
                 if args[i+1] == "1" or args[i+1] == "yes":
                     self["at-boot"] = 1
                 elif args[i+1] == "0" or args[i+1] == "no":
                     self["at-boot"] = 0
                 else:
                     self.usage(args[i+1])
 
             elif args[i] == "--heartbeat" or args[i] == "--lha":
                 self["Stack"] = "heartbeat"
 
             elif args[i] == "--hae":
                 self["Stack"] = "openais"
                 self["Schema"] = "hae"
 
             elif args[i] == "--stack":
                 if args[i+1] == "fedora" or args[i+1] == "fedora-17" or args[i+1] == "fedora-18":
                     self["Stack"] = "corosync"
                 elif args[i+1] == "rhel-6":
                     self["Stack"] = "cman"
                 elif args[i+1] == "rhel-7":
                     self["Stack"] = "corosync"
                 else:
                     self["Stack"] = args[i+1]
                 skipthis=1
 
             elif args[i] == "--once":
                 self["scenario"] = "all-once"
 
             elif args[i] == "--boot":
                 self["scenario"] = "boot"
 
             elif args[i] == "--valgrind-tests":
                 self["valgrind-tests"] = 1
 
+            elif args[i] == "--valgrind-procs":
+                self["valgrind-procs"] = args[i+1]
+                skipthis = 1
+
             elif args[i] == "--no-loop-tests":
                 self["loop-tests"] = 0
 
             elif args[i] == "--loop-minutes":
                 skipthis=1
                 try:
                     self["loop-minutes"]=int(args[i+1])
                 except ValueError:
                     self.usage(args[i])
 
             elif args[i] == "--no-unsafe-tests":
                 self["unsafe-tests"] = 0
 
             elif args[i] == "--experimental-tests":
                 self["experimental-tests"] = 1
 
             elif args[i] == "--container-tests":
                 self["container-tests"] = 1
 
             elif args[i] == "--set":
                 skipthis=1
                 (name, value) = args[i+1].split('=')
                 self[name] = value
                 print "Setting %s = %s" % (name, value)
                 
             elif args[i] == "--help":
                 self.usage(args[i], 0)
 
             elif args[i] == "--":
                 break
 
             else:
                 try:
                     NumIter=int(args[i])
                     self["iterations"] = NumIter
                 except ValueError:
                     self.usage(args[i])
 
     def usage(self, arg, status=1):
         if status:
             print "Illegal argument %s" % arg
         print "usage: " + sys.argv[0] +" [options] number-of-iterations"
         print "\nCommon options: "
         print "\t [--nodes 'node list']        list of cluster nodes separated by whitespace"
         print "\t [--group | -g 'name']        use the nodes listed in the named DSH group (~/.dsh/groups/$name)"
         print "\t [--limit-nodes max]          only use the first 'max' cluster nodes supplied with --nodes"
         print "\t [--stack (v0|v1|cman|corosync|heartbeat|openais)]    which cluster stack is installed"
         print "\t [--list-tests]               list the valid tests"
         print "\t [--benchmark]                add the timing information"
         print "\t "
         print "Options that CTS will usually auto-detect correctly: "
         print "\t [--logfile path]             where should the test software look for logs from cluster nodes"
         print "\t [--syslog-facility name]     which syslog facility should the test software log to"
         print "\t [--at-boot (1|0)]            does the cluster software start at boot time"
         print "\t [--test-ip-base ip]          offset for generated IP address resources"
         print "\t "
         print "Options for release testing: "
         print "\t [--populate-resources | -r]  generate a sample configuration"
         print "\t [--choose name]              run only the named test"
         print "\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]"
         print "\t [--once]                     run all valid tests once"
         print "\t "
         print "Additional (less common) options: "
         print "\t [--clobber-cib | -c ]        erase any existing configuration"
         print "\t [--outputfile path]          optional location for the test software to write logs to"
         print "\t [--trunc]                    truncate logfile before starting"
         print "\t [--xmit-loss lost-rate(0.0-1.0)]"
         print "\t [--recv-loss lost-rate(0.0-1.0)]"
         print "\t [--standby (1 | 0 | yes | no)]"
         print "\t [--fencing (1 | 0 | yes | no | rhcs | lha | openstack )]"
         print "\t [--stonith-type type]"
         print "\t [--stonith-args name=value]"
         print "\t [--bsc]"
         print "\t [--no-loop-tests]            dont run looping/time-based tests"
         print "\t [--no-unsafe-tests]          dont run tests that are unsafe for use with ocfs2/drbd"
         print "\t [--valgrind-tests]           include tests using valgrind"
         print "\t [--experimental-tests]       include experimental tests"
         print "\t [--container-tests]          include pacemaker_remote tests that run in lxc container resources"
         print "\t [--oprofile 'node list']     list of cluster nodes to run oprofile on]"
         print "\t [--qarsh]                    use the QARSH backdoor to access nodes instead of SSH"
         print "\t [--docker]                   Indicates nodes are docker nodes."
         print "\t [--seed random_seed]"
         print "\t [--set option=value]"
         print "\t "
         print "\t Example: "
         print "\t    python sys.argv[0] -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500"
 
         sys.exit(status)
 
 class EnvFactory:
     instance = None
     def __init__(self):
         pass
 
     def getInstance(self, args=None):
         if not EnvFactory.instance:
             EnvFactory.instance = Environment(args)
         return EnvFactory.instance
diff --git a/cts/patterns.py b/cts/patterns.py
index 1bc05a6232..493b69060a 100644
--- a/cts/patterns.py
+++ b/cts/patterns.py
@@ -1,536 +1,538 @@
 import sys, os
 
 from cts.CTSvars import *
 
 patternvariants = {}
 class BasePatterns:
     def __init__(self, name):
         self.name = name
         patternvariants[name] = self
-        self.ignore = []
+        self.ignore = [
+            "avoid confusing Valgrind",
+        ]
         self.BadNews = []
         self.components = {}
         self.commands = {
             "StatusCmd"      : "crmadmin -t 60000 -S %s 2>/dev/null",
             "CibQuery"       : "cibadmin -Ql",
             "CibAddXml"      : "cibadmin --modify -c --xml-text %s",
             "CibDelXpath"    : "cibadmin --delete --xpath %s",
             # 300,000 == 5 minutes
             "RscRunning"     : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s",
             "CIBfile"        : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
             "TmpDir"         : "/tmp",
 
             "BreakCommCmd"   : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
             "FixCommCmd"     : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
 
 # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
 # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
 # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
             "ReduceCommCmd"  : "",
             "RestoreCommCmd" : "tc qdisc del dev lo root",
 
             "UUIDQueryCmd"    : "crmadmin -N",
 
             "SetCheckInterval"    : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-recheck-interval-setting\" name=\"cluster-recheck-interval\" value=\"%s\"/></cluster_property_set>'",
             "ClearCheckInterval"    : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"",
 
             "MaintenanceModeOn"    : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'",
             "MaintenanceModeOff"    : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",
 
             "StandbyCmd"      : "crm_attribute -VQ  -U %s -n standby -l forever -v %s 2>/dev/null",
             "StandbyQueryCmd" : "crm_attribute -QG -U %s -n standby -l forever -d off 2>/dev/null",
         }
         self.search = {
             "Pat:DC_IDLE"      : "crmd.*State transition.*-> S_IDLE",
             
             # This wont work if we have multiple partitions
             "Pat:Local_started" : "%s\W.*The local CRM is operational",
             "Pat:Slave_started" : "%s\W.*State transition.*-> S_NOT_DC",
             "Pat:Master_started": "%s\W.*State transition.*-> S_IDLE",
             "Pat:We_stopped"    : "heartbeat.*%s.*Heartbeat shutdown complete",
             "Pat:Logd_stopped"  : "%s\W.*logd:.*Exiting write process",
             "Pat:They_stopped"  : "%s\W.*LOST:.* %s ",
             "Pat:They_dead"     : "node %s.*: is dead",
             "Pat:TransitionComplete" : "Transition status: Complete: complete",
 
             "Pat:Fencing_start" : "Initiating remote operation .* for %s",
             "Pat:Fencing_ok"    : r"stonith.*:\s*Operation .* of %s by .* for .*@.*: OK",
             "Pat:Fencing_recover"    : r"pengine.*: Recover %s",
 
             "Pat:RscOpOK"       : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(.*confirmed=\S+\)",
             "Pat:RscRemoteOpOK" : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(node=%s,.*,\s*confirmed=true\)",
             "Pat:NodeFenced"    : r"crmd.*:\s*Peer\s+%s\s+was\s+terminated\s+\(.*\)\s+by\s+.*\s+for\s+.*:\s+OK",
             "Pat:FenceOpOK"     : "Operation .* for host '%s' with device .* returned: 0",
         }
 
     def get_component(self, key):
         if self.components.has_key(key):
             return self.components[key]
         print "Unknown component '%s' for %s" % (key, self.name)
         return []
 
     def get_patterns(self, key):
         if key == "BadNews":
             return self.BadNews
         elif key == "BadNewsIgnore":
             return self.ignore
         elif key == "Commands":
             return self.commands
         elif key == "Search":
             return self.search
         elif key == "Components":
             return self.components
 
     def __getitem__(self, key):
         if key == "Name":
             return self.name
         elif self.commands.has_key(key):
             return self.commands[key]
         elif self.search.has_key(key):
             return self.search[key]
         else:
             print "Unknown template '%s' for %s" % (key, self.name)
             return None
 
 class crm_lha(BasePatterns):
     def __init__(self, name):
         BasePatterns.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service heartbeat start > /dev/null 2>&1",
             "StopCmd"        : "service heartbeat stop  > /dev/null 2>&1",
             "EpochCmd"      : "crm_node -H -e",
             "QuorumCmd"      : "crm_node -H -q",
             "PartitionCmd"    : "crm_node -H -p",
         })
 
         self.search.update({
             # Patterns to look for in the log files for various occasions...
             "Pat:ChildKilled"  : "%s\W.*heartbeat.*%s.*killed by signal 9",
             "Pat:ChildRespawn" : "%s\W.*heartbeat.*Respawning client.*%s",
             "Pat:ChildExit"    : "(ERROR|error): Client .* exited with return code",            
         })
         self.BadNews = [
                 r"error:",
                 r"crit:",
                 r"ERROR:",
                 r"CRIT:",
                 r"Shutting down...NOW",
                 r"Timer I_TERMINATE just popped",
                 r"input=I_ERROR",
                 r"input=I_FAIL",
                 r"input=I_INTEGRATED cause=C_TIMER_POPPED",
                 r"input=I_FINALIZED cause=C_TIMER_POPPED",
                 r"input=I_ERROR",
                 r", exiting\.",
                 r"WARN.*Ignoring HA message.*vote.*not in our membership list",
                 r"pengine.*Attempting recovery of resource",
                 r"is taking more than 2x its timeout",
                 r"Confirm not received from",
                 r"Welcome reply not received from",
                 r"Attempting to schedule .* after a stop",
                 r"Resource .* was active at shutdown",
                 r"duplicate entries for call_id",
                 r"Search terminated:",
                 r"No need to invoke the TE",
                 r"global_timer_callback:",
                 r"Faking parameter digest creation",
                 r"Parameters to .* action changed:",
                 r"Parameters to .* changed",
             ]
 
-        self.ignore = [
+        self.ignore = self.ignore + [
                 r"(ERROR|error):.*\s+assert\s+at\s+crm_glib_handler:"
                 "(ERROR|error): Message hist queue is filling up",
                 "stonithd.*CRIT: external_hostlist:.*'vmware gethosts' returned an empty hostlist",
                 "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.",
                 "pengine.*Preventing .* from re-starting",
                 ]
 
 class crm_cs_v0(BasePatterns):
     def __init__(self, name):
         BasePatterns.__init__(self, name)
 
         self.commands.update({
             "EpochCmd"      : "crm_node -e --openais",
             "QuorumCmd"      : "crm_node -q --openais",
             "PartitionCmd"    : "crm_node -p --openais",
             "StartCmd"       : "service corosync start",
             "StopCmd"        : "service corosync stop",
         })
 
         self.search.update({
 # The next pattern is too early
 #            "Pat:We_stopped"   : "%s.*Service engine unloaded: Pacemaker Cluster Manager",
 # The next pattern would be preferred, but it doesn't always come out
 #            "Pat:We_stopped"   : "%s.*Corosync Cluster Engine exiting with status",
             "Pat:We_stopped"   : "%s\W.*Service engine unloaded: corosync cluster quorum service",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "corosync:.*Node %s is now: lost",
 
             "Pat:ChildExit"    : "Child process .* exited",
             "Pat:ChildKilled"  : "%s\W.*corosync.*Child process %s terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*corosync.*Respawning failed child process: %s",
 
             "Pat:InfraUp"      : "%s\W.*corosync.*Initializing transport",
             "Pat:PacemakerUp"  : "%s\W.*pacemakerd.*Starting Pacemaker",
         })
 
-        self.ignore = [
+        self.ignore = self.ignore + [
             r"crm_mon:",
             r"crmadmin:",
             r"update_trace_data",
             r"async_notify:.*strange, client not found",
             r"Parse error: Ignoring unknown option .*nodename",
             r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:",
             r"Child process .* terminated with signal 9",
             r"getinfo response error: 1$",
             "sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE",
             r"sbd.* pcmk:\s*error:.*Connection to cib_ro failed",
             r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* closed .I/O condition=17",
         ]
 
         self.BadNews = [
             r"error:",
             r"crit:",
             r"ERROR:",
             r"CRIT:",
             r"Shutting down...NOW",
             r"Timer I_TERMINATE just popped",
             r"input=I_ERROR",
             r"input=I_FAIL",
             r"input=I_INTEGRATED cause=C_TIMER_POPPED",
             r"input=I_FINALIZED cause=C_TIMER_POPPED",
             r"input=I_ERROR",
             r", exiting\.",
             r"(WARN|warn).*Ignoring HA message.*vote.*not in our membership list",
             r"pengine.*Attempting recovery of resource",
             r"is taking more than 2x its timeout",
             r"Confirm not received from",
             r"Welcome reply not received from",
             r"Attempting to schedule .* after a stop",
             r"Resource .* was active at shutdown",
             r"duplicate entries for call_id",
             r"Search terminated:",
             r":global_timer_callback",
             r"Faking parameter digest creation",
             r"Parameters to .* action changed:",
             r"Parameters to .* changed",
             r"The .* process .* terminated with signal",
             r"Child process .* terminated with signal",
             r"pengine:.*Recover .*\(.* -\> .*\)",
             r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
             r"Peer is not part of our cluster",
             r"We appear to be in an election loop",
             r"Unknown node -> we will not deliver message",
             r"(Blackbox dump requested|Problem detected)",
             r"pacemakerd.*Could not connect to Cluster Configuration Database API",
             r"Receiving messages from a node we think is dead",
             r"share the same cluster nodeid",
             r"share the same name",
 
             #r"crm_ipc_send:.*Request .* failed",
             #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received",
 
                 # Not inherently bad, but worth tracking
             #r"No need to invoke the TE",
             #r"ping.*: DEBUG: Updated connected = 0",
             #r"Digest mis-match:",
             r"crmd:.*Transition failed: terminated",
             r"Local CIB .* differs from .*:",
             r"warn.*:\s*Continuing but .* will NOT be used",
             r"warn.*:\s*Cluster configuration file .* is corrupt",
             #r"Executing .* fencing operation",
             #r"fence_pcmk.* Call to fence",
             #r"fence_pcmk",
             r"cman killed by node",
             r"Election storm",
             r"stalled the FSA with pending inputs",
         ]
 
 
         self.components["common-ignore"] = [
                     "Pending action:",
                     "error: crm_log_message_adv:",
                     "resources were active at shutdown",
                     "pending LRM operations at shutdown",
                     "Lost connection to the CIB service",
                     "Connection to the CIB terminated...",
                     "Sending message to CIB service FAILED",
                     "apply_xml_diff:.*Diff application failed!",
                     r"crmd.*:\s*Action A_RECOVER .* not supported",
                     "unconfirmed_actions:.*Waiting on .* unconfirmed actions",
                     "cib_native_msgready:.*Message pending on command channel",
                     r"crmd.*:\s*Performing A_EXIT_1 - forcefully exiting the CRMd",
                     "verify_stopped:.*Resource .* was active at shutdown.  You may ignore this error if it is unmanaged.",
                     "error: attrd_connection_destroy:.*Lost connection to attrd",
                     r".*:\s*Executing .* fencing operation \(.*\) on ",
                     r"(Blackbox dump requested|Problem detected)",
 #                    "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery",
 #                    "error: process_pe_message: Transition .* ERRORs found during PE processing",
             ]
         
         self.components["corosync-ignore"] = [
             r"error:.*Connection to the CPG API failed: Library error",
             r"The .* process .* exited",
             r"pacemakerd.*error:.*Child process .* exited",
             r"cib.*error:.*Corosync connection lost",
             r"stonith-ng.*error:.*Corosync connection terminated",
             r"The cib process .* exited: Invalid argument",
             r"The attrd process .* exited: Transport endpoint is not connected",
             r"The crmd process .* exited: Link has been severed",
             r"error:.*Child process cib .* exited: Invalid argument",
             r"error:.*Child process attrd .* exited: Transport endpoint is not connected",
             r"error:.*Child process crmd .* exited: Link has been severed",
             r"lrmd.*error:.*Connection to stonith-ng failed",
             r"lrmd.*error:.*Connection to stonith-ng.* closed",
             r"lrmd.*error:.*LRMD lost STONITH connection",
             r"crmd.*State transition .* S_RECOVERY",
             r"crmd.*error:.*FSA: Input I_ERROR",
             r"crmd.*error:.*FSA: Input I_TERMINATE",
             r"crmd.*error:.*Connection to cman failed",
             r"crmd.*error:.*Could not recover from internal error",
             r"error:.*Connection to cib_shm failed",
             r"error:.*Connection to cib_shm.* closed",
             r"error:.*STONITH connection failed",
             r"error: Connection to stonith-ng failed",
             r"crit: Fencing daemon connection failed",
             r"error: Connection to stonith-ng.* closed",
             ]
 
         self.components["corosync"] = [
             r"pacemakerd.*error:.*Connection destroyed",
             r"attrd.*:\s*crit:.*Lost connection to Corosync service",
             r"stonith.*:\s*Corosync connection terminated",
             r"cib.*:\s*Corosync connection lost!\s+Exiting.",
             r"crmd.*:\s*connection terminated",
             r"pengine.*Scheduling Node .* for STONITH",
             r"crmd.*:\s*Peer %s was terminated \(.*\) by .* for .*:\s*OK",
         ]
 
         self.components["cib-ignore"] = [
             "lrmd.*Connection to stonith-ng failed",
             "lrmd.*Connection to stonith-ng.* closed",
             "lrmd.*LRMD lost STONITH connection",
             "lrmd.*STONITH connection failed, finalizing .* pending operations",
             ]
 
         self.components["cib"] = [
                     "State transition .* S_RECOVERY",
                     "Respawning .* crmd",
                     "Respawning .* attrd",
                     "Connection to cib_.* failed",
                     "Connection to cib_.* closed",
                     "Connection to the CIB terminated...",
                     "(Child process|The) crmd .* exited: Generic Pacemaker error",
                     "(Child process|The) attrd .* exited: (Connection reset by peer|Transport endpoint is not connected)",
                     "Lost connection to CIB service",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*I_ERROR.*crmd_cib_connection_destroy",
                     "crmd.*Could not recover from internal error",
                     ]
 
         self.components["lrmd"] = [
                     "State transition .* S_RECOVERY",
                     "LRM Connection failed",
                     "Respawning .* crmd",
                     "Connection to lrmd failed",
                     "Connection to lrmd.* closed",
                     "crmd.*I_ERROR.*lrm_connection_destroy",
                     "(Child process|The) crmd .* exited: Generic Pacemaker error",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*Could not recover from internal error",
                     ]
         self.components["lrmd-ignore"] = []
 
         self.components["crmd"] = [
 #                    "WARN: determine_online_status: Node .* is unclean",
 #                    "Scheduling Node .* for STONITH",
 #                    "Executing .* fencing operation",
 # Only if the node wasn't the DC:  "State transition S_IDLE",
                     "State transition .* -> S_IDLE",
                     ]
         self.components["crmd-ignore"] = []
 
         self.components["attrd"] = []
         self.components["attrd-ignore"] = []
 
         self.components["pengine"] = [
                     "State transition .* S_RECOVERY",
                     "Respawning .* crmd",
                     "(The|Child process) crmd .* exited: Generic Pacemaker error",
                     "Connection to pengine failed",
                     "Connection to pengine.* closed",
                     "Connection to the Policy Engine failed",
                     "crmd.*I_ERROR.*save_cib_contents",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*Could not recover from internal error",
                     ]
         self.components["pengine-ignore"] = []
 
         self.components["stonith"] = [
             "Connection to stonith-ng failed",
             "LRMD lost STONITH connection",
             "Connection to stonith-ng.* closed",
             "Fencing daemon connection failed",
             r"crmd.*:\s*warn.*:\s*Callback already present",
         ]
         self.components["stonith-ignore"] = [
             r"pengine.*: Recover Fencing",
             r"Updating failcount for Fencing",
             r"error:.*Connection to stonith-ng failed",
             r"error:.*Connection to stonith-ng.*closed \(I/O condition=17\)",
             r"crit:.*Fencing daemon connection failed",
             r"error:.*Sign-in failed: triggered a retry",
             "STONITH connection failed, finalizing .* pending operations.",
             r"crmd.*:\s*Operation Fencing.* Error",
         ]
         self.components["stonith-ignore"].extend(self.components["common-ignore"])
 
 class crm_mcp(crm_cs_v0):
     '''
     The crm version 4 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of native corosync (no plugins)
     '''
     def __init__(self, name):
         crm_cs_v0.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service corosync start && service pacemaker start",
             "StopCmd"        : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop; service corosync stop",
 
             "EpochCmd"      : "crm_node -e",
             "QuorumCmd"      : "crm_node -q",
             "PartitionCmd"    : "crm_node -p",
         })
 
         self.search.update({
             # Close enough... "Corosync Cluster Engine exiting normally" isn't printed
             #   reliably and there's little interest in doing anything about it
             "Pat:We_stopped"   : "%s\W.*Unloading all Corosync service engines",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "crmd.*Node %s\[.*state is now lost",
 
             "Pat:ChildExit"    : "The .* process exited",
             "Pat:ChildKilled"  : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
         })
 
 #        if self.Env["have_systemd"]:
 #            self.update({
 #                # When systemd is in use, we can look for this instead
 #                "Pat:We_stopped"   : "%s.*Stopped Corosync Cluster Engine",
 #            })
 
 class crm_mcp_docker(crm_mcp):
     '''
     The crm version 4 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of native corosync (no plugins)
     '''
     def __init__(self, name):
         crm_mcp.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "pcmk_start",
             "StopCmd"        : "pcmk_stop",
         })
 
 class crm_cman(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, name):
         crm_cs_v0.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service pacemaker start",
             "StopCmd"        : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop",
 
             "EpochCmd"      : "crm_node -e --cman",
             "QuorumCmd"      : "crm_node -q --cman",
             "PartitionCmd"    : "crm_node -p --cman",
 
             "Pat:We_stopped"   : "%s.*Unloading all Corosync service engines",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "crmd.*Node %s\[.*state is now lost",
 
             "Pat:ChildKilled"  : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
         })
 
 
 class PatternSelector:
 
     def __init__(self, name=None):
         self.name = name
         self.base = BasePatterns("crm-base")
 
         if not name:
             crm_cs_v0("crm-plugin-v0")
             crm_cman("crm-cman")
             crm_mcp("crm-mcp")
             crm_lha("crm-lha")
         elif name == "crm-lha":
             crm_lha(name)
         elif name == "crm-plugin-v0":
             crm_cs_v0(name)
         elif name == "crm-cman":
             crm_cman(name)
         elif name == "crm-mcp":
             crm_mcp(name)
         elif name == "crm-mcp-docker":
             crm_mcp_docker(name)
 
     def get_variant(self, variant):
         if patternvariants.has_key(variant):
             return patternvariants[variant]
         print "defaulting to crm-base for %s" % variant
         return self.base
 
     def get_patterns(self, variant, kind):
         return self.get_variant(variant).get_patterns(kind)
 
     def get_template(self, variant, key):
         v = self.get_variant(variant)
         return v[key]
 
     def get_component(self, variant, kind):
         return self.get_variant(variant).get_component(kind)
 
     def __getitem__(self, key):
         return self.get_template(self.name, key)
 
 # python cts/CTSpatt.py -k crm-mcp -t StartCmd
 if __name__ == '__main__':
 
     pdir=os.path.dirname(sys.path[0])
     sys.path.insert(0, pdir) # So that things work from the source directory
 
     kind=None
     template=None
 
     skipthis=None
     args=sys.argv[1:]
     for i in range(0, len(args)):
        if skipthis:
            skipthis=None
            continue
 
        elif args[i] == "-k" or args[i] == "--kind":
            skipthis=1
            kind = args[i+1]
 
        elif args[i] == "-t" or args[i] == "--template":
            skipthis=1
            template = args[i+1]
 
        else:
            print "Illegal argument " + args[i]
 
 
     print PatternSelector(kind)[template]
diff --git a/lib/services/dbus.c b/lib/services/dbus.c
index 6341fc5380..e2efecb0f7 100644
--- a/lib/services/dbus.c
+++ b/lib/services/dbus.c
@@ -1,557 +1,563 @@
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/services.h>
 #include <dbus/dbus.h>
 #include <pcmk-dbus.h>
 
 #define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties"
 
 struct db_getall_data
 {
         char *name;
         char *target;
         char *object;
         void *userdata;
         void (*callback)(const char *name, const char *value, void *userdata);
 };
 
 static bool pcmk_dbus_error_check(DBusError *err, const char *prefix, const char *function, int line) 
 {
     if (err && dbus_error_is_set(err)) {
         do_crm_log_alias(LOG_ERR, __FILE__, function, line, "%s: DBus error '%s'", prefix, err->message);
         dbus_error_free(err);
         return TRUE;
     }
     return FALSE;
 }
 
 DBusConnection *pcmk_dbus_connect(void)
 {
     DBusError err;
     DBusConnection *connection;
 
     dbus_error_init(&err);
     connection = dbus_bus_get(DBUS_BUS_SYSTEM, &err);
     if(pcmk_dbus_error_check(&err, "Could not connect to System DBus", __FUNCTION__, __LINE__)) {
         return NULL;
     }
 
     if(connection) {
         pcmk_dbus_connection_setup_with_select(connection);
     }
     return connection;
 }
 
 void pcmk_dbus_disconnect(DBusConnection *connection)
 {
 }
 
 bool
 pcmk_dbus_find_error(const char *method, DBusPendingCall* pending, DBusMessage *reply, DBusError *ret)
 {
     DBusError error;
 
     dbus_error_init(&error);
 
     if(pending == NULL) {
         error.name = "org.clusterlabs.pacemaker.NoRequest";
         error.message = "No request sent";
 
     } else if(reply == NULL) {
         error.name = "org.clusterlabs.pacemaker.NoReply";
         error.message = "No reply";
 
     } else {
         DBusMessageIter args;
         int dtype = dbus_message_get_type(reply);
+        char *sig;
 
         switch(dtype) {
             case DBUS_MESSAGE_TYPE_METHOD_RETURN:
                 dbus_message_iter_init(reply, &args);
-                crm_trace("Call to %s returned '%s'", method, dbus_message_iter_get_signature(&args));
+                sig = dbus_message_iter_get_signature(&args);
+                crm_trace("Call to %s returned '%s'", method, sig);
+                dbus_free(sig);
                 break;
             case DBUS_MESSAGE_TYPE_INVALID:
                 error.message = "Invalid reply";
                 error.name = "org.clusterlabs.pacemaker.InvalidReply";
                 crm_err("Error processing %s response: %s", method, error.message);
                 break;
             case DBUS_MESSAGE_TYPE_METHOD_CALL:
                 error.message = "Invalid reply (method call)";
                 error.name = "org.clusterlabs.pacemaker.InvalidReply.Method";
                 crm_err("Error processing %s response: %s", method, error.message);
                 break;
             case DBUS_MESSAGE_TYPE_SIGNAL:
                 error.message = "Invalid reply (signal)";
                 error.name = "org.clusterlabs.pacemaker.InvalidReply.Signal";
                 crm_err("Error processing %s response: %s", method, error.message);
                 break;
 
             case DBUS_MESSAGE_TYPE_ERROR:
                 dbus_set_error_from_message (&error, reply);
                 crm_info("%s error '%s': %s", method, error.name, error.message);
                 break;
             default:
                 error.message = "Unknown reply type";
                 error.name = "org.clusterlabs.pacemaker.InvalidReply.Type";
                 crm_err("Error processing %s response: %s (%d)", method, error.message, dtype);
         }
     }
 
     if(ret && (error.name || error.message)) {
         *ret = error;
         return TRUE;
     }
 
     return FALSE;
 }
 
 DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, DBusError *error, int timeout)
 {
     const char *method = NULL;
     DBusMessage *reply = NULL;
     DBusPendingCall* pending = NULL;
 
     CRM_ASSERT(dbus_message_get_type (msg) == DBUS_MESSAGE_TYPE_METHOD_CALL);
     method = dbus_message_get_member (msg);
 
     if (timeout <= 0) {
         timeout = DBUS_TIMEOUT_USE_DEFAULT;
     }
 
     // send message and get a handle for a reply
     if (!dbus_connection_send_with_reply (connection, msg, &pending, timeout/* -1 is default timeout, aka. DBUS_TIMEOUT_USE_DEFAULT */)) {
         if(error) {
             dbus_error_init(error);
             error->message = "Call to dbus_connection_send_with_reply() failed";
             error->name = "org.clusterlabs.pacemaker.SendFailed";
         }
         crm_err("Error sending %s request", method);
         return NULL;
     }
 
     dbus_connection_flush(connection);
 
     if(pending) {
         /* block until we receive a reply */
         dbus_pending_call_block(pending);
 
         /* get the reply message */
         reply = dbus_pending_call_steal_reply(pending);
     }
 
     pcmk_dbus_find_error(method, pending, reply, error);
 
     if(pending) {
         /* free the pending message handle */
         dbus_pending_call_unref(pending);
     }
 
     return reply;
 }
 
 DBusPendingCall* pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
                     void(*done)(DBusPendingCall *pending, void *user_data), void *user_data, int timeout)
 {
     DBusError error;
     const char *method = NULL;
     DBusPendingCall* pending = NULL;
 
     dbus_error_init(&error);
 
     CRM_ASSERT(done);
     CRM_ASSERT(dbus_message_get_type (msg) == DBUS_MESSAGE_TYPE_METHOD_CALL);
     method = dbus_message_get_member (msg);
 
 
     if (timeout <= 0) {
         timeout = DBUS_TIMEOUT_USE_DEFAULT;
     }
 
     // send message and get a handle for a reply
     if (!dbus_connection_send_with_reply (connection, msg, &pending, timeout/* -1 is default timeout, aka. DBUS_TIMEOUT_USE_DEFAULT */)) {
         crm_err("Send with reply failed for %s", method);
         return NULL;
 
     } else if (pending == NULL) {
         crm_err("No pending call found for %s", method);
         return NULL;
     }
 
     crm_trace("DBus %s call sent", method);
     if (dbus_pending_call_get_completed(pending)) {
         crm_info("DBus %s call completed too soon", method);
         if(done) {
 #if 0
         /* This sounds like a good idea, but allegedly it breaks things */
         done(pending, user_data);
         pending = NULL;
 #else
         CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL));
 #endif
         }
 
     } else if(done) {
         CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL));
     }
     return pending;
 }
 
 bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line)
 {
     int dtype = 0;
     DBusMessageIter lfield;
 
     if(field == NULL) {
         if(dbus_message_iter_init(msg, &lfield)) {
             field = &lfield;
         }
     }
 
     if(field == NULL) {
         do_crm_log_alias(LOG_ERR, __FILE__, function, line,
                          "Empty parameter list in reply expecting '%c'", expected);
         return FALSE;
     }
 
     dtype = dbus_message_iter_get_arg_type(field);
 
     if(dtype != expected) {
         DBusMessageIter args;
+        char *sig;
 
         dbus_message_iter_init(msg, &args);
+        sig = dbus_message_iter_get_signature(&args);
         do_crm_log_alias(LOG_ERR, __FILE__, function, line,
-                         "Unexepcted DBus type, expected %c in '%s' instead of %c",
-                         expected, dbus_message_iter_get_signature(&args), dtype);
+                         "Unexpected DBus type, expected %c in '%s' instead of %c",
+                         expected, sig, dtype);
+        dbus_free(sig);
         return FALSE;
     }
 
     return TRUE;
 }
 
 static char *
 pcmk_dbus_lookup_result(DBusMessage *reply, struct db_getall_data *data)
 {
     DBusError error;
     char *output = NULL;
     DBusMessageIter dict;
     DBusMessageIter args;
 
     if(pcmk_dbus_find_error("GetAll", (void*)&error, reply, &error)) {
         crm_err("Cannot get properties from %s for %s", data->target, data->object);
         goto cleanup;
     }
 
     dbus_message_iter_init(reply, &args);
     if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __FUNCTION__, __LINE__)) {
         crm_err("Invalid reply from %s for %s", data->target, data->object);
         goto cleanup;
     }
 
     dbus_message_iter_recurse(&args, &dict);
     while (dbus_message_iter_get_arg_type (&dict) != DBUS_TYPE_INVALID) {
         DBusMessageIter sv;
         DBusMessageIter v;
         DBusBasicValue name;
         DBusBasicValue value;
 
         if(!pcmk_dbus_type_check(reply, &dict, DBUS_TYPE_DICT_ENTRY, __FUNCTION__, __LINE__)) {
             dbus_message_iter_next (&dict);
             continue;
         }
 
         dbus_message_iter_recurse(&dict, &sv);
         while (dbus_message_iter_get_arg_type (&sv) != DBUS_TYPE_INVALID) {
             int dtype = dbus_message_iter_get_arg_type(&sv);
 
             switch(dtype) {
                 case DBUS_TYPE_STRING:
                     dbus_message_iter_get_basic(&sv, &name);
 
                     if(data->name && strcmp(name.str, data->name) != 0) {
                         dbus_message_iter_next (&sv); /* Skip the value */
                     }
                     break;
                 case DBUS_TYPE_VARIANT:
                     dbus_message_iter_recurse(&sv, &v);
                     if(pcmk_dbus_type_check(reply, &v, DBUS_TYPE_STRING, __FUNCTION__, __LINE__)) {
                         dbus_message_iter_get_basic(&v, &value);
 
                         crm_trace("Property %s[%s] is '%s'", data->object, name.str, value.str);
                         if(data->callback) {
                             data->callback(name.str, value.str, data->userdata);
 
                         } else {
                             output = strdup(value.str);
                         }
 
                         if(data->name) {
                             goto cleanup;
                         }
                     }
                     break;
                 default:
                     pcmk_dbus_type_check(reply, &sv, DBUS_TYPE_STRING, __FUNCTION__, __LINE__);
             }
             dbus_message_iter_next (&sv);
         }
 
         dbus_message_iter_next (&dict);
     }
 
     if(data->name && data->callback) {
         crm_trace("No value for property %s[%s]", data->object, data->name);
         data->callback(data->name, NULL, data->userdata);
     }
 
   cleanup:
     free(data->target);
     free(data->object);
     free(data->name);
     free(data);
 
     return output;
 }
 
 static void
 pcmk_dbus_lookup_cb(DBusPendingCall *pending, void *user_data)
 {
     DBusMessage *reply = NULL;
 
     if(pending) {
         reply = dbus_pending_call_steal_reply(pending);
     }
 
     pcmk_dbus_lookup_result(reply, user_data);
 
     if(pending) {
         dbus_pending_call_unref(pending);
     }
     if(reply) {
         dbus_message_unref(reply);
     }
 }
 
 char *
 pcmk_dbus_get_property(
     DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name,
     void (*callback)(const char *name, const char *value, void *userdata), void *userdata, DBusPendingCall **pending,
     int timeout)
 {
     DBusMessage *msg;
     const char *method = "GetAll";
     char *output = NULL;
 
     struct db_getall_data *query_data = NULL;
 
     /* char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState"); */
 
     crm_debug("Calling: %s on %s", method, target);
     msg = dbus_message_new_method_call(target, // target for the method call
                                        obj, // object to call on
                                        BUS_PROPERTY_IFACE, // interface to call on
                                        method); // method name
 
     if (NULL == msg) {
         crm_err("Call to %s failed: No message", method);
         return NULL;
     }
 
     CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &iface, DBUS_TYPE_INVALID));
 
     query_data = malloc(sizeof(struct db_getall_data));
     if(query_data == NULL) {
         crm_err("Call to %s failed: malloc failed", method);
         return NULL;
     }
 
     query_data->target = strdup(target);
     query_data->object = strdup(obj);
     query_data->callback = callback;
     query_data->userdata = userdata;
     query_data->name = NULL;
 
     if(name) {
         query_data->name = strdup(name);
     }
 
     if(query_data->callback) {
         DBusPendingCall* _pending;
         _pending = pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, query_data, timeout);
         if (pending != NULL) {
             *pending = _pending;
         }
 
     } else {
         DBusMessage *reply = pcmk_dbus_send_recv(msg, connection, NULL, timeout);
 
         output = pcmk_dbus_lookup_result(reply, query_data);
 
         if(reply) {
             dbus_message_unref(reply);
         }
     }
 
     dbus_message_unref(msg);
 
     return output;
 }
 
 static void pcmk_dbus_connection_dispatch(DBusConnection *connection, DBusDispatchStatus new_status, void *data){
     crm_trace("status %d for %p", new_status, data);
     if (new_status == DBUS_DISPATCH_DATA_REMAINS){
         dbus_connection_dispatch(connection);
 
         while (dbus_connection_get_dispatch_status(connection) == DBUS_DISPATCH_DATA_REMAINS) {
             dbus_connection_dispatch(connection);
         }
     }
 }
 
 /* Copied from dbus-watch.c */
 
 static const char*
 dbus_watch_flags_to_string (int flags)
 {
   const char *watch_type;
 
   if ((flags & DBUS_WATCH_READABLE) &&
       (flags & DBUS_WATCH_WRITABLE))
     watch_type = "readwrite";
   else if (flags & DBUS_WATCH_READABLE)
     watch_type = "read";
   else if (flags & DBUS_WATCH_WRITABLE)
     watch_type = "write";
   else
     watch_type = "not read or write";
   return watch_type;
 }
 
 static int
 pcmk_dbus_watch_dispatch(gpointer userdata)
 {
     bool oom = FALSE;
     DBusWatch *watch = userdata;
     int flags = dbus_watch_get_flags(watch);
     bool enabled = dbus_watch_get_enabled (watch);
     mainloop_io_t *client = dbus_watch_get_data(watch);
 
     crm_trace("Dispatching client %p: %s", client, dbus_watch_flags_to_string(flags));
     if (enabled && is_set(flags, DBUS_WATCH_READABLE)) {
         oom = !dbus_watch_handle(watch, flags);
 
     } else if (enabled && is_set(flags, DBUS_WATCH_READABLE)) {
         oom = !dbus_watch_handle(watch, flags);
 
     } else if(enabled) {
         oom = !dbus_watch_handle(watch, DBUS_WATCH_ERROR);
     }
 
     if(flags != dbus_watch_get_flags(watch)) {
         flags = dbus_watch_get_flags(watch);
         crm_trace("Dispatched client %p: %s (%d)", client, dbus_watch_flags_to_string(flags), flags);
     }
 
     if(oom) {
         crm_err("DBus encountered OOM while attempting to dispatch %p (%s)", client, dbus_watch_flags_to_string(flags));
     }
     return 0;
 }
 
 static void
 pcmk_dbus_watch_destroy(gpointer userdata)
 {
     mainloop_io_t *client = dbus_watch_get_data(userdata);
     crm_trace("Destroyed %p", client);
 }
 
 
 struct mainloop_fd_callbacks pcmk_dbus_cb = {
     .dispatch = pcmk_dbus_watch_dispatch,
     .destroy = pcmk_dbus_watch_destroy,
 };
 
 static dbus_bool_t
 pcmk_dbus_watch_add(DBusWatch *watch, void *data){
     int fd = dbus_watch_get_unix_fd(watch);
 
     mainloop_io_t *client = mainloop_add_fd(
         "dbus", G_PRIORITY_DEFAULT, fd, watch, &pcmk_dbus_cb);
 
     crm_trace("Added watch %p with fd=%d to client %p", watch, fd, client);
     dbus_watch_set_data(watch, client, NULL);
     return TRUE;
 }
 
 static void
 pcmk_dbus_watch_toggle(DBusWatch *watch, void *data)
 {
     mainloop_io_t *client = dbus_watch_get_data(watch);
     crm_notice("DBus client %p is now %s", client, dbus_watch_get_enabled(watch)?"enabled":"disabled");
 }
 
 
 static void
 pcmk_dbus_watch_remove(DBusWatch *watch, void *data){
     mainloop_io_t *client = dbus_watch_get_data(watch);
 
     crm_trace("Removed client %p (%p)", client, data);
     mainloop_del_fd(client);
 }
 
 static gboolean
 pcmk_dbus_timeout_dispatch(gpointer data)
 {
     crm_info("Timeout %p expired", data);
     dbus_timeout_handle(data);
     return FALSE;
 }
 
 static dbus_bool_t
 pcmk_dbus_timeout_add(DBusTimeout *timeout, void *data){
     guint id = g_timeout_add(dbus_timeout_get_interval(timeout), pcmk_dbus_timeout_dispatch, timeout);
 
     crm_trace("Adding timeout %p (%ld)", timeout, dbus_timeout_get_interval(timeout));
 
     if(id) {
         dbus_timeout_set_data(timeout, GUINT_TO_POINTER(id), NULL);
     }
     return TRUE;
 }
 
 static void
 pcmk_dbus_timeout_remove(DBusTimeout *timeout, void *data){
     void *vid = dbus_timeout_get_data(timeout);
     guint id = GPOINTER_TO_UINT(vid);
 
     crm_trace("Removing timeout %p (%p)", timeout, data);
 
     if(id) {
         g_source_remove(id);
         dbus_timeout_set_data(timeout, 0, NULL);
     }
 }
 
 static void
 pcmk_dbus_timeout_toggle(DBusTimeout *timeout, void *data){
     bool enabled = dbus_timeout_get_enabled(timeout);
 
     crm_trace("Toggling timeout for %p to %s", timeout, enabled?"off":"on");
 
     if(enabled) {
         pcmk_dbus_timeout_add(timeout, data);
     } else {
         pcmk_dbus_timeout_remove(timeout, data);
     }
 }
 
 /* Inspired by http://www.kolej.mff.cuni.cz/~vesej3am/devel/dbus-select.c */
 
 void pcmk_dbus_connection_setup_with_select(DBusConnection *c){
         dbus_connection_set_exit_on_disconnect (c, FALSE);
 	dbus_connection_set_timeout_functions(
             c, pcmk_dbus_timeout_add, pcmk_dbus_timeout_remove, pcmk_dbus_timeout_toggle, NULL, NULL);
 	dbus_connection_set_watch_functions(c, pcmk_dbus_watch_add, pcmk_dbus_watch_remove, pcmk_dbus_watch_toggle, NULL, NULL);
 	dbus_connection_set_dispatch_status_function(c, pcmk_dbus_connection_dispatch, NULL, NULL);
 
 	pcmk_dbus_connection_dispatch(c, dbus_connection_get_dispatch_status(c), NULL);
 }
diff --git a/lib/services/services.c b/lib/services/services.c
index 08bff88d4b..7e2b9f798e 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -1,825 +1,853 @@
 /*
  * Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <crm_internal.h>
 
 #ifndef _GNU_SOURCE
 #  define _GNU_SOURCE
 #endif
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <stdio.h>
 
 #include <errno.h>
 #include <unistd.h>
 #include <dirent.h>
 #include <fcntl.h>
 
 #include <crm/crm.h>
 #include <crm/common/mainloop.h>
 #include <crm/services.h>
 #include <crm/msg_xml.h>
 #include "services_private.h"
 
 #if SUPPORT_UPSTART
 #  include <upstart.h>
 #endif
 
 #if SUPPORT_SYSTEMD
 #  include <systemd.h>
 #endif
 
 /* TODO: Develop a rollover strategy */
 
 static int operations = 0;
 GHashTable *recurring_actions = NULL;
 
 /* ops waiting to run async because of conflicting active
  * pending ops*/
 GList *blocked_ops = NULL;
 
 /* ops currently active (in-flight) */
 GList *inflight_ops = NULL;
 
 svc_action_t *
 services_action_create(const char *name, const char *action, int interval, int timeout)
 {
     return resources_action_create(name, "lsb", NULL, name, action, interval, timeout, NULL, 0);
 }
 
 const char *
 resources_find_service_class(const char *agent)
 {
     /* Priority is:
      * - lsb
      * - systemd
      * - upstart
      */
     int rc = 0;
     struct stat st;
     char *path = NULL;
 
 #ifdef LSB_ROOT_DIR
     rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent);
     if (rc > 0 && stat(path, &st) == 0) {
         free(path);
         return "lsb";
     }
     free(path);
 #endif
 
 #if SUPPORT_SYSTEMD
     if (systemd_unit_exists(agent)) {
         return "systemd";
     }
 #endif
 
 #if SUPPORT_UPSTART
     if (upstart_job_exists(agent)) {
         return "upstart";
     }
 #endif
     return NULL;
 }
 
 
 svc_action_t *
 resources_action_create(const char *name, const char *standard, const char *provider,
                         const char *agent, const char *action, int interval, int timeout,
                         GHashTable * params, enum svc_action_flags flags)
 {
     svc_action_t *op = NULL;
 
     /*
      * Do some up front sanity checks before we go off and
      * build the svc_action_t instance.
      */
 
     if (crm_strlen_zero(name)) {
         crm_err("A service or resource action must have a name.");
         goto return_error;
     }
 
     if (crm_strlen_zero(standard)) {
         crm_err("A service action must have a valid standard.");
         goto return_error;
     }
 
     if (!strcasecmp(standard, "ocf") && crm_strlen_zero(provider)) {
         crm_err("An OCF resource action must have a provider.");
         goto return_error;
     }
 
     if (crm_strlen_zero(agent)) {
         crm_err("A service or resource action must have an agent.");
         goto return_error;
     }
 
     if (crm_strlen_zero(action)) {
         crm_err("A service or resource action must specify an action.");
         goto return_error;
     }
 
     if (safe_str_eq(action, "monitor") && (
 #if SUPPORT_HEARTBEAT
         safe_str_eq(standard, "heartbeat") ||
 #endif
         safe_str_eq(standard, "lsb") || safe_str_eq(standard, "service"))) {
         action = "status";
     }
 
     /*
      * Sanity checks passed, proceed!
      */
 
     op = calloc(1, sizeof(svc_action_t));
     op->opaque = calloc(1, sizeof(svc_action_private_t));
     op->rsc = strdup(name);
     op->action = strdup(action);
     op->interval = interval;
     op->timeout = timeout;
     op->standard = strdup(standard);
     op->agent = strdup(agent);
     op->sequence = ++operations;
     op->flags = flags;
     if (asprintf(&op->id, "%s_%s_%d", name, action, interval) == -1) {
         goto return_error;
     }
 
     if (strcasecmp(op->standard, "service") == 0) {
         const char *expanded = resources_find_service_class(op->agent);
 
         if(expanded) {
             crm_debug("Found a %s agent for %s/%s", expanded, op->rsc, op->agent);
             free(op->standard);
             op->standard = strdup(expanded);
 
         } else {
             crm_info("Cannot determine the standard for %s (%s)", op->rsc, op->agent);
             free(op->standard);
             op->standard = strdup("lsb");
         }
         CRM_ASSERT(op->standard);
     }
 
     if (strcasecmp(op->standard, "ocf") == 0) {
         op->provider = strdup(provider);
         op->params = params;
         params = NULL;
 
         if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) {
             crm_err("Internal error: cannot create agent path");
             goto return_error;
         }
         op->opaque->args[0] = strdup(op->opaque->exec);
         op->opaque->args[1] = strdup(action);
 
     } else if (strcasecmp(op->standard, "lsb") == 0) {
         if (op->agent[0] == '/') {
             /* if given an absolute path, use that instead
              * of tacking on the LSB_ROOT_DIR path to the front */
             op->opaque->exec = strdup(op->agent);
         } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) {
             crm_err("Internal error: cannot create agent path");
             goto return_error;
         }
         op->opaque->args[0] = strdup(op->opaque->exec);
         op->opaque->args[1] = strdup(op->action);
         op->opaque->args[2] = NULL;
 #if SUPPORT_HEARTBEAT
     } else if (strcasecmp(op->standard, "heartbeat") == 0) {
         int index;
         int param_num;
         char buf_tmp[20];
         void *value_tmp;
 
         if (op->agent[0] == '/') {
             /* if given an absolute path, use that instead
              * of tacking on the HB_RA_DIR path to the front */
             op->opaque->exec = strdup(op->agent);
         } else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) {
             crm_err("Internal error: cannot create agent path");
             goto return_error;
         }
         op->opaque->args[0] = strdup(op->opaque->exec);
 
         /* The "heartbeat" agent class only has positional arguments,
          * which we keyed by their decimal position number. */
         param_num = 1;
 	for (index = 1; index <= MAX_ARGC - 3; index++ ) {
             snprintf(buf_tmp, sizeof(buf_tmp), "%d", index);
             value_tmp = g_hash_table_lookup(params, buf_tmp);
             if (value_tmp == NULL) {
                 /* maybe: strdup("") ??
                  * But the old lrmd did simply continue as well. */
                 continue;
             }
             op->opaque->args[param_num++] = strdup(value_tmp);
         }
 
 	/* Add operation code as the last argument, */
 	/* and the teminating NULL pointer */
         op->opaque->args[param_num++] = strdup(op->action);
         op->opaque->args[param_num] = NULL;
 #endif
 #if SUPPORT_SYSTEMD
     } else if (strcasecmp(op->standard, "systemd") == 0) {
         op->opaque->exec = strdup("systemd-dbus");
 #endif
 #if SUPPORT_UPSTART
     } else if (strcasecmp(op->standard, "upstart") == 0) {
         op->opaque->exec = strdup("upstart-dbus");
 #endif
     } else if (strcasecmp(op->standard, "service") == 0) {
         op->opaque->exec = strdup(SERVICE_SCRIPT);
         op->opaque->args[0] = strdup(SERVICE_SCRIPT);
         op->opaque->args[1] = strdup(agent);
         op->opaque->args[2] = strdup(action);
 
 #if SUPPORT_NAGIOS
     } else if (strcasecmp(op->standard, "nagios") == 0) {
         int index = 0;
 
         if (op->agent[0] == '/') {
             /* if given an absolute path, use that instead
              * of tacking on the NAGIOS_PLUGIN_DIR path to the front */
             op->opaque->exec = strdup(op->agent);
 
         } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) {
             crm_err("Internal error: cannot create agent path");
             goto return_error;
         }
 
         op->opaque->args[0] = strdup(op->opaque->exec);
         index = 1;
 
         if (safe_str_eq(op->action, "monitor") && op->interval == 0) {
             /* Invoke --version for a nagios probe */
             op->opaque->args[index] = strdup("--version");
             index++;
 
         } else if (params) {
             GHashTableIter iter;
             char *key = NULL;
             char *value = NULL;
             static int args_size = sizeof(op->opaque->args) / sizeof(char *);
 
             g_hash_table_iter_init(&iter, params);
 
             while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) &&
                    index <= args_size - 3) {
                 int len = 3;
                 char *long_opt = NULL;
 
                 if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) {
                     continue;
                 }
 
                 len += strlen(key);
                 long_opt = calloc(1, len);
                 sprintf(long_opt, "--%s", key);
                 long_opt[len - 1] = 0;
 
                 op->opaque->args[index] = long_opt;
                 op->opaque->args[index + 1] = strdup(value);
                 index += 2;
             }
         }
         op->opaque->args[index] = NULL;
 #endif
 
     } else {
         crm_err("Unknown resource standard: %s", op->standard);
         services_action_free(op);
         op = NULL;
     }
 
     if(params) {
         g_hash_table_destroy(params);
     }
     return op;
 
   return_error:
     if(params) {
         g_hash_table_destroy(params);
     }
     services_action_free(op);
 
     return NULL;
 }
 
 svc_action_t *
 services_action_create_generic(const char *exec, const char *args[])
 {
     svc_action_t *op;
     unsigned int cur_arg;
 
     op = calloc(1, sizeof(*op));
     op->opaque = calloc(1, sizeof(svc_action_private_t));
 
     op->opaque->exec = strdup(exec);
     op->opaque->args[0] = strdup(exec);
 
     for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) {
         op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]);
 
         if (cur_arg == DIMOF(op->opaque->args) - 1) {
             crm_err("svc_action_t args list not long enough for '%s' execution request.", exec);
             break;
         }
     }
 
     return op;
 }
 
+#if SUPPORT_DBUS
+/*
+ * \internal
+ * \brief Update operation's pending DBus call, unreferencing old one if needed
+ *
+ * \param[in,out] op       Operation to modify
+ * \param[in]     pending  Pending call to set
+ */
+void
+services_set_op_pending(svc_action_t *op, DBusPendingCall *pending)
+{
+    if (op->opaque->pending && (op->opaque->pending != pending)) {
+        if (pending) {
+            crm_info("Lost pending DBus call (%p)", op->opaque->pending);
+        } else {
+            crm_trace("Done with pending DBus call (%p)", op->opaque->pending);
+        }
+        dbus_pending_call_unref(op->opaque->pending);
+    }
+    op->opaque->pending = pending;
+    if (pending) {
+        crm_trace("Updated pending DBus call (%p)", pending);
+    } else {
+        crm_trace("Cleared pending DBus call");
+    }
+}
+#endif
+
 void
 services_action_cleanup(svc_action_t * op)
 {
 #if SUPPORT_DBUS
     if(op->opaque == NULL) {
         return;
     }
 
     if(op->opaque->timerid != 0) {
         crm_trace("Removing timer for call %s to %s", op->action, op->rsc);
         g_source_remove(op->opaque->timerid);
         op->opaque->timerid = 0;
     }
 
     if(op->opaque->pending) {
         crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc);
         if(dbus_pending_call_get_completed(op->opaque->pending)) {
             crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc);
         }
         dbus_pending_call_cancel(op->opaque->pending);
         dbus_pending_call_unref(op->opaque->pending);
         op->opaque->pending = NULL;
     }
 
     if (op->opaque->stderr_gsource) {
         mainloop_del_fd(op->opaque->stderr_gsource);
         op->opaque->stderr_gsource = NULL;
     }
 
     if (op->opaque->stdout_gsource) {
         mainloop_del_fd(op->opaque->stdout_gsource);
         op->opaque->stdout_gsource = NULL;
     }
 #endif
 }
 
 void
 services_action_free(svc_action_t * op)
 {
     unsigned int i;
 
     if (op == NULL) {
         return;
     }
 
     services_action_cleanup(op);
 
     if (op->opaque->repeat_timer) {
         g_source_remove(op->opaque->repeat_timer);
         op->opaque->repeat_timer = 0;
     }
 
     free(op->id);
     free(op->opaque->exec);
 
     for (i = 0; i < DIMOF(op->opaque->args); i++) {
         free(op->opaque->args[i]);
     }
 
     free(op->opaque);
     free(op->rsc);
     free(op->action);
 
     free(op->standard);
     free(op->agent);
     free(op->provider);
 
     free(op->stdout_data);
     free(op->stderr_data);
 
     if (op->params) {
         g_hash_table_destroy(op->params);
         op->params = NULL;
     }
 
     free(op);
 }
 
 gboolean
 cancel_recurring_action(svc_action_t * op)
 {
     crm_info("Cancelling operation %s", op->id);
 
     if (recurring_actions) {
         g_hash_table_remove(recurring_actions, op->id);
     }
 
     if (op->opaque->repeat_timer) {
         g_source_remove(op->opaque->repeat_timer);
         op->opaque->repeat_timer = 0;
     }
 
     return TRUE;
 }
 
 gboolean
 services_action_cancel(const char *name, const char *action, int interval)
 {
     svc_action_t *op = NULL;
     char id[512];
 
     snprintf(id, sizeof(id), "%s_%s_%d", name, action, interval);
 
     if (!(op = g_hash_table_lookup(recurring_actions, id))) {
         return FALSE;
     }
 
     /* Always kill the recurring timer */
     cancel_recurring_action(op);
 
     if (op->pid == 0) {
         op->status = PCMK_LRM_OP_CANCELLED;
         if (op->opaque->callback) {
             op->opaque->callback(op);
         }
 
         blocked_ops = g_list_remove(blocked_ops, op);
         services_action_free(op);
 
     } else {
         crm_info("Cancelling in-flight op: performing early termination of %s (pid=%d)", id, op->pid);
         op->cancel = 1;
         if (mainloop_child_kill(op->pid) == FALSE) {
             /* even though the early termination failed,
              * the op will be marked as cancelled once it completes. */
             crm_err("Termination of %s (pid=%d) failed", id, op->pid);
             return FALSE;
         }
     }
 
     return TRUE;
 }
 
 gboolean
 services_action_kick(const char *name, const char *action, int interval /* ms */)
 {
     svc_action_t * op = NULL;
     char *id = NULL;
 
     if (asprintf(&id, "%s_%s_%d", name, action, interval) == -1) {
         return FALSE;
     }
 
     op = g_hash_table_lookup(recurring_actions, id);
     free(id);
 
     if (op == NULL) {
         return FALSE;
     }
 
     if (op->pid) {
         return TRUE;
     } else {
         if (op->opaque->repeat_timer) {
             g_source_remove(op->opaque->repeat_timer);
             op->opaque->repeat_timer = 0;
         }
         recurring_action_timer(op);
         return TRUE;
     }
 
 }
 
 /* add new recurring operation, check for duplicates. 
  * - if duplicate found, return TRUE, immediately reschedule op.
  * - if no dup, return FALSE, inserve into recurring op list.*/
 static gboolean
 handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_action_t *))
 {
     svc_action_t * dup = NULL;
 
     if (recurring_actions == NULL) {
         recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL);
         return FALSE;
     }
 
     /* check for duplicates */
     dup = g_hash_table_lookup(recurring_actions, op->id);
 
     if (dup && (dup != op)) {
         /* update user data */
         if (op->opaque->callback) {
             dup->opaque->callback = op->opaque->callback;
             dup->cb_data = op->cb_data;
             op->cb_data = NULL;
         }
         /* immediately execute the next interval */
         if (dup->pid != 0) {
             if (op->opaque->repeat_timer) {
                 g_source_remove(op->opaque->repeat_timer);
                 op->opaque->repeat_timer = 0;
             }
             recurring_action_timer(dup);
         }
         /* free the dup.  */
         services_action_free(op);
         return TRUE;
     }
 
     return FALSE;
 }
 
 static gboolean
 action_async_helper(svc_action_t * op) {
     gboolean res = FALSE;
 
     if (op->standard && strcasecmp(op->standard, "upstart") == 0) {
 #if SUPPORT_UPSTART
         res = upstart_job_exec(op, FALSE);
 #endif
     } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) {
 #if SUPPORT_SYSTEMD
         res =  systemd_unit_exec(op);
 #endif
     } else {
         res = services_os_action_execute(op, FALSE);
     }
 
     /* keep track of ops that are in-flight to avoid collisions in the same namespace */
     if (res) {
         inflight_ops = g_list_append(inflight_ops, op);
     }
 
     return res;
 }
 
 gboolean
 services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *))
 {
     op->synchronous = false;
     if (action_callback) {
         op->opaque->callback = action_callback;
     }
 
     if (op->interval > 0) {
         if (handle_duplicate_recurring(op, action_callback) == TRUE) {
             /* entry rescheduled, dup freed */
             /* exit early */
             return TRUE;
         }
         g_hash_table_replace(recurring_actions, op->id, op);
     }
 
     if (is_op_blocked(op->rsc)) {
         blocked_ops = g_list_append(blocked_ops, op);
         return TRUE;
     }
 
     return action_async_helper(op);
 }
 
 
 static gboolean processing_blocked_ops = FALSE;
 
 gboolean
 is_op_blocked(const char *rsc)
 {
     GList *gIter = NULL;
     svc_action_t *op = NULL;
 
     for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) {
         op = gIter->data;
         if (safe_str_eq(op->rsc, rsc)) {
             return TRUE;
         }
     }
 
     return FALSE;
 }
 
 void
 handle_blocked_ops(void)
 {
     GList *executed_ops = NULL;
     GList *gIter = NULL;
     svc_action_t *op = NULL;
     gboolean res = FALSE;
 
     if (processing_blocked_ops) {
         /* avoid nested calling of this function */
         return;
     }
 
     processing_blocked_ops = TRUE;
 
     /* n^2 operation here, but blocked ops are incredibly rare. this list
      * will be empty 99% of the time. */
     for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) {
         op = gIter->data;
         if (is_op_blocked(op->rsc)) {
             continue;
         }
         executed_ops = g_list_append(executed_ops, op);
         res = action_async_helper(op);
         if (res == FALSE) {
             op->status = PCMK_LRM_OP_ERROR;
             /* this can cause this function to be called recursively
              * which is why we have processing_blocked_ops static variable */
             operation_finalize(op);
         }
     }
 
     for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) {
         op = gIter->data;
         blocked_ops = g_list_remove(blocked_ops, op);
     }
     g_list_free(executed_ops);
 
     processing_blocked_ops = FALSE;
 }
 
 gboolean
 services_action_sync(svc_action_t * op)
 {
     gboolean rc = TRUE;
 
     if (op == NULL) {
         crm_trace("No operation to execute");
         return FALSE;
     }
 
     op->synchronous = true;
     if (op->standard && strcasecmp(op->standard, "upstart") == 0) {
 #if SUPPORT_UPSTART
         rc = upstart_job_exec(op, TRUE);
 #endif
     } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) {
 #if SUPPORT_SYSTEMD
         rc = systemd_unit_exec(op);
 #endif
     } else {
         rc = services_os_action_execute(op, TRUE);
     }
     crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc);
     if (op->stdout_data) {
         crm_trace(" >  stdout: %s", op->stdout_data);
     }
     if (op->stderr_data) {
         crm_trace(" >  stderr: %s", op->stderr_data);
     }
     return rc;
 }
 
 GList *
 get_directory_list(const char *root, gboolean files, gboolean executable)
 {
     return services_os_get_directory_list(root, files, executable);
 }
 
 GList *
 services_list(void)
 {
     return resources_list_agents("lsb", NULL);
 }
 
 #if SUPPORT_HEARTBEAT
 static GList *
 resources_os_list_hb_agents(void)
 {
     return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE);
 }
 #endif
 
 GList *
 resources_list_standards(void)
 {
     GList *standards = NULL;
     GList *agents = NULL;
 
     standards = g_list_append(standards, strdup("ocf"));
     standards = g_list_append(standards, strdup("lsb"));
     standards = g_list_append(standards, strdup("service"));
 
 #if SUPPORT_SYSTEMD
     agents = systemd_unit_listall();
 #else
     agents = NULL;
 #endif
 
     if (agents) {
         standards = g_list_append(standards, strdup("systemd"));
         g_list_free_full(agents, free);
     }
 #if SUPPORT_UPSTART
     agents = upstart_job_listall();
 #else
     agents = NULL;
 #endif
 
     if (agents) {
         standards = g_list_append(standards, strdup("upstart"));
         g_list_free_full(agents, free);
     }
 #if SUPPORT_NAGIOS
     agents = resources_os_list_nagios_agents();
     if (agents) {
         standards = g_list_append(standards, strdup("nagios"));
         g_list_free_full(agents, free);
     }
 #endif
 
 #if SUPPORT_HEARTBEAT
     standards = g_list_append(standards, strdup("heartbeat"));
 #endif
 
     return standards;
 }
 
 GList *
 resources_list_providers(const char *standard)
 {
     if (strcasecmp(standard, "ocf") == 0) {
         return resources_os_list_ocf_providers();
     }
 
     return NULL;
 }
 
 GList *
 resources_list_agents(const char *standard, const char *provider)
 {
     if (standard == NULL || strcasecmp(standard, "service") == 0) {
         GList *tmp1;
         GList *tmp2;
         GList *result = resources_os_list_lsb_agents();
 
         if (standard == NULL) {
             tmp1 = result;
             tmp2 = resources_os_list_ocf_agents(NULL);
             if (tmp2) {
                 result = g_list_concat(tmp1, tmp2);
             }
         }
 #if SUPPORT_SYSTEMD
         tmp1 = result;
         tmp2 = systemd_unit_listall();
         if (tmp2) {
             result = g_list_concat(tmp1, tmp2);
         }
 #endif
 
 #if SUPPORT_UPSTART
         tmp1 = result;
         tmp2 = upstart_job_listall();
         if (tmp2) {
             result = g_list_concat(tmp1, tmp2);
         }
 #endif
 
         return result;
 
     } else if (strcasecmp(standard, "ocf") == 0) {
         return resources_os_list_ocf_agents(provider);
     } else if (strcasecmp(standard, "lsb") == 0) {
         return resources_os_list_lsb_agents();
 #if SUPPORT_HEARTBEAT
     } else if (strcasecmp(standard, "heartbeat") == 0) {
         return resources_os_list_hb_agents();
 #endif
 #if SUPPORT_SYSTEMD
     } else if (strcasecmp(standard, "systemd") == 0) {
         return systemd_unit_listall();
 #endif
 #if SUPPORT_UPSTART
     } else if (strcasecmp(standard, "upstart") == 0) {
         return upstart_job_listall();
 #endif
 #if SUPPORT_NAGIOS
     } else if (strcasecmp(standard, "nagios") == 0) {
         return resources_os_list_nagios_agents();
 #endif
     }
 
     return NULL;
 }
diff --git a/lib/services/services_private.h b/lib/services/services_private.h
index 183afb5d79..a98cd91dd9 100644
--- a/lib/services/services_private.h
+++ b/lib/services/services_private.h
@@ -1,66 +1,70 @@
 /*
  * Copyright (C) 2010 - 2011, Red Hat, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #ifndef __MH_SERVICES_PRIVATE_H__
 #  define __MH_SERVICES_PRIVATE_H__
 
 #if SUPPORT_DBUS
 #  include <dbus/dbus.h>
 #endif
 
 #define MAX_ARGC        255
 struct svc_action_private_s {
     char *exec;
     char *args[MAX_ARGC];
 
     guint repeat_timer;
     void (*callback) (svc_action_t * op);
 
     int stderr_fd;
     mainloop_io_t *stderr_gsource;
 
     int stdout_fd;
     mainloop_io_t *stdout_gsource;
 #if SUPPORT_DBUS
     DBusPendingCall* pending;
     unsigned timerid;
 #endif
 };
 
 GList *services_os_get_directory_list(const char *root, gboolean files, gboolean executable);
 
 gboolean services_os_action_execute(svc_action_t * op, gboolean synchronous);
 
 GList *resources_os_list_lsb_agents(void);
 
 GList *resources_os_list_ocf_providers(void);
 
 GList *resources_os_list_ocf_agents(const char *provider);
 
 GList *resources_os_list_nagios_agents(void);
 
 gboolean cancel_recurring_action(svc_action_t * op);
 
 gboolean recurring_action_timer(gpointer data);
 gboolean operation_finalize(svc_action_t * op);
 
 void handle_blocked_ops(void);
 
 gboolean is_op_blocked(const char *rsc);
 
+#if SUPPORT_DBUS
+void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending);
+#endif
+
 #endif                          /* __MH_SERVICES_PRIVATE_H__ */
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index 749d61c427..e1e1bc9691 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -1,691 +1,692 @@
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  * Copyright (C) 2012 Andrew Beekhof <andrew@beekhof.net>
  */
 
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/services.h>
 #include <crm/common/mainloop.h>
 
 #include <gio/gio.h>
 #include <services_private.h>
 #include <systemd.h>
 #include <dbus/dbus.h>
 #include <pcmk-dbus.h>
 
 #define BUS_NAME "org.freedesktop.systemd1"
 #define BUS_PATH "/org/freedesktop/systemd1"
 
 #define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties"
 
 /*
    /usr/share/dbus-1/interfaces/org.freedesktop.systemd1.Manager.xml
 */
 gboolean
 systemd_unit_exec_with_unit(svc_action_t * op, const char *unit);
 
 
 struct unit_info {
     const char *id;
     const char *description;
     const char *load_state;
     const char *active_state;
     const char *sub_state;
     const char *following;
     const char *unit_path;
     uint32_t job_id;
     const char *job_type;
     const char *job_path;
 };
 
 struct pcmk_dbus_data 
 {
         char *name;
         char *unit;
         DBusError error;
         svc_action_t *op;
         void (*callback)(DBusMessage *reply, svc_action_t *op);
 };
 
 static DBusMessage *systemd_new_method(const char *iface, const char *method)
 {
     crm_trace("Calling: %s on %s", method, iface);
     return dbus_message_new_method_call(BUS_NAME, // target for the method call
                                         BUS_PATH, // object to call on
                                         iface, // interface to call on
                                         method); // method name
 }
 
 
 static DBusConnection* systemd_proxy = NULL;
 static gboolean
 systemd_init(void)
 {
     static int need_init = 1;
     /* http://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html */
 
     if (need_init) {
         need_init = 0;
         systemd_proxy = pcmk_dbus_connect();
     }
     if (systemd_proxy == NULL) {
         return FALSE;
     }
     return TRUE;
 }
 
 void
 systemd_cleanup(void)
 {
     if (systemd_proxy) {
         pcmk_dbus_disconnect(systemd_proxy);
         systemd_proxy = NULL;
     }
 }
 
 static char *
 systemd_service_name(const char *name)
 {
     if (name == NULL) {
         return NULL;
 
     } else if (strstr(name, ".service")) {
         return strdup(name);
     }
 
     return crm_strdup_printf("%s.service", name);
 }
 
 static void
 systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
 {
     DBusError error;
     DBusMessage *reply = NULL;
     unsigned int reload_count = GPOINTER_TO_UINT(user_data);
 
     dbus_error_init(&error);
     if(pending) {
         reply = dbus_pending_call_steal_reply(pending);
     }
 
     if(pcmk_dbus_find_error("Reload", pending, reply, &error)) {
         crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);
 
     } else {
         crm_trace("Reload %d complete", reload_count);
     }
 
     if(pending) {
         dbus_pending_call_unref(pending);
     }
     if(reply) {
         dbus_message_unref(reply);
     }
 }
 
 static bool
 systemd_daemon_reload(int timeout)
 {
     static unsigned int reload_count = 0;
     const char *method = "Reload";
 
 
     reload_count++;
     if(reload_count % 10 == 0) {
         DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);
 
         CRM_ASSERT(msg != NULL);
         pcmk_dbus_send(msg, systemd_proxy, systemd_daemon_reload_complete, GUINT_TO_POINTER(reload_count), timeout);
         dbus_message_unref(msg);
     }
     return TRUE;
 }
 
 static const char *
 systemd_loadunit_result(DBusMessage *reply, svc_action_t * op)
 {
     const char *path = NULL;
 
     if(pcmk_dbus_find_error("LoadUnit", (void*)&path, reply, NULL)) {
         if(op) {
             crm_warn("No unit found for %s", op->rsc);
         }
 
     } else if(pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
         dbus_message_get_args (reply, NULL,
                                DBUS_TYPE_OBJECT_PATH, &path,
                                DBUS_TYPE_INVALID);
     }
 
     if(op) {
         systemd_unit_exec_with_unit(op, path);
     }
 
     return path;
 }
 
 
 static void
 systemd_loadunit_cb(DBusPendingCall *pending, void *user_data)
 {
     DBusMessage *reply = NULL;
     svc_action_t * op = user_data;
 
     if(pending) {
         reply = dbus_pending_call_steal_reply(pending);
     }
 
     if(op) {
         crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action);
     } else {
         crm_trace("Got result: %p for %p", reply, pending);
     }
     systemd_loadunit_result(reply, user_data);
 
     if(pending) {
         dbus_pending_call_unref(pending);
     }
     if(reply) {
         dbus_message_unref(reply);
     }
 }
 
 static char *
 systemd_unit_by_name(const gchar * arg_name, svc_action_t *op)
 {
     DBusMessage *msg;
     DBusMessage *reply = NULL;
     char *name = NULL;
 
 /*
   Equivalent to GetUnit if its already loaded
   <method name="LoadUnit">
    <arg name="name" type="s" direction="in"/>
    <arg name="unit" type="o" direction="out"/>
   </method>
  */
 
     if (systemd_init() == FALSE) {
         return FALSE;
     }
 
     msg = systemd_new_method(BUS_NAME".Manager", "LoadUnit");
     CRM_ASSERT(msg != NULL);
 
     name = systemd_service_name(arg_name);
     CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
     free(name);
 
     if(op == NULL || op->synchronous) {
         const char *unit = NULL;
         char *munit = NULL;
         DBusError error;
 
         dbus_error_init(&error);
         reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error, op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT);
         dbus_message_unref(msg);
 
         unit = systemd_loadunit_result(reply, op);
         if(unit) {
             munit = strdup(unit);
         }
         if(reply) {
             dbus_message_unref(reply);
         }
         return munit;
     }
 
     pcmk_dbus_send(msg, systemd_proxy, systemd_loadunit_cb, op, op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT);
     dbus_message_unref(msg);
     return NULL;
 }
 
 GList *
 systemd_unit_listall(void)
 {
     int lpc = 0;
     GList *units = NULL;
     DBusMessageIter args;
     DBusMessageIter unit;
     DBusMessageIter elem;
     DBusMessage *msg = NULL;
     DBusMessage *reply = NULL;
     const char *method = "ListUnits";
     DBusError error;
 
     if (systemd_init() == FALSE) {
         return NULL;
     }
 
 /*
         "  <method name=\"ListUnits\">\n"                               \
         "   <arg name=\"units\" type=\"a(ssssssouso)\" direction=\"out\"/>\n" \
         "  </method>\n"                                                 \
 */
 
     dbus_error_init(&error);
     msg = systemd_new_method(BUS_NAME".Manager", method);
     CRM_ASSERT(msg != NULL);
 
     reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error, DBUS_TIMEOUT_USE_DEFAULT);
     dbus_message_unref(msg);
 
     if(error.name) {
         crm_err("Call to %s failed: %s", method, error.name);
         return NULL;
 
     } else if (reply == NULL) {
         crm_err("Call to %s failed: Message has no reply", method);
         return NULL;
 
     } else if (!dbus_message_iter_init(reply, &args)) {
         crm_err("Call to %s failed: Message has no arguments", method);
         dbus_message_unref(reply);
         return NULL;
     }
 
     if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __FUNCTION__, __LINE__)) {
         crm_err("Call to %s failed: Message has invalid arguments", method);
         dbus_message_unref(reply);
         return NULL;
     }
 
     dbus_message_iter_recurse(&args, &unit);
     while (dbus_message_iter_get_arg_type (&unit) != DBUS_TYPE_INVALID) {
         DBusBasicValue value;
 
         if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_STRUCT, __FUNCTION__, __LINE__)) {
             continue;
         }
 
         dbus_message_iter_recurse(&unit, &elem);
         if(!pcmk_dbus_type_check(reply, &elem, DBUS_TYPE_STRING, __FUNCTION__, __LINE__)) {
             continue;
         }
 
         dbus_message_iter_get_basic(&elem, &value);
         crm_trace("Got: %s", value.str);
         if(value.str) {
             char *match = strstr(value.str, ".service");
 
             if (match) {
                 lpc++;
                 match[0] = 0;
 
                 units = g_list_append(units, strdup(value.str));
             }
         }
         dbus_message_iter_next (&unit);
     }
 
     dbus_message_unref(reply);
 
     crm_trace("Found %d systemd services", lpc);
     return units;
 }
 
 gboolean
 systemd_unit_exists(const char *name)
 {
     char *unit = NULL;
 
     /* Note: Makes a blocking dbus calls
      * Used by resources_find_service_class() when resource class=service
      */
     unit = systemd_unit_by_name(name, NULL);
     if(unit) {
         free(unit);
         return TRUE;
     }
     return FALSE;
 }
 
 static char *
 systemd_unit_metadata(const char *name, int timeout)
 {
     char *meta = NULL;
     char *desc = NULL;
     char *path = systemd_unit_by_name(name, NULL);
 
     if (path) {
         /* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */
         desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description", NULL, NULL, NULL, timeout);
     } else {
         desc = crm_strdup_printf("Systemd unit file for %s", name);
     }
 
     meta = crm_strdup_printf("<?xml version=\"1.0\"?>\n"
                            "<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n"
                            "<resource-agent name=\"%s\" version=\"0.1\">\n"
                            "  <version>1.0</version>\n"
                            "  <longdesc lang=\"en\">\n"
                            "    %s\n"
                            "  </longdesc>\n"
                            "  <shortdesc lang=\"en\">systemd unit file for %s</shortdesc>\n"
                            "  <parameters>\n"
                            "  </parameters>\n"
                            "  <actions>\n"
                            "    <action name=\"start\"   timeout=\"15\" />\n"
                            "    <action name=\"stop\"    timeout=\"15\" />\n"
                            "    <action name=\"status\"  timeout=\"15\" />\n"
                            "    <action name=\"restart\"  timeout=\"15\" />\n"
                            "    <action name=\"monitor\" timeout=\"15\" interval=\"15\" start-delay=\"15\" />\n"
                            "    <action name=\"meta-data\"  timeout=\"5\" />\n"
                            "  </actions>\n"
                            "  <special tag=\"systemd\">\n"
                            "  </special>\n" "</resource-agent>\n", name, desc, name);
     free(desc);
     free(path);
     return meta;
 }
 
 static bool
 systemd_mask_error(svc_action_t *op, const char *error)
 {
     crm_trace("Could not issue %s for %s: %s", op->action, op->rsc, error);
     if(strstr(error, "org.freedesktop.systemd1.InvalidName")
        || strstr(error, "org.freedesktop.systemd1.LoadFailed")
        || strstr(error, "org.freedesktop.systemd1.NoSuchUnit")) {
 
         if (safe_str_eq(op->action, "stop")) {
             crm_trace("Masking %s failure for %s: unknown services are stopped", op->action, op->rsc);
             op->rc = PCMK_OCF_OK;
 
         } else {
             crm_trace("Mapping %s failure for %s: unknown services are not installed", op->action, op->rsc);
             op->rc = PCMK_OCF_NOT_INSTALLED;
             op->status = PCMK_LRM_OP_NOT_INSTALLED;
         }
         return TRUE;
     }
 
     return FALSE;
 }
 
 static void
 systemd_exec_result(DBusMessage *reply, svc_action_t *op)
 {
     DBusError error;
 
     if(pcmk_dbus_find_error(op->action, (void*)&error, reply, &error)) {
 
         /* ignore "already started" or "not running" errors */
         if (!systemd_mask_error(op, error.name)) {
             crm_err("Could not issue %s for %s: %s (%s)", op->action, op->rsc, error.message);
         }
 
     } else {
         if(!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
             crm_warn("Call to %s passed but return type was unexpected", op->action);
             op->rc = PCMK_OCF_OK;
 
         } else {
             const char *path = NULL;
 
             dbus_message_get_args (reply, NULL,
                                    DBUS_TYPE_OBJECT_PATH, &path,
                                    DBUS_TYPE_INVALID);
             crm_info("Call to %s passed: %s", op->action, path);
             op->rc = PCMK_OCF_OK;
         }
     }
 
     operation_finalize(op);
 }
 
 static void
 systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
 {
     DBusError error;
     DBusMessage *reply = NULL;
     svc_action_t *op = user_data;
 
     dbus_error_init(&error);
     if(pending) {
         reply = dbus_pending_call_steal_reply(pending);
     }
 
     if(op) {
         crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action);
-        op->opaque->pending = NULL;
+        if (pending == op->opaque->pending) {
+            op->opaque->pending = NULL;
+        } else {
+            crm_info("Received unexpected reply for pending DBus call (%p vs %p)",
+                     op->opaque->pending, pending);
+        }
         systemd_exec_result(reply, op);
 
     } else {
         crm_trace("Got result: %p for %p", reply, pending);
     }
 
     if(pending) {
         dbus_pending_call_unref(pending);
     }
     if(reply) {
         dbus_message_unref(reply);
     }
 }
 
 #define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/"
 
 static void
 systemd_unit_check(const char *name, const char *state, void *userdata)
 {
     svc_action_t * op = userdata;
 
     crm_trace("Resource %s has %s='%s'", op->rsc, name, state);
 
     if(state == NULL) {
         op->rc = PCMK_OCF_NOT_RUNNING;
 
     } else if (g_strcmp0(state, "active") == 0) {
         op->rc = PCMK_OCF_OK;
     } else if (g_strcmp0(state, "activating") == 0) {
         op->rc = PCMK_OCF_PENDING;
     } else if (g_strcmp0(state, "deactivating") == 0) {
         op->rc = PCMK_OCF_PENDING;
     } else {
         op->rc = PCMK_OCF_NOT_RUNNING;
     }
 
     if (op->synchronous == FALSE) {
-        if (op->opaque->pending) {
-            dbus_pending_call_unref(op->opaque->pending);
-        }
-        op->opaque->pending = NULL;
+        services_set_op_pending(op, NULL);
         operation_finalize(op);
     }
 }
 
 gboolean
 systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
 {
     const char *method = op->action;
     DBusMessage *msg = NULL;
     DBusMessage *reply = NULL;
 
     if (unit == NULL) {
         crm_debug("Could not obtain unit named '%s'", op->agent);
         op->rc = PCMK_OCF_NOT_INSTALLED;
         op->status = PCMK_LRM_OP_NOT_INSTALLED;
         goto cleanup;
     }
 
     if (safe_str_eq(op->action, "monitor") || safe_str_eq(method, "status")) {
         DBusPendingCall *pending = NULL;
         char *state;
 
         state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit,
                                        BUS_NAME ".Unit", "ActiveState",
                                        op->synchronous?NULL:systemd_unit_check,
                                        op, op->synchronous?NULL:&pending, op->timeout);
         if (op->synchronous) {
             systemd_unit_check("ActiveState", state, op);
             free(state);
             return op->rc == PCMK_OCF_OK;
         } else if (pending) {
             dbus_pending_call_ref(pending);
-            op->opaque->pending = pending;
+            services_set_op_pending(op, pending);
             return TRUE;
         }
 
         return FALSE;
 
     } else if (g_strcmp0(method, "start") == 0) {
         FILE *file_strm = NULL;
         char *override_dir = crm_strdup_printf("%s/%s.service.d", SYSTEMD_OVERRIDE_ROOT, op->agent);
         char *override_file = crm_strdup_printf("%s/%s.service.d/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, op->agent);
 
         method = "StartUnit";
         crm_build_path(override_dir, 0755);
 
         file_strm = fopen(override_file, "w");
         if (file_strm != NULL) {
             /* TODO: Insert the start timeout in too */
             char *override = crm_strdup_printf(
                 "[Unit]\n"
                 "Description=Cluster Controlled %s\n"
                 "Before=pacemaker.service\n"
                 "\n"
                 "[Service]\n"
                 "Restart=no\n",
                 op->agent);
 
             int rc = fprintf(file_strm, "%s\n", override);
 
             free(override);
             if (rc < 0) {
                 crm_perror(LOG_ERR, "Cannot write to systemd override file %s", override_file);
             }
 
         } else {
             crm_err("Cannot open systemd override file %s for writing", override_file);
         }
 
         if (file_strm != NULL) {
             fflush(file_strm);
             fclose(file_strm);
         }
         systemd_daemon_reload(op->timeout);
         free(override_file);
         free(override_dir);
 
     } else if (g_strcmp0(method, "stop") == 0) {
         char *override_file = crm_strdup_printf("%s/%s.service.d/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, op->agent);
 
         method = "StopUnit";
         unlink(override_file);
         free(override_file);
         systemd_daemon_reload(op->timeout);
 
     } else if (g_strcmp0(method, "restart") == 0) {
         method = "RestartUnit";
 
     } else {
         op->rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
         goto cleanup;
     }
 
     crm_debug("Calling %s for %s: %s", method, op->rsc, unit);
 
     msg = systemd_new_method(BUS_NAME".Manager", method);
     CRM_ASSERT(msg != NULL);
 
     /* (ss) */
     {
         const char *replace_s = "replace";
         char *name = systemd_service_name(op->agent);
 
         CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
         CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
 
         free(name);
     }
 
     if (op->synchronous == FALSE) {
         DBusPendingCall* pending = pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op, op->timeout);
 
         dbus_message_unref(msg);
         if(pending) {
-            dbus_pending_call_ref(pending);
-            op->opaque->pending = pending;
+            services_set_op_pending(op, pending);
             return TRUE;
         }
         return FALSE;
 
     } else {
         DBusError error;
 
         reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error, op->timeout);
         dbus_message_unref(msg);
         systemd_exec_result(reply, op);
 
         if(reply) {
             dbus_message_unref(reply);
         }
         return FALSE;
     }
 
   cleanup:
     if (op->synchronous == FALSE) {
         operation_finalize(op);
         return TRUE;
     }
 
     return op->rc == PCMK_OCF_OK;
 }
 
 static gboolean
 systemd_timeout_callback(gpointer p)
 {
     svc_action_t * op = p;
 
     op->opaque->timerid = 0;
     crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc);
     operation_finalize(op);
 
     return FALSE;
 }
 
 gboolean
 systemd_unit_exec(svc_action_t * op)
 {
     char *unit = NULL;
 
     CRM_ASSERT(op);
     CRM_ASSERT(systemd_init());
     op->rc = PCMK_OCF_UNKNOWN_ERROR;
     crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'",
               op->synchronous ? "" : "a", op->action, op->agent, op->rsc);
 
     if (safe_str_eq(op->action, "meta-data")) {
         /* TODO: See if we can teach the lrmd not to make these calls synchronously */
         op->stdout_data = systemd_unit_metadata(op->agent, op->timeout);
         op->rc = PCMK_OCF_OK;
 
         if (op->synchronous == FALSE) {
             operation_finalize(op);
         }
         return TRUE;
     }
 
     unit = systemd_unit_by_name(op->agent, op);
     free(unit);
 
     if (op->synchronous == FALSE) {
         op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op);
         return TRUE;
     }
 
     return op->rc == PCMK_OCF_OK;
 }