diff --git a/configure.ac b/configure.ac
index 97efbd62e2..1edff40842 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,1917 +1,1918 @@
 dnl
 dnl autoconf for Pacemaker
 dnl
 dnl License: GNU General Public License (GPL)
 
 dnl ===============================================
 dnl Bootstrap
 dnl ===============================================
 AC_PREREQ(2.59)
 
 dnl Suggested structure:
 dnl     information on the package
 dnl     checks for programs
 dnl     checks for libraries
 dnl     checks for header files
 dnl     checks for types
 dnl     checks for structures
 dnl     checks for compiler characteristics
 dnl     checks for library functions
 dnl     checks for system services
 
 m4_include([version.m4])
 AC_INIT([pacemaker], VERSION_NUMBER, pacemaker@oss.clusterlabs.org,pacemaker,http://clusterlabs.org)
 
 PCMK_FEATURES=""
 HB_PKG=heartbeat
 
 AC_CONFIG_AUX_DIR(.)
 AC_CANONICAL_HOST
 
 dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below)
 dnl
 dnl Internal header: include/config.h
 dnl   - Contains ALL defines
 dnl   - include/config.h.in is generated automatically by autoheader
 dnl   - NOT to be included in any header files except lha_internal.h
 dnl     (which is also not to be included in any other header files)
 dnl
 dnl External header: include/crm_config.h
 dnl   - Contains a subset of defines checked here
 dnl   - Manually edit include/crm_config.h.in to have configure include
 dnl     new defines
 dnl   - Should not include HAVE_* defines
 dnl   - Safe to include anywhere
 AM_CONFIG_HEADER(include/config.h include/crm_config.h)
 ALL_LINGUAS="en fr"
 
 AC_ARG_WITH(version,
     [  --with-version=version   Override package version (if you're a packager needing to pretend) ],
     [ PACKAGE_VERSION="$withval" ])
 
 AC_ARG_WITH(pkg-name,
     [  --with-pkg-name=name     Override package name (if you're a packager needing to pretend) ],
     [ PACKAGE_NAME="$withval" ])
 
 dnl Older distros may need: AM_INIT_AUTOMAKE($PACKAGE_NAME, $PACKAGE_VERSION)
 AM_INIT_AUTOMAKE
 AC_DEFINE_UNQUOTED(PACEMAKER_VERSION, "$PACKAGE_VERSION", Current pacemaker version)
 
 PACKAGE_SERIES=`echo $PACKAGE_VERSION | awk -F. '{ print $1"."$2 }'`
 AC_SUBST(PACKAGE_SERIES)
 AC_SUBST(PACKAGE_VERSION)
 
 dnl automake >= 1.11 offers --enable-silent-rules for suppressing the output from
 dnl normal compilation.  When a failure occurs, it will then display the full
 dnl command line
 dnl Wrap in m4_ifdef to avoid breaking on older platforms
 m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
 
 dnl Example 2.4. Silent Custom Rule to Generate a File
 dnl %-bar.pc: %.pc
 dnl	$(AM_V_GEN)$(LN_S) $(notdir $^) $@
 
 CC_IN_CONFIGURE=yes
 export CC_IN_CONFIGURE
 
 LDD=ldd
 BUILD_ATOMIC_ATTRD=1
 
 dnl ========================================================================
 dnl Compiler characteristics
 dnl ========================================================================
 
 AC_PROG_CC dnl Can force other with environment variable "CC".
 AM_PROG_CC_C_O
 AC_PROG_CC_STDC
 gl_EARLY
 gl_INIT
 
 AC_LIBTOOL_DLOPEN               dnl Enable dlopen support...
 AC_LIBLTDL_CONVENIENCE          dnl make libltdl a convenience lib
 AC_PROG_LIBTOOL
 
 AC_PROG_YACC
 AM_PROG_LEX
 
 AC_C_STRINGIZE
 AC_TYPE_SIZE_T
 AC_CHECK_SIZEOF(char)
 AC_CHECK_SIZEOF(short)
 AC_CHECK_SIZEOF(int)
 AC_CHECK_SIZEOF(long)
 AC_CHECK_SIZEOF(long long)
 AC_STRUCT_TIMEZONE
 
 dnl ===============================================
 dnl Helpers
 dnl ===============================================
 dnl cc_supports_flag FLAG...
 dnl   Shell helper: test-compiles an empty program with CFLAGS set to
 dnl   "-Werror" followed by the given flags.
 dnl   Returns 0 when the compile succeeds and 1 otherwise; the same value is
 dnl   also left in the shell variable RC.
 cc_supports_flag() {
          dnl "local" keeps the CFLAGS override confined to this function call
          local CFLAGS="-Werror $@"
          AC_MSG_CHECKING(whether $CC supports "$@")
          AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[ ]])], [RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
          return $RC
 }
 
 dnl try_extract_header_define HEADER DEFINE DEFAULT
 dnl   Builds and runs a tiny C program that prints the string macro DEFINE
 dnl   from <HEADER>.  When the program cannot be built, DEFINE is not defined,
 dnl   or it prints nothing, DEFAULT is used instead.
 dnl   The chosen value is written to stdout so callers can capture it with
 dnl   backticks; progress is reported through the autoconf message macros.
 dnl   Scratch files are created next to the sources and removed afterwards.
 try_extract_header_define() {
     AC_MSG_CHECKING(if $2 in $1 exists)
     Cfile=$srcdir/extract_define.$2.${$}
     printf "#include <stdio.h>\n" > ${Cfile}.c
     printf "#include <%s>\n" $1 >> ${Cfile}.c
     printf "int main(int argc, char **argv) {\n" >> ${Cfile}.c
     printf "#ifdef %s\n" $2 >> ${Cfile}.c
     printf "printf(\"%%s\", %s);\n" $2 >> ${Cfile}.c
     printf "#endif \n return 0; }\n" >> ${Cfile}.c
     dnl A failing compile is an expected outcome here, so keep it quiet
     $CC $CFLAGS ${Cfile}.c -o ${Cfile} 2>/dev/null
     value=
     if test -x ${Cfile}; then
         value=`${Cfile} 2>/dev/null`
     fi
     dnl POSIX test only specifies "=" for string equality; "==" is a bashism
     if test x"${value}" = x""; then
         value=$3
         AC_MSG_RESULT(default: $value)
     else
         AC_MSG_RESULT($value)
     fi
     dnl Print through a fixed format so the value is never parsed as one
     printf "%s" "$value"
     rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
 }
 
 dnl extract_header_define HEADER DEFINE
 dnl   Builds and runs a tiny C program that prints the string macro DEFINE
 dnl   from <HEADER>, reports the value and writes it to stdout so callers can
 dnl   capture it with backticks.  There is no fallback value: HEADER and
 dnl   DEFINE are expected to exist.
 dnl   Scratch files are created next to the sources and removed afterwards.
 extract_header_define() {
     AC_MSG_CHECKING(for $2 in $1)
     Cfile=$srcdir/extract_define.$2.${$}
     printf "#include <stdio.h>\n" > ${Cfile}.c
     printf "#include <%s>\n" $1 >> ${Cfile}.c
     printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c
     $CC $CFLAGS ${Cfile}.c -o ${Cfile}
     value=`${Cfile}`
     AC_MSG_RESULT($value)
     dnl Print through a fixed format so the value is never parsed as one
     printf "%s" "$value"
     rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
 }
 
 dnl ===============================================
 dnl Configure Options
 dnl ===============================================
 
 dnl Some systems, like Solaris require a custom package name
 AC_ARG_WITH(pkgname,
     [  --with-pkgname=name     name for pkg (typically for Solaris) ],
     [ PKGNAME="$withval" ],
     [ PKGNAME="LXHAhb" ],
   )
 AC_SUBST(PKGNAME)
 
 AC_ARG_ENABLE([ansi],
 [  --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers.
      [default=no]])
 
 AC_ARG_ENABLE([fatal-warnings],
 [  --enable-fatal-warnings very pedantic and fatal warnings for gcc
      [default=yes]])
 
 AC_ARG_ENABLE([quiet],
 [  --enable-quiet
      Supress make output unless there is an error
      [default=no]])
 
 AC_ARG_ENABLE([thread-safe],
 [  --enable-thread-safe Enable some client libraries to be thread safe.
      [default=no]])
 
 AC_ARG_ENABLE([bundled-ltdl],
 [  --enable-bundled-ltdl  Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]])
 LTDL_LIBS=""
 
 AC_ARG_ENABLE([no-stack],
     [  --enable-no-stack
        Only build the Policy Engine and pieces needed to support it [default=no]])
 
 dnl Init-system integration switches.  The help text describes what the
 dnl --enable form does; use --disable-upstart / --disable-systemd to opt out.
 AC_ARG_ENABLE([upstart],
     [  --enable-upstart
        Build support for the Upstart init system [default=yes]])
 
 AC_ARG_ENABLE([systemd],
     [  --enable-systemd
        Build support for the Systemd init system [default=yes]])
 
 AC_ARG_WITH(ais,
     [  --with-ais
        Support the Corosync messaging and membership layer ],
     [ SUPPORT_CS=$withval ],
     [ SUPPORT_CS=try ],
 )
 
 AC_ARG_WITH(corosync,
     [  --with-corosync
        Support the Corosync messaging and membership layer ],
     [ SUPPORT_CS=$withval ]
 dnl	initialized in AC_ARG_WITH(ais...) already,
 dnl	don't reset to try if it was given as --without-ais
 )
 
 AC_ARG_WITH(heartbeat,
     [  --with-heartbeat
        Support the Heartbeat messaging and membership layer ],
     [ SUPPORT_HEARTBEAT=$withval ],
     [ SUPPORT_HEARTBEAT=try ],
 )
 
 AC_ARG_WITH(cman,
     [  --with-cman
        Support the consumption of membership and quorum from cman ],
     [ SUPPORT_CMAN=$withval ],
     [ SUPPORT_CMAN=try ],
 )
 
 dnl Historically this option was registered as "cpg" while its help text
 dnl advertised --with-cs-quorum, so the documented switch was silently
 dnl ignored.  Keep --with-cpg working and register the documented name too.
 AC_ARG_WITH(cpg,
     [  --with-cpg
        Deprecated alias for --with-cs-quorum ],
     [ SUPPORT_CS_QUORUM=$withval ],
     [ SUPPORT_CS_QUORUM=try ],
 )
 
 AC_ARG_WITH(cs-quorum,
     [  --with-cs-quorum
        Support the consumption of membership and quorum from corosync ],
     [ SUPPORT_CS_QUORUM=$withval ]
 dnl	initialized in AC_ARG_WITH(cpg...) already,
 dnl	don't reset to try if it was given as --without-cpg
 )
 
 AC_ARG_WITH(nagios,
     [  --with-nagios
        Support nagios remote monitoring ],
     [ SUPPORT_NAGIOS=$withval ],
     [ SUPPORT_NAGIOS=try ],
 )
 
 AC_ARG_WITH(nagios-plugin-dir,
     [  --with-nagios-plugin-dir=DIR
        Directory for nagios plugins [${NAGIOS_PLUGIN_DIR}]],
     [ NAGIOS_PLUGIN_DIR="$withval" ]
 )
 
 AC_ARG_WITH(nagios-metadata-dir,
     [  --with-nagios-metadata-dir=DIR
        Directory for nagios plugins metadata [${NAGIOS_METADATA_DIR}]],
     [ NAGIOS_METADATA_DIR="$withval" ]
 )
 
 AC_ARG_WITH(snmp,
     [  --with-snmp
        Support the SNMP protocol ],
     [ SUPPORT_SNMP=$withval ],
     [ SUPPORT_SNMP=try ],
 )
 
 AC_ARG_WITH(esmtp,
     [  --with-esmtp
        Support the sending mail notifications with the esmtp library ],
     [ SUPPORT_ESMTP=$withval ],
     [ SUPPORT_ESMTP=try ],
 )
 
 AC_ARG_WITH(acl,
     [  --with-acl
        Support CIB ACL ],
     [ SUPPORT_ACL=$withval ],
     [ SUPPORT_ACL=yes ],
 )
 
 AC_ARG_WITH(cibsecrets,
     [  --with-cibsecrets
        Support CIB secrets ],
     [ SUPPORT_CIBSECRETS=$withval ],
     [ SUPPORT_CIBSECRETS=no ],
 )
 
 CSPREFIX=""
 AC_ARG_WITH(ais-prefix,
     [  --with-ais-prefix=DIR  Prefix used when Corosync was installed [$prefix]],
     [ CSPREFIX=$withval ],
     [ CSPREFIX=$prefix ])
 
 LCRSODIR=""
 AC_ARG_WITH(lcrso-dir,
     [  --with-lcrso-dir=DIR   Corosync lcrso files. ],
     [ LCRSODIR="$withval" ])
 
 INITDIR=""
 AC_ARG_WITH(initdir,
     [  --with-initdir=DIR      directory for init (rc) scripts [${INITDIR}]],
     [ INITDIR="$withval" ])
 
 dnl Both switches default to off.  Initialise each variable explicitly so a
 dnl stray value inherited from the environment cannot turn the feature on.
 SUPPORT_PROFILING=0
 AC_ARG_WITH(profiling,
     [  --with-profiling
        Disable optimizations for effective profiling ],
     [ SUPPORT_PROFILING=$withval ])
 
 SUPPORT_COVERAGE=0
 AC_ARG_WITH(coverage,
     [  --with-coverage
        Instrument the build for gcov test coverage (implies --with-profiling) ],
     [ SUPPORT_COVERAGE=$withval ])
 
 PUBLICAN_BRAND="common"
 AC_ARG_WITH(brand,
     [  --with-brand=brand  Brand to use for generated documentation [$PUBLICAN_BRAND]],
     [ PUBLICAN_BRAND="$withval" ])
 AC_SUBST(PUBLICAN_BRAND)
 
 ASCIIDOC_CLI_TYPE="pcs"
 AC_ARG_WITH(doc-cli,
     [  --with-doc-cli=cli_type  CLI type to use for generated documentation. [$ASCIIDOC_CLI_TYPE]],
     [ ASCIIDOC_CLI_TYPE="$withval" ])
 AC_SUBST(ASCIIDOC_CLI_TYPE)
 
 dnl ===============================================
 dnl General Processing
 dnl ===============================================
 
 AC_SUBST(HB_PKG)
 
 INIT_EXT=""
 echo Our Host OS: $host_os/$host
 
 
 AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
 case $prefix in
   NONE)
 	prefix=/usr
 	dnl Fix default variables - "prefix" variable if not specified
 	if test "$localstatedir" = "\${prefix}/var"; then
 		localstatedir="/var"
 	fi
 	if test "$sysconfdir" = "\${prefix}/etc"; then
 		sysconfdir="/etc"
 	fi
 	;;
 esac
 
 
 AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
 case $exec_prefix in
   dnl For consistency with Heartbeat, map NONE->$prefix
   NONE)	  exec_prefix=$prefix;;
   prefix) exec_prefix=$prefix;;
 esac
 
 AC_MSG_NOTICE(Sanitizing ais_prefix: ${CSPREFIX})
 case $CSPREFIX in
   dnl For consistency with Heartbeat, map NONE->$prefix
   NONE)	  CSPREFIX=$prefix;;
   prefix) CSPREFIX=$prefix;;
 esac
 
 dnl Resolve INITDIR: the literal word "prefix" maps to $prefix, and an empty
 dnl value triggers a search for the first init-script directory that exists.
 AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR})
 if test x"$INITDIR" = x"prefix"; then
   INITDIR=$prefix
 elif test x"$INITDIR" = x""; then
   AC_MSG_CHECKING(which init (rc) directory to use)
   for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \
                  /usr/local/etc/rc.d /etc/rc.d
   do
     test -d $initdir || continue
     INITDIR=$initdir
     break
   done
   AC_MSG_RESULT($INITDIR)
 fi
 AC_SUBST(INITDIR)
 
 dnl When libdir still refers to the prefix (or is NONE), pick the first of
 dnl lib64 / lib that exists below exec_prefix; otherwise leave it untouched.
 AC_MSG_NOTICE(Sanitizing libdir: ${libdir})
 case $libdir in
   *prefix*|NONE)
     AC_MSG_CHECKING(which lib directory to use)
     for aDir in lib64 lib; do
       trydir="${exec_prefix}/${aDir}"
       test -d ${trydir} || continue
       libdir=${trydir}
       break
     done
     AC_MSG_RESULT($libdir)
     ;;
 esac
 
 dnl Expand autoconf variables so that we dont end up with '${prefix}'
 dnl in #defines and python scripts
 dnl NOTE: Autoconf deliberately leaves them unexpanded to allow
 dnl    make exec_prefix=/foo install
 dnl No longer being able to do this seems like no great loss to me...
 
 eval prefix="`eval echo ${prefix}`"
 eval exec_prefix="`eval echo ${exec_prefix}`"
 eval bindir="`eval echo ${bindir}`"
 eval sbindir="`eval echo ${sbindir}`"
 eval libexecdir="`eval echo ${libexecdir}`"
 eval datadir="`eval echo ${datadir}`"
 eval sysconfdir="`eval echo ${sysconfdir}`"
 eval sharedstatedir="`eval echo ${sharedstatedir}`"
 eval localstatedir="`eval echo ${localstatedir}`"
 eval libdir="`eval echo ${libdir}`"
 eval includedir="`eval echo ${includedir}`"
 eval oldincludedir="`eval echo ${oldincludedir}`"
 eval infodir="`eval echo ${infodir}`"
 eval mandir="`eval echo ${mandir}`"
 
 dnl Home-grown variables
 eval INITDIR="${INITDIR}"
 eval docdir="`eval echo ${docdir}`"
 if test x"${docdir}" = x""; then
    docdir=${datadir}/doc/${PACKAGE}-${VERSION}
    #docdir=${datadir}/doc/packages/${PACKAGE}
 fi
 AC_SUBST(docdir)
 
 dnl Warn (without failing) about each configured directory that does not
 dnl exist on the build host.
 for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
          localstatedir libdir includedir oldincludedir infodir mandir INITDIR docdir
 do
   dnl Indirect lookup: fetch the value of the variable whose name is in $j
   dirname=`eval echo '${'${j}'}'`
   test -d "$dirname" || AC_MSG_WARN([$j directory ($dirname) does not exist!])
 done
 
 dnl This OS-based decision-making is poor autotools practice;
 dnl feature-based mechanisms are strongly preferred.
 dnl
 dnl So keep this section to a bare minimum; regard as a "necessary evil".
 
 dnl Per-platform preprocessor defines, search paths and init-script suffix.
 case "$host_os" in
 *bsd*)
     AC_DEFINE_UNQUOTED(ON_BSD, 1, Compiling for BSD platform)
     dnl Append rather than assign, so anything already in LIBS is kept
     dnl (this matches what the darwin branch does)
     LIBS="$LIBS -L/usr/local/lib"
     CPPFLAGS="$CPPFLAGS -I/usr/local/include"
     INIT_EXT=".sh"
     ;;
 *solaris*)
     AC_DEFINE_UNQUOTED(ON_SOLARIS, 1, Compiling for Solaris platform)
     ;;
 *linux*)
     AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
     CFLAGS="$CFLAGS -I${prefix}/include"
     ;;
 darwin*)
     AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
     LIBS="$LIBS -L${prefix}/lib"
     CFLAGS="$CFLAGS -I${prefix}/include"
     ;;
 esac
 
 dnl Eventually remove this
 CFLAGS="$CFLAGS -I${prefix}/include/heartbeat"
 
 AC_SUBST(INIT_EXT)
 AC_MSG_NOTICE(Host CPU: $host_cpu)
 
 case "$host_cpu" in
   ppc64|powerpc64)
     case $CFLAGS in
      *powerpc64*)			;;
      *)	if test "$GCC" = yes; then
 	  CFLAGS="$CFLAGS -m64"
 	fi				;;
     esac
 esac
 
 dnl Decide which printf conversion matches uint64_t on this platform.
 dnl The probe below formats a uint64_t with %lu while warnings are fatal:
 dnl if it compiles, U64T becomes "%lu", otherwise "%llu".
 AC_MSG_CHECKING(which format is needed to print uint64_t)
 
 dnl CFLAGS is replaced (not extended) for the probe and restored afterwards
 ac_save_CFLAGS=$CFLAGS
 CFLAGS="-Wall -Werror"
 
 AC_COMPILE_IFELSE(
     [AC_LANG_PROGRAM(
       [
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
       ],
       [
 int max = 512;
 uint64_t bignum = 42;
 char *buffer = malloc(max);
 const char *random = "random";
 snprintf(buffer, max-1, "<quorum id=%lu quorate=%s/>", bignum, random);
 fprintf(stderr, "Result: %s\n", buffer);
       ]
     )],
     [U64T="%lu"],
     [U64T="%llu"]
 )
 CFLAGS=$ac_save_CFLAGS
 
 AC_MSG_RESULT($U64T)
 AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)
 
 dnl ===============================================
 dnl Program Paths
 dnl ===============================================
 
 PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
 export PATH
 
 
 dnl Replacing AC_PROG_LIBTOOL with AC_CHECK_PROG because LIBTOOL
 dnl was NOT being expanded all the time thus causing things to fail.
 AC_CHECK_PROGS(LIBTOOL, glibtool libtool libtool15 libtool13)
 
 AM_PATH_PYTHON
 AC_CHECK_PROGS(MAKE, gmake make)
 AC_PATH_PROGS(HTML2TXT, lynx w3m)
 AC_PATH_PROGS(HELP2MAN, help2man)
 AC_PATH_PROGS(POD2MAN, pod2man, pod2man)
 AC_PATH_PROGS(ASCIIDOC, asciidoc)
 AC_PATH_PROGS(PUBLICAN, publican)
 AC_PATH_PROGS(INKSCAPE, inkscape)
 AC_PATH_PROGS(XSLTPROC, xsltproc)
 AC_PATH_PROGS(FOP, fop)
 AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh)
 AC_PATH_PROGS(SCP, scp, /usr/bin/scp)
 AC_PATH_PROGS(TAR, tar)
 AC_PATH_PROGS(MD5, md5)
 AC_PATH_PROGS(TEST, test)
 AC_PATH_PROGS(PKGCONFIG, pkg-config)
 AC_PATH_PROGS(XML2CONFIG, xml2-config)
 AC_PATH_PROGS(VALGRIND_BIN, valgrind, /usr/bin/valgrind)
 AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command)
 
 dnl Disable these until we decide if the stonith config file should be supported
 dnl AC_PATH_PROGS(BISON, bison)
 dnl AC_PATH_PROGS(FLEX, flex)
 dnl AC_PATH_PROGS(HAVE_YACC, $YACC)
 
 if test x"${LIBTOOL}" = x""; then
    AC_MSG_ERROR(You need (g)libtool installed in order to build ${PACKAGE})
 fi
 if test x"${MAKE}" = x""; then
    AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE})
 fi
 
 AM_CONDITIONAL(BUILD_HELP, test x"${HELP2MAN}" != x"")
 if test x"${HELP2MAN}" != x""; then
    PCMK_FEATURES="$PCMK_FEATURES generated-manpages"
 fi
 
 dnl Locate the docbook "manpages" stylesheet (docbook.xsl inside a directory
 dnl named manpages) below datadir.  The search only runs when xsltproc was
 dnl found; MANPAGE_XSLT stays empty otherwise.
 MANPAGE_XSLT=""
 if test x"${XSLTPROC}" != x""; then
   AC_MSG_CHECKING(docbook to manpage transform)
   XSLT=`find ${datadir} -name docbook.xsl`
   for xsl in $XSLT; do
     dname=`dirname $xsl`
     bname=`basename $dname`
     if test "$bname" = "manpages"; then
        MANPAGE_XSLT="$xsl"
        break
     fi
   done
   dnl Report inside the branch so every "checking" line gets exactly one
   dnl result and no stray result is printed when the search was skipped
   AC_MSG_RESULT($MANPAGE_XSLT)
 fi
 AC_SUBST(MANPAGE_XSLT)
 
 AM_CONDITIONAL(BUILD_XML_HELP, test x"${MANPAGE_XSLT}" != x"")
 if test x"${MANPAGE_XSLT}" != x""; then
    PCMK_FEATURES="$PCMK_FEATURES agent-manpages"
 fi
 
 AM_CONDITIONAL(BUILD_ASCIIDOC, test x"${ASCIIDOC}" != x"")
 if test x"${ASCIIDOC}" != x""; then
    PCMK_FEATURES="$PCMK_FEATURES ascii-docs"
 fi
 
 SUPPORT_STONITH_CONFIG=0
 if test x"${HAVE_YACC}" != x"" -a x"${FLEX}" != x"" -a x"${BISON}" != x""; then
    SUPPORT_STONITH_CONFIG=1
    PCMK_FEATURES="$PCMK_FEATURES st-conf"
 fi
 
 AM_CONDITIONAL(BUILD_STONITH_CONFIG, test $SUPPORT_STONITH_CONFIG = 1)
 AC_DEFINE_UNQUOTED(SUPPORT_STONITH_CONFIG, $SUPPORT_STONITH_CONFIG, Support a stand-alone stonith config file in addition to the CIB)
 
 AM_CONDITIONAL(BUILD_DOCBOOK, test x"${PUBLICAN}" != x"" -a x"${INKSCAPE}" != x"")
 if test x"${PUBLICAN}" != x"" -a x"${INKSCAPE}" != x""; then
    AC_MSG_NOTICE(Enabling publican)
    PCMK_FEATURES="$PCMK_FEATURES publican-docs"
 fi
 
 dnl ========================================================================
 dnl checks for library functions to replace them
 dnl
 dnl     NoSuchFunctionName:
 dnl             is a dummy function which no system supplies.  It is here to make
 dnl             the system compile semi-correctly on OpenBSD which doesn't know
 dnl             how to create an empty archive
 dnl
 dnl     scandir: Only on BSD.
 dnl             System-V systems may have it, but hidden and/or deprecated.
 dnl             A replacement function is supplied for it.
 dnl
 dnl     setenv: is a BSD-ish function that should ideally be avoided (use
 dnl             putenv instead).
 dnl             On the other hand, putenv doesn't provide the right API for the
 dnl             code and has memory leaks designed in (sigh...).  Fortunately,
 dnl             a replacement function is supplied for setenv.
 dnl
 dnl     strerror: returns a string that corresponds to an errno.
 dnl             A replacement function is supplied for it.
 dnl
 dnl     strnlen: is a GNU function similar to strlen, but safer.
 dnl             We wrote a tolerably fast replacement function for it.
 dnl
 dnl     strndup: is a GNU function similar to strdup, but safer.
 dnl             We wrote a tolerably fast replacement function for it.
 
 AC_REPLACE_FUNCS(alphasort NoSuchFunctionName scandir setenv strerror strchrnul unsetenv strnlen strndup)
 
 dnl ===============================================
 dnl Libraries
 dnl ===============================================
 AC_CHECK_LIB(socket, socket)			dnl -lsocket
 AC_CHECK_LIB(c, dlopen)				dnl if dlopen is in libc...
 AC_CHECK_LIB(dl, dlopen)			dnl -ldl (for Linux)
 AC_CHECK_LIB(rt, sched_getscheduler)            dnl -lrt (for Tru64)
 AC_CHECK_LIB(gnugetopt, getopt_long)		dnl -lgnugetopt ( if available )
 AC_CHECK_LIB(pam, pam_start)			dnl -lpam (if available)
 
 AC_CHECK_FUNCS([sched_setscheduler])
 
 AC_CHECK_LIB(uuid, uuid_parse)			dnl load the library if necessary
 AC_CHECK_FUNCS(uuid_unparse)			dnl OSX ships uuid_* as standard functions
 
 AC_CHECK_HEADERS(uuid/uuid.h)
 
 if test "x$ac_cv_func_uuid_unparse" != xyes; then
    AC_MSG_ERROR(You do not have the libuuid development package installed)
 fi
 
 if test x"${PKGCONFIG}" = x""; then
    AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE})
 fi
 
 if test "x${enable_thread_safe}" = "xyes"; then
         GPKGNAME="gthread-2.0"
 else
         GPKGNAME="glib-2.0"
 fi
 
 dnl Require the selected glib flavour (GPKGNAME) to be known to pkg-config
 dnl and remember the query command in GLIBCONFIG.  On failure, dump the
 dnl pkg-config state with shell tracing to help diagnose the setup.
 AC_MSG_CHECKING(for $GPKGNAME via $PKGCONFIG)
 if
    $PKGCONFIG --exists $GPKGNAME
 then
     GLIBCONFIG="$PKGCONFIG $GPKGNAME"
 else
     dnl Close the "checking" line before the diagnostic output starts
     AC_MSG_RESULT(no)
     set -x
     echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH
     $PKGCONFIG --exists $GPKGNAME; echo $?
     $PKGCONFIG --cflags $GPKGNAME; echo $?
     $PKGCONFIG $GPKGNAME; echo $?
     set +x
 
     AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE})
 fi
 AC_MSG_RESULT(using $GLIBCONFIG)
 
 #
 #	Pick the linker flag that provides dlopen: nothing when libc has it,
 #	-ldl when libdl has it, otherwise whatever libtool detected.
 #
 LIBADD_DL=${lt_cv_dlopen_libs}
 if test "$ac_cv_lib_dl_dlopen" = yes; then
     LIBADD_DL=-ldl
 fi
 if test "$ac_cv_lib_c_dlopen" = yes; then
     LIBADD_DL=""
 fi
 dnl
 dnl Check for location of gettext
 dnl
 dnl On at least Solaris 2.x, where it is in libc, specifying lintl causes
 dnl grief. Ensure minimal result, not the sum of all possibilities.
 dnl And do libc first.
 dnl Known examples:
 dnl    c:      Linux, Solaris 2.6+
 dnl    intl:   BSD, AIX
 
 AC_CHECK_LIB(c, gettext)
 if test x$ac_cv_lib_c_gettext != xyes; then
    AC_CHECK_LIB(intl, gettext)
 fi
 
 if test x$ac_cv_lib_c_gettext != xyes -a x$ac_cv_lib_intl_gettext != xyes; then
    AC_MSG_ERROR(You need gettext installed in order to build ${PACKAGE})
 fi
 
 if test "X$GLIBCONFIG" != X; then
 	AC_MSG_CHECKING(for special glib includes: )
 	GLIBHEAD=`$GLIBCONFIG --cflags`
 	AC_MSG_RESULT($GLIBHEAD)
 	CPPFLAGS="$CPPFLAGS $GLIBHEAD"
 
 	AC_MSG_CHECKING(for glib library flags)
 	GLIBLIB=`$GLIBCONFIG --libs`
 	AC_MSG_RESULT($GLIBLIB)
 	LIBS="$LIBS $GLIBLIB"
 fi
 
 dnl FreeBSD needs -lcompat for ftime() used by lrmd.c
 AC_CHECK_LIB([compat], [ftime], [COMPAT_LIBS='-lcompat'])
 AC_SUBST(COMPAT_LIBS)
 
 dnl ========================================================================
 dnl Headers
 dnl ========================================================================
 
 AC_HEADER_STDC
 AC_CHECK_HEADERS(arpa/inet.h)
 AC_CHECK_HEADERS(asm/types.h)
 AC_CHECK_HEADERS(assert.h)
 AC_CHECK_HEADERS(auth-client.h)
 AC_CHECK_HEADERS(ctype.h)
 AC_CHECK_HEADERS(dirent.h)
 AC_CHECK_HEADERS(errno.h)
 AC_CHECK_HEADERS(fcntl.h)
 AC_CHECK_HEADERS(getopt.h)
 AC_CHECK_HEADERS(glib.h)
 AC_CHECK_HEADERS(grp.h)
 AC_CHECK_HEADERS(limits.h)
 AC_CHECK_HEADERS(linux/errqueue.h)
 AC_CHECK_HEADERS(linux/swab.h)
 AC_CHECK_HEADERS(malloc.h)
 AC_CHECK_HEADERS(netdb.h)
 AC_CHECK_HEADERS(netinet/in.h)
 AC_CHECK_HEADERS(netinet/ip.h)
 AC_CHECK_HEADERS(pam/pam_appl.h)
 AC_CHECK_HEADERS(pthread.h)
 AC_CHECK_HEADERS(pwd.h)
 AC_CHECK_HEADERS(security/pam_appl.h)
 AC_CHECK_HEADERS(sgtty.h)
 AC_CHECK_HEADERS(signal.h)
 AC_CHECK_HEADERS(stdarg.h)
 AC_CHECK_HEADERS(stddef.h)
 AC_CHECK_HEADERS(stdio.h)
 AC_CHECK_HEADERS(stdlib.h)
 AC_CHECK_HEADERS(string.h)
 AC_CHECK_HEADERS(strings.h)
 AC_CHECK_HEADERS(sys/dir.h)
 AC_CHECK_HEADERS(sys/ioctl.h)
 AC_CHECK_HEADERS(sys/param.h)
 AC_CHECK_HEADERS(sys/poll.h)
 AC_CHECK_HEADERS(sys/reboot.h)
 AC_CHECK_HEADERS(sys/resource.h)
 AC_CHECK_HEADERS(sys/select.h)
 AC_CHECK_HEADERS(sys/socket.h)
 AC_CHECK_HEADERS(sys/signalfd.h)
 AC_CHECK_HEADERS(sys/sockio.h)
 AC_CHECK_HEADERS(sys/stat.h)
 AC_CHECK_HEADERS(sys/time.h)
 AC_CHECK_HEADERS(sys/timeb.h)
 AC_CHECK_HEADERS(sys/types.h)
 AC_CHECK_HEADERS(sys/uio.h)
 AC_CHECK_HEADERS(sys/un.h)
 AC_CHECK_HEADERS(sys/utsname.h)
 AC_CHECK_HEADERS(sys/wait.h)
 AC_CHECK_HEADERS(time.h)
 AC_CHECK_HEADERS(unistd.h)
 AC_CHECK_HEADERS(winsock.h)
 
 dnl These headers need prerequisites before the tests will pass
 dnl AC_CHECK_HEADERS(net/if.h)
 dnl AC_CHECK_HEADERS(netinet/icmp6.h)
 dnl AC_CHECK_HEADERS(netinet/ip6.h)
 dnl AC_CHECK_HEADERS(netinet/ip_icmp.h)
 
 AC_MSG_CHECKING(for special libxml2 includes)
 if test "x$XML2CONFIG" = "x"; then
    AC_MSG_ERROR(libxml2 config not found)
 else
    XML2HEAD="`$XML2CONFIG --cflags`"
    AC_MSG_RESULT($XML2HEAD)
    AC_CHECK_LIB(xml2, xmlReadMemory)
    AC_CHECK_LIB(xslt, xsltApplyStylesheet)
 fi
 
 CPPFLAGS="$CPPFLAGS $XML2HEAD"
 
 AC_CHECK_HEADERS(libxml/xpath.h)
 AC_CHECK_HEADERS(libxslt/xslt.h)
 if test "$ac_cv_header_libxml_xpath_h" != "yes"; then
    AC_MSG_ERROR(The libxml developement headers were not found)
 fi
 if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
    AC_MSG_ERROR(The libxslt developement headers were not found)
 fi
 
 dnl ========================================================================
 dnl Structures
 dnl ========================================================================
 
 AC_CHECK_MEMBERS([struct tm.tm_gmtoff],,,[[#include <time.h>]])
 AC_CHECK_MEMBERS([lrm_op_t.rsc_deleted],,,[[#include <lrm/lrm_api.h>]])
 AC_CHECK_MEMBER([struct dirent.d_type],
     AC_DEFINE(HAVE_STRUCT_DIRENT_D_TYPE,1,[Define this if struct dirent has d_type]),,
     [#include <dirent.h>])
 
 dnl ========================================================================
 dnl Functions
 dnl ========================================================================
 
 AC_CHECK_FUNCS(g_log_set_default_handler)
 AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT,  1, [Have getopt function]))
 AC_CHECK_FUNCS(nanosleep, AC_DEFINE(HAVE_DECL_NANOSLEEP,  1, [Have nanosleep function]))
 
 dnl ========================================================================
 dnl   ltdl
 dnl ========================================================================
 
 dnl Choose between an installed libltdl and the copy bundled as libltdl.tar.
 dnl NOTE(review): LTDL_foo looks like a placeholder action; it is not read
 dnl anywhere in the visible part of this file - confirm before removing.
 AC_CHECK_LIB(ltdl, lt_dlopen, [LTDL_foo=1])
 dnl --enable-bundled-ltdl overrides the probe result by forcing it to "no"
 if test "x${enable_bundled_ltdl}" = "xyes"; then
    if test $ac_cv_lib_ltdl_lt_dlopen = yes; then
       AC_MSG_NOTICE([Disabling usage of installed ltdl])
    fi
    ac_cv_lib_ltdl_lt_dlopen=no
 fi
 
 LIBLTDL_DIR=""
 if test $ac_cv_lib_ltdl_lt_dlopen != yes ; then
    dnl Bundled copy: unpack the tarball in the source tree and configure it
    dnl as a sub-package
    AC_MSG_NOTICE([Installing local ltdl])
    LIBLTDL_DIR=libltdl
    ( cd $srcdir ; $TAR -xvf libltdl.tar )
    if test "$?" -ne 0; then
      AC_MSG_ERROR([$TAR of libltdl.tar in $srcdir failed])
    fi
    AC_CONFIG_SUBDIRS(libltdl)
 else
    dnl Installed copy: link against it and clear the convenience-library
    dnl variables so the bundled paths are not used
    LIBS="$LIBS -lltdl"
    AC_MSG_NOTICE([Using installed ltdl])
    INCLTDL=""
    LIBLTDL=""
 fi
 
 AC_SUBST(INCLTDL)
 AC_SUBST(LIBLTDL)
 AC_SUBST(LIBLTDL_DIR)
 
 dnl ========================================================================
 dnl   bzip2
 dnl ========================================================================
 AC_CHECK_HEADERS(bzlib.h)
 AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress)
 
 if test x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress != xyes ; then
    AC_MSG_ERROR(BZ2 libraries not found)
 fi
 
 if test x$ac_cv_header_bzlib_h != xyes; then
    AC_MSG_ERROR(BZ2 Development headers not found)
 fi
 
 dnl ========================================================================
 dnl sighandler_t is missing from Illumos, Solaris11 systems
 dnl ========================================================================
 
 AC_MSG_CHECKING([for sighandler_t])
 AC_TRY_COMPILE([#include <signal.h>],[sighandler_t *f;],
 has_sighandler_t=yes,has_sighandler_t=no)
 AC_MSG_RESULT($has_sighandler_t)
 if test "$has_sighandler_t" = "yes" ; then
     AC_DEFINE( HAVE_SIGHANDLER_T, 1, [Define if sighandler_t available] )
 fi
 
 dnl ========================================================================
 dnl   ncurses
 dnl ========================================================================
 dnl
 dnl A few OSes (e.g. Linux) deliver a default "ncurses" alongside "curses".
 dnl Many non-Linux deliver "curses"; sites may add "ncurses".
 dnl
 dnl However, the source-code recommendation for both is to #include "curses.h"
 dnl (i.e. "ncurses" still wants the include to be simple, no-'n', "curses.h").
 dnl
 dnl ncurses takes precedence.
 dnl
 AC_CHECK_HEADERS(curses.h)
 AC_CHECK_HEADERS(curses/curses.h)
 AC_CHECK_HEADERS(ncurses.h)
 AC_CHECK_HEADERS(ncurses/ncurses.h)
 
 dnl Although n-library is preferred, only look for it if the n-header was found.
 CURSESLIBS=''
 if test "$ac_cv_header_ncurses_h" = "yes"; then
   AC_CHECK_LIB(ncurses, printw,
     [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
   )
 fi
 
 if test "$ac_cv_header_ncurses_ncurses_h" = "yes"; then
   AC_CHECK_LIB(ncurses, printw,
     [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
   )
 fi
 
 dnl Only look for non-n-library if there was no n-library.
 if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_h" = "yes"; then
   AC_CHECK_LIB(curses, printw,
     [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
   )
 fi
 
 dnl Only look for non-n-library if there was no n-library.
 if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_curses_h" = "yes"; then
   AC_CHECK_LIB(curses, printw,
     [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
   )
 fi
 
 if test "x$CURSESLIBS" != "x"; then
    PCMK_FEATURES="$PCMK_FEATURES ncurses"
 fi
 
 dnl Check for printw() prototype compatibility
 dnl Check for printw() prototype compatibility.
 dnl Links a call that passes a "const char *" to printw() with -Wcast-qual
 dnl and -Werror; if that fails, HAVE_INCOMPATIBLE_PRINTW is defined.
 dnl The include chain covers every header location probed earlier, including
 dnl curses/curses.h, so the probe always sees a real printw() prototype.
 if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then
     AC_MSG_CHECKING(whether printw() requires argument of "const char *")
     ac_save_LIBS=$LIBS
     LIBS="$CURSESLIBS  $LIBS"
     ac_save_CFLAGS=$CFLAGS
     CFLAGS="-Wcast-qual -Werror"
 
     AC_LINK_IFELSE(
         [AC_LANG_PROGRAM(
           [
 #if defined(HAVE_NCURSES_H)
 #  include <ncurses.h>
 #elif defined(HAVE_NCURSES_NCURSES_H)
 #  include <ncurses/ncurses.h>
 #elif defined(HAVE_CURSES_H)
 #  include <curses.h>
 #elif defined(HAVE_CURSES_CURSES_H)
 #  include <curses/curses.h>
 #endif
           ],
           [printw((const char *)"Test");]
         )],
         [ac_cv_compatible_printw=yes],
         [ac_cv_compatible_printw=no]
     )
 
     LIBS=$ac_save_LIBS
     CFLAGS=$ac_save_CFLAGS
 
     AC_MSG_RESULT([$ac_cv_compatible_printw])
 
     if test "$ac_cv_compatible_printw" = no; then
         AC_MSG_WARN([The printw() function of your ncurses or curses library is old, we will disable usage of the library. If you want to use this library anyway, please update to newer version of the library, ncurses 5.4 or later is recommended. You can get the library from http://www.gnu.org/software/ncurses/.])
         AC_MSG_NOTICE([Disabling curses])
         AC_DEFINE(HAVE_INCOMPATIBLE_PRINTW, 1, [Do we have incompatible printw() in curses library?])
     fi
 fi
 
 AC_SUBST(CURSESLIBS)
 
 dnl ========================================================================
 dnl    Profiling and GProf
 dnl ========================================================================
 
 AC_MSG_NOTICE(Old CFLAGS: $CFLAGS)
 case $SUPPORT_COVERAGE in
      1|yes|true)
 	SUPPORT_PROFILING=1
 	PCMK_FEATURES="$PCMK_FEATURES coverage"
 	CFLAGS="$CFLAGS -fprofile-arcs -ftest-coverage"
 	dnl During linking, make sure to specify -lgcov or -coverage
 
         dnl Enable gprof
 	#LIBS="$LIBS -pg"
 	#CFLAGS="$CFLAGS -pg"
 	;;
 esac
 
 case $SUPPORT_PROFILING in
      1|yes|true)
 	SUPPORT_PROFILING=1
 
 	dnl Disable various compiler optimizations
 	CFLAGS="$CFLAGS -fno-omit-frame-pointer -fno-inline -fno-builtin "
 	dnl CFLAGS="$CFLAGS -fno-inline-functions -fno-default-inline -fno-inline-functions-called-once -fno-optimize-sibling-calls"
 
 	dnl Turn off optimization so tools can get accurate line numbers
 	CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g' -e 's/-D_FORTIFY_SOURCE=.\ //g'`
 	CFLAGS="$CFLAGS -O0 -g3 -gdwarf-2"
 
 	dnl Update features
 	PCMK_FEATURES="$PCMK_FEATURES profile"
 	;;
      *) SUPPORT_PROFILING=0;;
 esac
 AC_MSG_NOTICE(New CFLAGS: $CFLAGS)
 AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for profiling)
 
 dnl ========================================================================
 dnl    Cluster infrastructure - Heartbeat / LibQB
 dnl ========================================================================
 
 dnl Compatibility checks
 AC_CHECK_MEMBERS([struct lrm_ops.fail_rsc],,,[[#include <lrm/lrm_api.h>]])
 
 if test x${enable_no_stack} = xyes; then
     SUPPORT_HEARTBEAT=no
     SUPPORT_CS=no
 fi
 
 PKG_CHECK_MODULES(libqb, libqb, HAVE_libqb=1, HAVE_libqb=0)
 AC_CHECK_HEADERS(qb/qbipc_common.h)
 AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
 
 LIBQB_LOG=1
 PCMK_FEATURES="$PCMK_FEATURES libqb-logging libqb-ipc"
 
 AC_CHECK_FUNCS(qb_ipcs_connection_get_buffer_size, AC_DEFINE(HAVE_IPCS_GET_BUFFER_SIZE,  1, [Have qb_ipcc_get_buffer_size function]))
 
 dnl Abort configuration unless pkg-config reports libqb 0.13 or newer.
 dnl The message is quoted so m4 passes it through as a single argument.
 if
    ! pkg-config --atleast-version 0.13 libqb
 then
    AC_MSG_FAILURE([Version of libqb is too old: v0.13 or greater required])
 fi
 
 LIBS="$LIBS $libqb_LIBS"
 
 AC_CHECK_HEADERS(heartbeat/hb_config.h)
 AC_CHECK_HEADERS(heartbeat/glue_config.h)
 AC_CHECK_HEADERS(stonith/stonith.h)
 AC_CHECK_HEADERS(agent_config.h)
 
 GLUE_HEADER=none
 HAVE_GLUE=0
 if test "$ac_cv_header_heartbeat_glue_config_h" = "yes";  then
    GLUE_HEADER=glue_config.h
    HAVE_GLUE=1
 
 elif test "$ac_cv_header_heartbeat_hb_config_h" = "yes"; then
    GLUE_HEADER=hb_config.h
    HAVE_GLUE=1
 else
    AC_MSG_WARN(cluster-glue development headers were not found)
 fi
 
 if test "$ac_cv_header_stonith_stonith_h" = "yes";  then
    PCMK_FEATURES="$PCMK_FEATURES lha-fencing"
 fi
 
 if test $HAVE_GLUE = 1; then
    dnl On Debian, AC_CHECK_LIB fails if a library has any unresolved symbols
    dnl So check for all the dependencies (so they're added to LIBS) before checking for -lplumb
    AC_CHECK_LIB(pils, PILLoadPlugin)
    AC_CHECK_LIB(plumb, G_main_add_IPC_Channel)
 fi
 
 dnl ===============================================
 dnl Variables needed for substitution
 dnl ===============================================
 CRM_DTD_DIRECTORY="${datadir}/pacemaker"
 AC_DEFINE_UNQUOTED(CRM_DTD_DIRECTORY,"$CRM_DTD_DIRECTORY", Location for the Pacemaker Relax-NG Schema)
 AC_SUBST(CRM_DTD_DIRECTORY)
 
 CRM_CORE_DIR=`try_extract_header_define $GLUE_HEADER HA_COREDIR ${localstatedir}/lib/pacemaker/cores`
 AC_DEFINE_UNQUOTED(CRM_CORE_DIR,"$CRM_CORE_DIR", Location to store core files produced by Pacemaker daemons)
 AC_SUBST(CRM_CORE_DIR)
 
 CRM_DAEMON_USER=`try_extract_header_define $GLUE_HEADER HA_CCMUSER hacluster`
 AC_DEFINE_UNQUOTED(CRM_DAEMON_USER,"$CRM_DAEMON_USER", User to run Pacemaker daemons as)
 AC_SUBST(CRM_DAEMON_USER)
 
 CRM_DAEMON_GROUP=`try_extract_header_define $GLUE_HEADER HA_APIGROUP haclient`
 AC_DEFINE_UNQUOTED(CRM_DAEMON_GROUP,"$CRM_DAEMON_GROUP", Group to run Pacemaker daemons as)
 AC_SUBST(CRM_DAEMON_GROUP)
 
 CRM_STATE_DIR=${localstatedir}/run/crm
 AC_DEFINE_UNQUOTED(CRM_STATE_DIR,"$CRM_STATE_DIR", Where to keep state files and sockets)
 AC_SUBST(CRM_STATE_DIR)
 
 CRM_BLACKBOX_DIR=${localstatedir}/lib/pacemaker/blackbox
 AC_DEFINE_UNQUOTED(CRM_BLACKBOX_DIR,"$CRM_BLACKBOX_DIR", Where to keep blackbox dumps)
 AC_SUBST(CRM_BLACKBOX_DIR)
 
 PE_STATE_DIR="${localstatedir}/lib/pacemaker/pengine"
 AC_DEFINE_UNQUOTED(PE_STATE_DIR,"$PE_STATE_DIR", Where to keep PEngine outputs)
 AC_SUBST(PE_STATE_DIR)
 
 CRM_CONFIG_DIR="${localstatedir}/lib/pacemaker/cib"
 AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep configuration files)
 AC_SUBST(CRM_CONFIG_DIR)
 
 CRM_CONFIG_CTS="${localstatedir}/lib/pacemaker/cts"
 AC_DEFINE_UNQUOTED(CRM_CONFIG_CTS,"$CRM_CONFIG_CTS", Where to keep cts stateful data)
 AC_SUBST(CRM_CONFIG_CTS)
 
 CRM_LEGACY_CONFIG_DIR="${localstatedir}/lib/heartbeat/crm"
 AC_DEFINE_UNQUOTED(CRM_LEGACY_CONFIG_DIR,"$CRM_LEGACY_CONFIG_DIR", Where Pacemaker used to keep configuration files)
 AC_SUBST(CRM_LEGACY_CONFIG_DIR)
 
 CRM_DAEMON_DIR="${libexecdir}/pacemaker"
 AC_DEFINE_UNQUOTED(CRM_DAEMON_DIR,"$CRM_DAEMON_DIR", Location for Pacemaker daemons)
 AC_SUBST(CRM_DAEMON_DIR)
 
 HB_DAEMON_DIR=`try_extract_header_define $GLUE_HEADER HA_LIBHBDIR $libdir/heartbeat`
 AC_DEFINE_UNQUOTED(HB_DAEMON_DIR,"$HB_DAEMON_DIR", Location Heartbeat expects Pacemaker daemons to be in)
 AC_SUBST(HB_DAEMON_DIR)
 
 dnl Needed so that the Corosync plugin can clear out the directory as Heartbeat does
 HA_STATE_DIR=`try_extract_header_define $GLUE_HEADER HA_VARRUNDIR ${localstatedir}/run`
 AC_DEFINE_UNQUOTED(HA_STATE_DIR,"$HA_STATE_DIR", Where Heartbeat keeps state files and sockets)
 AC_SUBST(HA_STATE_DIR)
 
 CRM_RSCTMP_DIR=`try_extract_header_define agent_config.h HA_RSCTMPDIR $HA_STATE_DIR/resource-agents`
 AC_MSG_CHECKING(Scratch dir for resource agents)
 AC_MSG_RESULT($CRM_RSCTMP_DIR)
 AC_DEFINE_UNQUOTED(CRM_RSCTMP_DIR,"$CRM_RSCTMP_DIR", Where resource agents should keep state files)
 AC_SUBST(CRM_RSCTMP_DIR)
 
 dnl Needed for the location of hostcache in CTS.py
 HA_VARLIBHBDIR=`try_extract_header_define $GLUE_HEADER HA_VARLIBHBDIR ${localstatedir}/lib/heartbeat`
 AC_SUBST(HA_VARLIBHBDIR)
 
 AC_DEFINE_UNQUOTED(UUID_FILE,"$localstatedir/lib/heartbeat/hb_uuid", Location of Heartbeat's UUID file)
 
 OCF_ROOT_DIR=`try_extract_header_define $GLUE_HEADER OCF_ROOT_DIR /usr/lib/ocf`
 if test "X$OCF_ROOT_DIR" = X; then
   AC_MSG_ERROR(Could not locate OCF directory)
 fi
 AC_SUBST(OCF_ROOT_DIR)
 
 OCF_RA_DIR=`try_extract_header_define $GLUE_HEADER OCF_RA_DIR $OCF_ROOT_DIR/resource.d`
 AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs)
 AC_SUBST(OCF_RA_DIR)
 
 RH_STONITH_DIR="$sbindir"
 AC_DEFINE_UNQUOTED(RH_STONITH_DIR,"$RH_STONITH_DIR", Location for Red Hat Stonith agents)
 
 RH_STONITH_PREFIX="fence_"
 AC_DEFINE_UNQUOTED(RH_STONITH_PREFIX,"$RH_STONITH_PREFIX", Prefix for Red Hat Stonith agents)
 
 dnl Work out a short identifier for this build, trying three sources in
 dnl order: an embedded archive hash, the git checkout, the directory name.
 dnl GIT resolves to the path of git, or of the false program when git is absent.
 AC_PATH_PROGS(GIT, git false)
 AC_MSG_CHECKING(build version)
 
 dnl NOTE(review): the Format keyword below is presumably replaced with the
 dnl commit hash when an archive is exported - confirm the export-subst setup.
 dnl When it is left untouched, the shell expands an unset Format variable to
 dnl nothing, leaving exactly the literal tail that the first test compares against.
 BUILD_VERSION=$Format:%h$
 if test $BUILD_VERSION != ":%h$"; then
    dnl The placeholder was substituted, so use the embedded value as-is
    AC_MSG_RESULT(archive hash: $BUILD_VERSION)
 
 elif test -x $GIT -a -d .git; then
    dnl Building inside a git checkout: take the abbreviated hash of the latest commit
    BUILD_VERSION=`$GIT log --pretty="format:%h" -n 1`
    AC_MSG_RESULT(git hash: $BUILD_VERSION)
 
 else
    # The current directory name make a reasonable default
    # Most generated archives will include the hash or tag
    dnl Strip everything up to and including the pacemaker- prefix
    dnl (upper or lower case P) from the directory name
    BASE=`basename $PWD`
    BUILD_VERSION=`echo $BASE | sed s:.*[[Pp]]acemaker-::`
    AC_MSG_RESULT(directory based hash: $BUILD_VERSION)
 fi
 
 dnl Publish the result both as a C define and as a substitution variable
 AC_DEFINE_UNQUOTED(BUILD_VERSION, "$BUILD_VERSION", Build version)
 AC_SUBST(BUILD_VERSION)
 
 
 HAVE_dbus=1
 HAVE_upstart=0
 HAVE_systemd=0
 PKG_CHECK_MODULES(DBUS, dbus-1, ,HAVE_dbus=0)
 
 AC_DEFINE_UNQUOTED(SUPPORT_DBUS, $HAVE_dbus, Support dbus)
 AM_CONDITIONAL(BUILD_DBUS, test $HAVE_dbus = 1)
 
 dnl Pull the dbus compile and link flags from pkg-config, but only when dbus
 dnl was detected above; otherwise export an empty DBUS_LIBS.
 dnl The link flags are kept separate from CFLAGS so that compiler options
 dnl do not leak into the substituted DBUS_LIBS value.
 if test $HAVE_dbus = 1; then
    CFLAGS="$CFLAGS `$PKGCONFIG --cflags dbus-1`"
    DBUS_LIBS="`$PKGCONFIG --libs dbus-1`"
 else
    DBUS_LIBS=""
 fi
 AC_SUBST(DBUS_LIBS)
 
 AC_CHECK_TYPES([DBusBasicValue],,,[[#include <dbus/dbus.h>]])
 
 if test $HAVE_dbus = 1 -a "x${enable_upstart}" != xno; then
    HAVE_upstart=1
    PCMK_FEATURES="$PCMK_FEATURES upstart"
 fi
 
 AC_DEFINE_UNQUOTED(SUPPORT_UPSTART, $HAVE_upstart, Support upstart based system services)
 AM_CONDITIONAL(BUILD_UPSTART, test $HAVE_upstart = 1)
 
 dnl Ask pkg-config whether systemd is known; if so record its system unit
 dnl directory, otherwise force systemd support off.
 if
     $PKGCONFIG --exists systemd
 then
     systemdunitdir=`$PKGCONFIG --variable=systemdsystemunitdir systemd`
     AC_SUBST(systemdunitdir)
 else
     enable_systemd=no
 fi
 
 dnl systemd support needs dbus, must not have been disabled, and requires a
 dnl usable (non-empty and not "no") unit directory.
 if test $HAVE_dbus = 1 -a "x${enable_systemd}" != xno; then
    if test -n "$systemdunitdir" -a "x$systemdunitdir" != xno; then
       HAVE_systemd=1
       PCMK_FEATURES="$PCMK_FEATURES systemd"
    fi
 fi
 
 dnl Export the 0/1 result as a C define and as an automake conditional
 AC_DEFINE_UNQUOTED(SUPPORT_SYSTEMD, $HAVE_systemd, Support systemd based system services)
 AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1)
 
 case $SUPPORT_NAGIOS in
      1|yes|true|try)
         SUPPORT_NAGIOS=1;;
      *)
         SUPPORT_NAGIOS=0;;
 esac
 
 if test $SUPPORT_NAGIOS = 1; then
     PCMK_FEATURES="$PCMK_FEATURES nagios"
 fi
 
 AC_DEFINE_UNQUOTED(SUPPORT_NAGIOS, $SUPPORT_NAGIOS, Support nagios plugins)
 AM_CONDITIONAL(BUILD_NAGIOS, test $SUPPORT_NAGIOS = 1)
 
 if test x"$NAGIOS_PLUGIN_DIR" = x""; then
     NAGIOS_PLUGIN_DIR="${libexecdir}/nagios/plugins"
 fi
 
 AC_DEFINE_UNQUOTED(NAGIOS_PLUGIN_DIR, "$NAGIOS_PLUGIN_DIR", Directory for nagios plugins)
 AC_SUBST(NAGIOS_PLUGIN_DIR)
 
 if test x"$NAGIOS_METADATA_DIR" = x""; then
     NAGIOS_METADATA_DIR="${datadir}/nagios/plugins-metadata"
 fi
 
 AC_DEFINE_UNQUOTED(NAGIOS_METADATA_DIR, "$NAGIOS_METADATA_DIR", Directory for nagios plugins metadata)
 AC_SUBST(NAGIOS_METADATA_DIR)
 
 STACKS=""
 CLUSTERLIBS=""
 
 dnl ========================================================================
 dnl    Cluster stack - Heartbeat
 dnl ========================================================================
 
 case $SUPPORT_HEARTBEAT in
 1|yes|true|try)
    AC_MSG_CHECKING(for heartbeat support)
    AC_CHECK_LIB(hbclient, ll_cluster_new, [SUPPORT_HEARTBEAT=1],
 		[if test $SUPPORT_HEARTBEAT != try; then
 			AC_MSG_FAILURE(Unable to support Heartbeat: client libraries not found)
 		fi])
 
    if test $SUPPORT_HEARTBEAT = 1 ; then
 	STACKS="$STACKS heartbeat"
 	dnl objdump -x ${libdir}/libccmclient.so | grep SONAME | awk '{print $2}'
 	AC_DEFINE_UNQUOTED(CCM_LIBRARY, "libccmclient.so.1", Library to load for ccm support)
 	AC_DEFINE_UNQUOTED(HEARTBEAT_LIBRARY, "libhbclient.so.1", Library to load for heartbeat support)
 	BUILD_ATOMIC_ATTRD=0
    else
 	SUPPORT_HEARTBEAT=0
    fi
    ;;
 *) SUPPORT_HEARTBEAT=0;;
 esac
 
 AM_CONDITIONAL(BUILD_HEARTBEAT_SUPPORT, test $SUPPORT_HEARTBEAT = 1)
 AC_DEFINE_UNQUOTED(SUPPORT_HEARTBEAT, $SUPPORT_HEARTBEAT, Support the Heartbeat messaging and membership layer)
 AC_SUBST(SUPPORT_HEARTBEAT)
 
 dnl ========================================================================
 dnl    Cluster stack - Corosync
 dnl ========================================================================
 
 dnl Normalize the values
 case $SUPPORT_CS in
      1|yes|true)
 		SUPPORT_CS=yes
 		missingisfatal=1;;
      try)	missingisfatal=0;;
      *) SUPPORT_CS=no;;
 esac
 
 AC_MSG_CHECKING(for native corosync)
 COROSYNC_LIBS=""
 CS_USES_LIBQB=0
 
 PCMK_SERVICE_ID=9
 LCRSODIR="$libdir"
 
 dnl Probe for the Corosync libraries unless support was disabled.
 dnl libcpg and libcfg are checked without a fallback action, so a miss is
 dnl fatal; the remaining modules only record HAVE_name as 0 or 1.
 if test $SUPPORT_CS = no; then
     AC_MSG_RESULT(no (disabled))
     SUPPORT_CS=0
 else
     AC_MSG_RESULT($SUPPORT_CS, with '$CSPREFIX')
     PKG_CHECK_MODULES(cpg,    libcpg) dnl Fatal
     PKG_CHECK_MODULES(cfg,    libcfg) dnl Fatal
     PKG_CHECK_MODULES(cmap,   libcmap,   HAVE_cmap=1,   HAVE_cmap=0)
     PKG_CHECK_MODULES(cman,   libcman,   HAVE_cman=1,   HAVE_cman=0)
     PKG_CHECK_MODULES(confdb, libconfdb, HAVE_confdb=1, HAVE_confdb=0)
     PKG_CHECK_MODULES(fenced, libfenced, HAVE_fenced=1, HAVE_fenced=0)
     PKG_CHECK_MODULES(quorum, libquorum, HAVE_quorum=1, HAVE_quorum=0)
     PKG_CHECK_MODULES(oldipc, libcoroipcc, HAVE_oldipc=1, HAVE_oldipc=0)
 
     dnl PKG_CHECK_MODULES exports prefix_CFLAGS and prefix_LIBS; the compile
     dnl flags below therefore use the _CFLAGS suffix.
     if test $HAVE_oldipc = 1; then
         dnl Corosync ships its own IPC client library
         SUPPORT_CS=1
         CFLAGS="$CFLAGS $oldipc_CFLAGS $cpg_CFLAGS $cfg_CFLAGS"
         COROSYNC_LIBS="$COROSYNC_LIBS $oldipc_LIBS $cpg_LIBS $cfg_LIBS"
 
     elif test $HAVE_libqb = 1; then
         dnl No coroipcc library, fall back to libqb for the IPC layer
         SUPPORT_CS=1
         CS_USES_LIBQB=1
         CFLAGS="$CFLAGS $libqb_CFLAGS $cpg_CFLAGS $cfg_CFLAGS"
         COROSYNC_LIBS="$COROSYNC_LIBS $libqb_LIBS $cpg_LIBS $cfg_LIBS"
         AC_CHECK_LIB(corosync_common, cs_strerror)
 
     else
         dnl SUPPORT_CS is left un-normalized here; the reason is reported later
         aisreason="corosync/libqb IPC libraries not found by pkg_config"
     fi
 
     AC_DEFINE_UNQUOTED(HAVE_CONFDB, $HAVE_confdb, Have the old herarchial Corosync config API)
     AC_DEFINE_UNQUOTED(HAVE_CMAP, $HAVE_cmap, Have the new non-herarchial Corosync config API)
 fi
 
 
 if test $SUPPORT_CS = 1 -a x$HAVE_oldipc = x0 ; then
     dnl Support for plugins was removed about the time the IPC was
     dnl moved to libqb.
     dnl The only option now is the built-in quorum API
     CFLAGS="$CFLAGS $cmap_CFLAGS $quorum_CFLAGS"
     COROSYNC_LIBS="$COROSYNC_LIBS $cmap_LIBS $quorum_LIBS"
 
     STACKS="$STACKS corosync-native"
     AC_DEFINE_UNQUOTED(SUPPORT_CS_QUORUM, 1, Support the consumption of membership and quorum from corosync)
 fi
 
 dnl Enable the Corosync plugin stack (and optionally cman) when Corosync
 dnl support is on and the confdb API was found.
 SUPPORT_PLUGIN=0
 if test $SUPPORT_CS = 1 -a x$HAVE_confdb = x1; then
     dnl Need confdb to support cman and the plugins
     SUPPORT_PLUGIN=1
     BUILD_ATOMIC_ATTRD=0
     LCRSODIR=`$PKGCONFIG corosync --variable=lcrsodir`
     STACKS="$STACKS corosync-plugin"
     COROSYNC_LIBS="$COROSYNC_LIBS $confdb_LIBS"
 
     if test $SUPPORT_CMAN != no; then
         dnl cman needs both libcman and libfenced
         if test $HAVE_cman = 1 -a $HAVE_fenced = 1; then
             SUPPORT_CMAN=1
             STACKS="$STACKS cman"
             dnl PKG_CHECK_MODULES exports prefix_CFLAGS, hence the suffix
             CFLAGS="$CFLAGS $cman_CFLAGS $fenced_CFLAGS"
             COROSYNC_LIBS="$COROSYNC_LIBS $cman_LIBS $fenced_LIBS"
         fi
     fi
 fi
 
 dnl Normalize SUPPORT_CS and SUPPORT_CMAN for use with #if directives
 if test $SUPPORT_CMAN != 1; then
     SUPPORT_CMAN=0
 fi
 
 if test $SUPPORT_CS = 1; then
     CLUSTERLIBS="$CLUSTERLIBS $COROSYNC_LIBS"
 
 elif test $SUPPORT_CS != 0; then
     SUPPORT_CS=0
     if test $missingisfatal = 0; then
         AC_MSG_WARN(Unable to support Corosync: $aisreason)
     else
         AC_MSG_FAILURE(Unable to support Corosync: $aisreason)
     fi
 fi
 
 AC_DEFINE_UNQUOTED(SUPPORT_COROSYNC, $SUPPORT_CS,    Support the Corosync messaging and membership layer)
 AC_DEFINE_UNQUOTED(SUPPORT_CMAN,     $SUPPORT_CMAN,  Support the consumption of membership and quorum from cman)
 AC_DEFINE_UNQUOTED(CS_USES_LIBQB,    $CS_USES_LIBQB, Does corosync use libqb for its ipc)
 AC_DEFINE_UNQUOTED(PCMK_SERVICE_ID,  $PCMK_SERVICE_ID, Corosync service number)
 AC_DEFINE_UNQUOTED(SUPPORT_PLUGIN,   $SUPPORT_PLUGIN, Support the Pacemaker plugin for Corosync)
 
 AM_CONDITIONAL(BUILD_CS_SUPPORT, test $SUPPORT_CS = 1)
 AM_CONDITIONAL(BUILD_CS_PLUGIN, test $SUPPORT_PLUGIN = 1)
 AM_CONDITIONAL(BUILD_CMAN, test $SUPPORT_CMAN = 1)
 
 AM_CONDITIONAL(BUILD_ATOMIC_ATTRD, test $BUILD_ATOMIC_ATTRD = 1)
 AC_DEFINE_UNQUOTED(HAVE_ATOMIC_ATTRD, $BUILD_ATOMIC_ATTRD, Support the new atomic attrd)
 
 AC_SUBST(SUPPORT_CMAN)
 AC_SUBST(SUPPORT_CS)
 
 dnl
 dnl    Cluster stack - Sanity
 dnl
 
 if test x${enable_no_stack} = xyes; then
     AC_MSG_NOTICE(No cluster stack supported.  Just building the Policy Engine)
     PCMK_FEATURES="$PCMK_FEATURES no-cluster-stack"
 else
     AC_MSG_CHECKING(for supported stacks)
     if test x"$STACKS" = x; then
       AC_MSG_FAILURE(You must support at least one cluster stack (heartbeat or corosync) )
     fi
     AC_MSG_RESULT($STACKS)
     PCMK_FEATURES="$PCMK_FEATURES $STACKS"
 fi
 
 if test ${BUILD_ATOMIC_ATTRD} = 1; then
     PCMK_FEATURES="$PCMK_FEATURES atomic-attrd"
 fi
 AC_SUBST(CLUSTERLIBS)
 AC_SUBST(LCRSODIR)
 
 dnl ========================================================================
 dnl    SNMP
 dnl ========================================================================
 
 case $SUPPORT_SNMP in
      1|yes|true) missingisfatal=1;;
      try)        missingisfatal=0;;
      *)		 SUPPORT_SNMP=no;;
 esac
 
 SNMPLIBS=""
 
 AC_MSG_CHECKING(for snmp support)
 if test $SUPPORT_SNMP = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_SNMP=0
 else
     SNMPCONFIG=""
     AC_MSG_RESULT($SUPPORT_SNMP)
     AC_CHECK_HEADERS(net-snmp/net-snmp-config.h)
 
     if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
  	SUPPORT_SNMP="no"
     fi
 
     if test $SUPPORT_SNMP != no; then
 	AC_PATH_PROGS(SNMPCONFIG, net-snmp-config)
 	if test "X${SNMPCONFIG}" = "X"; then
 		AC_MSG_RESULT(You need the net_snmp development package to continue.)
 		SUPPORT_SNMP=no
 	fi
     fi
 
     if test $SUPPORT_SNMP != no; then
 	AC_MSG_CHECKING(for special snmp libraries)
 	SNMPLIBS=`$SNMPCONFIG --agent-libs`
 	AC_MSG_RESULT($SNMPLIBS)
     fi
 
     if test $SUPPORT_SNMP != no; then
         savedLibs=$LIBS
         LIBS="$LIBS $SNMPLIBS"
 
         dnl    On many systems libcrypto is needed when linking against libsnmp.
         dnl    Check to see if it exists, and if so use it.
 	dnl AC_CHECK_LIB(crypto, CRYPTO_free, CRYPTOLIB="-lcrypto",)
 	dnl AC_SUBST(CRYPTOLIB)
 
         AC_CHECK_FUNCS(netsnmp_transport_open_client)
         if test $ac_cv_func_netsnmp_transport_open_client != yes; then
             AC_CHECK_FUNCS(netsnmp_tdomain_transport)
             if test $ac_cv_func_netsnmp_tdomain_transport != yes; then
                 SUPPORT_SNMP=no
 	    else
                 AC_DEFINE_UNQUOTED(NETSNMPV53, 1, [Use the older 5.3 version of the net-snmp API])
             fi
         fi
         LIBS=$savedLibs
     fi
 
     if test $SUPPORT_SNMP = no; then
    	SNMPLIBS=""
    	SUPPORT_SNMP=0
      	if test $missingisfatal = 0; then
 	    AC_MSG_WARN(Unable to support SNMP)
         else
 	    AC_MSG_FAILURE(Unable to support SNMP)
         fi
     else
    	SUPPORT_SNMP=1
     fi
 fi
 
 if test $SUPPORT_SNMP = 1; then
    PCMK_FEATURES="$PCMK_FEATURES snmp"
 fi
 
 AC_SUBST(SNMPLIBS)
 AM_CONDITIONAL(ENABLE_SNMP, test "$SUPPORT_SNMP" = "1")
 AC_DEFINE_UNQUOTED(ENABLE_SNMP, $SUPPORT_SNMP, Build in support for sending SNMP traps)
 
 dnl ========================================================================
 dnl    ESMTP
 dnl ========================================================================
 
 dnl Normalize the request: an explicit yes makes a missing libesmtp fatal,
 dnl try only warns, and anything else disables the feature.
 case $SUPPORT_ESMTP in
      1|yes|true) missingisfatal=1;;
      try)        missingisfatal=0;;
      *)          SUPPORT_ESMTP=no;;
 esac
 
 dnl Start from an empty link line; it is only filled in on success
 ESMTPLIBS=""
 
 AC_MSG_CHECKING(for esmtp support)
 if test $SUPPORT_ESMTP = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_ESMTP=0
 else
    ESMTPCONFIG=""
    AC_MSG_RESULT($SUPPORT_ESMTP)
    AC_CHECK_HEADERS(libesmtp.h)
 
    dnl A missing header rules out ESMTP support
    if test "x${ac_cv_header_libesmtp_h}" != "xyes"; then
       SUPPORT_ESMTP=no
    fi
 
    dnl libesmtp-config is needed to obtain the link flags
    if test $SUPPORT_ESMTP != no; then
       AC_PATH_PROGS(ESMTPCONFIG, libesmtp-config)
       if test "X${ESMTPCONFIG}" = "X"; then
          AC_MSG_RESULT(You need the libesmtp development package to continue.)
          SUPPORT_ESMTP=no
       fi
    fi
 
    if test $SUPPORT_ESMTP != no; then
       AC_MSG_CHECKING(for special esmtp libraries)
       dnl Newlines in the tool output are turned into spaces
       ESMTPLIBS=`$ESMTPCONFIG --libs | tr '\n' ' '`
       AC_MSG_RESULT($ESMTPLIBS)
    fi
 
    dnl Convert the outcome to 0 or 1, honouring missingisfatal on failure
    if test $SUPPORT_ESMTP = no; then
       SUPPORT_ESMTP=0
       if test $missingisfatal = 0; then
          AC_MSG_WARN(Unable to support ESMTP)
       else
          AC_MSG_FAILURE(Unable to support ESMTP)
       fi
    else
       SUPPORT_ESMTP=1
       PCMK_FEATURES="$PCMK_FEATURES libesmtp"
    fi
 fi
 
 AC_SUBST(ESMTPLIBS)
 AM_CONDITIONAL(ENABLE_ESMTP, test "$SUPPORT_ESMTP" = "1")
 AC_DEFINE_UNQUOTED(ENABLE_ESMTP, $SUPPORT_ESMTP, Build in support for sending mail notifications with ESMTP)
 
 dnl ========================================================================
 dnl    ACL
 dnl ========================================================================
 
 dnl Normalize the request: an explicit yes makes a failed check fatal,
 dnl try only warns, and anything else disables ACL support.
 case $SUPPORT_ACL in
      1|yes|true) missingisfatal=1;;
      try)        missingisfatal=0;;
      *)		 SUPPORT_ACL=no;;
 esac
 
 AC_MSG_CHECKING(for acl support)
 if test $SUPPORT_ACL = no; then
     AC_MSG_RESULT(no (disabled))
     SUPPORT_ACL=0
 else
     AC_MSG_RESULT($SUPPORT_ACL)
 
     dnl Assume success, then turn ACLs off again if libqb lacks
     dnl qb_ipcs_connection_auth_set
     SUPPORT_ACL=1
     AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
     if test $ac_cv_lib_qb_qb_ipcs_connection_auth_set != yes; then
         SUPPORT_ACL=0
     fi
 
     dnl Report the failure as a warning or a fatal error per missingisfatal
     if test $SUPPORT_ACL = 0; then
         if test $missingisfatal = 0; then
             AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0)
         else
             AC_MSG_FAILURE(Unable to support ACL. You need to use libqb > 0.13.0)
         fi
     fi
 fi
 
 if test $SUPPORT_ACL = 1; then
     PCMK_FEATURES="$PCMK_FEATURES acls"
 fi
 
 dnl Export the 0/1 result as an automake conditional and as a C define
 AM_CONDITIONAL(ENABLE_ACL, test "$SUPPORT_ACL" = "1")
 AC_DEFINE_UNQUOTED(ENABLE_ACL, $SUPPORT_ACL, Build in support for CIB ACL)
 
 dnl ========================================================================
 dnl    CIB secrets
 dnl ========================================================================
 
 case $SUPPORT_CIBSECRETS in
      1|yes|true|try)
         SUPPORT_CIBSECRETS=1;;
      *)
         SUPPORT_CIBSECRETS=0;;
 esac
 
 AC_DEFINE_UNQUOTED(SUPPORT_CIBSECRETS, $SUPPORT_CIBSECRETS, Support CIB secrets)
 AM_CONDITIONAL(BUILD_CIBSECRETS, test $SUPPORT_CIBSECRETS = 1)
 
 if test $SUPPORT_CIBSECRETS = 1; then
     PCMK_FEATURES="$PCMK_FEATURES cibsecrets"
 
     LRM_CIBSECRETS_DIR="${localstatedir}/lib/pacemaker/lrm/secrets"
     AC_DEFINE_UNQUOTED(LRM_CIBSECRETS_DIR,"$LRM_CIBSECRETS_DIR", Location for CIB secrets)
     AC_SUBST(LRM_CIBSECRETS_DIR)
 
     LRM_LEGACY_CIBSECRETS_DIR="${localstatedir}/lib/heartbeat/lrm/secrets"
     AC_DEFINE_UNQUOTED(LRM_LEGACY_CIBSECRETS_DIR,"$LRM_LEGACY_CIBSECRETS_DIR", Legacy location for CIB secrets)
     AC_SUBST(LRM_LEGACY_CIBSECRETS_DIR)
 fi
 
 dnl ========================================================================
 dnl    GnuTLS
 dnl ========================================================================
 
 AC_CHECK_HEADERS(gnutls/gnutls.h)
 AC_CHECK_HEADERS(security/pam_appl.h pam/pam_appl.h)
 
 dnl GNUTLS library: Attempt to determine by 'libgnutls-config' program.
 dnl If no 'libgnutls-config', try traditional autoconf means.
 AC_PATH_PROGS(LIBGNUTLS_CONFIG, libgnutls-config)
 
 if test -n "$LIBGNUTLS_CONFIG"; then
 	AC_MSG_CHECKING(for gnutls header flags)
 	GNUTLSHEAD="`$LIBGNUTLS_CONFIG --cflags`";
 	AC_MSG_RESULT($GNUTLSHEAD)
 	AC_MSG_CHECKING(for gnutls library flags)
 	GNUTLSLIBS="`$LIBGNUTLS_CONFIG --libs`";
 	AC_MSG_RESULT($GNUTLSLIBS)
 fi
 AC_CHECK_LIB(gnutls, gnutls_init)
 AC_CHECK_FUNCS(gnutls_priority_set_direct)
 
 AC_SUBST(GNUTLSHEAD)
 AC_SUBST(GNUTLSLIBS)
 
 
 dnl ========================================================================
 dnl    System Health
 dnl ========================================================================
 
 dnl Check if servicelog development package is installed
 SERVICELOG=servicelog-1
 SERVICELOG_EXISTS="no"
 AC_MSG_CHECKING(for $SERVICELOG packages)
 if
     $PKGCONFIG --exists $SERVICELOG
 then
     PKG_CHECK_MODULES([SERVICELOG], [servicelog-1])
     SERVICELOG_EXISTS="yes"
 fi
 AC_MSG_RESULT($SERVICELOG_EXISTS)
 AM_CONDITIONAL(BUILD_SERVICELOG, test "$SERVICELOG_EXISTS" = "yes")
 
 dnl Check if OpenIPMI packages and servicelog are installed
 OPENIPMI="OpenIPMI OpenIPMIposix"
 OPENIPMI_SERVICELOG_EXISTS="no"
 AC_MSG_CHECKING(for $SERVICELOG $OPENIPMI packages)
 if
     $PKGCONFIG --exists $OPENIPMI $SERVICELOG
 then
     PKG_CHECK_MODULES([OPENIPMI_SERVICELOG],[OpenIPMI OpenIPMIposix])
     OPENIPMI_SERVICELOG_EXISTS="yes"
 fi
 AC_MSG_RESULT($OPENIPMI_SERVICELOG_EXISTS)
 AM_CONDITIONAL(BUILD_OPENIPMI_SERVICELOG, test "$OPENIPMI_SERVICELOG_EXISTS" = "yes")
 
 dnl ========================================================================
 dnl Compiler flags
 dnl ========================================================================
 
 dnl Make sure that CFLAGS is not exported. If the user did
 dnl not have CFLAGS in their environment then this should have
 dnl no effect. However if CFLAGS was exported from the user's
 dnl environment, then the new CFLAGS will also be exported
 dnl to sub processes.
 
 CC_ERRORS=""
 CC_EXTRAS=""
 
 if export | fgrep " CFLAGS=" > /dev/null; then
 	SAVED_CFLAGS="$CFLAGS"
 	unset CFLAGS
 	CFLAGS="$SAVED_CFLAGS"
 	unset SAVED_CFLAGS
 fi
 
 if test "$GCC" != yes; then
         CFLAGS="$CFLAGS -g"
 	enable_fatal_warnings=no
 else
         CFLAGS="$CFLAGS -ggdb"
 
 	# We had to eliminate -Wnested-externs because of libtool changes
         EXTRA_FLAGS="-fgnu89-inline
 		-Wall
 		-Waggregate-return
 		-Wbad-function-cast
 		-Wcast-align
 		-Wdeclaration-after-statement
 		-Wendif-labels
 		-Wfloat-equal
 		-Wformat=2
 		-Wformat-security
 		-Wformat-nonliteral
 		-Wmissing-prototypes
 		-Wmissing-declarations
 		-Wnested-externs
 		-Wno-long-long
 		-Wno-strict-aliasing
 		-Wpointer-arith
 		-Wstrict-prototypes
 		-Wwrite-strings
 		-Wunused-but-set-variable
 		-Wunsigned-char"
 
 # Additional warnings it might be nice to enable one day
 #		-Wshadow
 #		-Wunreachable-code
 	case "$host_os" in
 	    *solaris*) ;;
 	    *) EXTRA_FLAGS="$EXTRA_FLAGS
 			-fstack-protector-all"
 		;;
 	esac
 	for j in $EXTRA_FLAGS
 	do
 	  if
 	    cc_supports_flag $j
 	  then
 	    CC_EXTRAS="$CC_EXTRAS $j"
 	  fi
 	done
 
 dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x
 dnl
 dnl The sed bracket expression is written with doubled square brackets
 dnl because autoconf strips one level of them from top-level text; with
 dnl single brackets sed would delete the whole line and GCC_MAJOR would
 dnl always end up empty.
 dnl NOTE(review): AM_CONDITIONAL sits inside the GCC-only branch of the
 dnl enclosing if; automake expects conditionals to be defined on every
 dnl configure run - consider moving it after the closing fi.
 
 	GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[[.]].*//'`
 	AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4)
 
 dnl System specific options
 
 	case "$host_os" in
   	*linux*|*bsd*)
 		if test "${enable_fatal_warnings}" = "unknown"; then
         		enable_fatal_warnings=yes
         	fi
           	;;
 	esac
 
 	if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then
 	   enable_fatal_warnings=yes
 	else
 	   enable_fatal_warnings=no
         fi
 
 	if test "x${enable_ansi}" = xyes && cc_supports_flag -std=iso9899:199409 ; then
 	  AC_MSG_NOTICE(Enabling ANSI Compatibility)
 	  CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
 	fi
 
   	AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
 fi
 
 CFLAGS="$CFLAGS $CC_EXTRAS"
 
 NON_FATAL_CFLAGS="$CFLAGS"
 AC_SUBST(NON_FATAL_CFLAGS)
 
 dnl
 dnl We reset CFLAGS to include our warnings *after* all function
 dnl checking goes on, so that our warning flags don't keep the
 dnl AC_*FUNCS() calls above from working.  In particular, -Werror will
 dnl *always* cause us troubles if we set it before here.
 dnl
 dnl
 if test "x${enable_fatal_warnings}" = xyes ; then
    AC_MSG_NOTICE(Enabling Fatal Warnings)
    CFLAGS="$CFLAGS -Werror"
 fi
 AC_SUBST(CFLAGS)
 
 dnl This is useful for use in Makefiles that need to remove one specific flag
 CFLAGS_COPY="$CFLAGS"
 AC_SUBST(CFLAGS_COPY)
 
 AC_SUBST(LIBADD_DL)	dnl extra flags for dynamic linking libraries
 AC_SUBST(LIBADD_INTL)	dnl extra flags for GNU gettext stuff...
 
 AC_SUBST(LOCALE)
 
 dnl Options for cleaning up the compiler output
 QUIET_LIBTOOL_OPTS=""
 QUIET_MAKE_OPTS=""
 if test "x${enable_quiet}" = "xyes"; then
    QUIET_LIBTOOL_OPTS="--quiet"
    QUIET_MAKE_OPTS="--quiet"
 fi
 
 AC_MSG_RESULT(Supress make details: ${enable_quiet})
 
 dnl Put the above variables to use
 LIBTOOL="${LIBTOOL} --tag=CC \$(QUIET_LIBTOOL_OPTS)"
 MAKE="${MAKE} \$(QUIET_MAKE_OPTS)"
 
 AC_SUBST(CC)
 AC_SUBST(MAKE)
 AC_SUBST(LIBTOOL)
 AC_SUBST(QUIET_MAKE_OPTS)
 AC_SUBST(QUIET_LIBTOOL_OPTS)
 AC_DEFINE_UNQUOTED(CRM_FEATURES, "$PCMK_FEATURES", Set of enabled features)
 AC_SUBST(PCMK_FEATURES)
 
 dnl The Makefiles and shell scripts we output
 AC_CONFIG_FILES(Makefile				        \
 Doxyfile							\
 coverage.sh							\
 cts/Makefile					        	\
 	cts/CTSvars.py						\
 	cts/LSBDummy						\
 	cts/benchmark/Makefile					\
 	cts/benchmark/clubench					\
 	cts/lxc_autogen.sh					\
 cib/Makefile							\
 attrd/Makefile							\
 crmd/Makefile							\
 pengine/Makefile						\
 	pengine/regression.core.sh				\
 doc/Makefile							\
 	doc/Pacemaker_Explained/publican.cfg			\
 	doc/Clusters_from_Scratch/publican.cfg			\
 	doc/Pacemaker_Remote/publican.cfg			\
 include/Makefile						\
 	include/crm/Makefile					\
 		include/crm/cib/Makefile			\
 		include/crm/common/Makefile			\
 		include/crm/cluster/Makefile			\
 		include/crm/fencing/Makefile			\
 		include/crm/pengine/Makefile			\
 replace/Makefile						\
 lib/Makefile							\
 	lib/pacemaker.pc					\
 	lib/pacemaker-cib.pc					\
 	lib/pacemaker-lrmd.pc					\
 	lib/pacemaker-service.pc				\
 	lib/pacemaker-pengine.pc				\
 	lib/pacemaker-fencing.pc				\
 	lib/pacemaker-cluster.pc				\
 	lib/ais/Makefile					\
 	lib/common/Makefile					\
 	lib/cluster/Makefile					\
 	lib/cib/Makefile					\
 	lib/pengine/Makefile					\
 	lib/transition/Makefile					\
 	lib/fencing/Makefile					\
 	lib/lrmd/Makefile					\
 	lib/services/Makefile					\
 mcp/Makefile							\
 	mcp/pacemaker						\
 	mcp/pacemaker.service					\
 	mcp/pacemaker.upstart					\
 	mcp/pacemaker.combined.upstart				\
 fencing/Makefile                                                \
         fencing/regression.py                                   \
 lrmd/Makefile                                                   \
         lrmd/regression.py                                      \
         lrmd/pacemaker_remote.service				\
         lrmd/pacemaker_remote					\
 extra/Makefile							\
 	extra/resources/Makefile				\
 	extra/rgmanager/Makefile				\
 	extra/logrotate/Makefile				\
 	extra/logrotate/pacemaker				\
 tools/Makefile							\
 	tools/crm_report					\
         tools/report.common                                     \
 	tools/cibsecret						\
+	tools/crm_mon.upstart					\
 xml/Makefile							\
 lib/gnu/Makefile						\
 		)
 
 dnl Now process the entire list of files added by previous
 dnl  calls to AC_CONFIG_FILES()
 AC_OUTPUT()
 
 dnl *****************
 dnl Configure summary
 dnl *****************
 
 AC_MSG_RESULT([])
 AC_MSG_RESULT([$PACKAGE configuration:])
 AC_MSG_RESULT([  Version                  = ${VERSION} (Build: $BUILD_VERSION)])
 AC_MSG_RESULT([  Features                 =${PCMK_FEATURES}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  Prefix                   = ${prefix}])
 AC_MSG_RESULT([  Executables              = ${sbindir}])
 AC_MSG_RESULT([  Man pages                = ${mandir}])
 AC_MSG_RESULT([  Libraries                = ${libdir}])
 AC_MSG_RESULT([  Header files             = ${includedir}])
 AC_MSG_RESULT([  Arch-independent files   = ${datadir}])
 AC_MSG_RESULT([  State information        = ${localstatedir}])
 AC_MSG_RESULT([  System configuration     = ${sysconfdir}])
 AC_MSG_RESULT([  Corosync Plugins         = ${LCRSODIR}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  Use system LTDL          = ${ac_cv_lib_ltdl_lt_dlopen}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  HA group name            = ${CRM_DAEMON_GROUP}])
 AC_MSG_RESULT([  HA user name             = ${CRM_DAEMON_USER}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  CFLAGS                   = ${CFLAGS}])
 AC_MSG_RESULT([  Libraries                = ${LIBS}])
 AC_MSG_RESULT([  Stack Libraries          = ${CLUSTERLIBS}])
diff --git a/crmd/lrm.c b/crmd/lrm.c
index db0bffbf43..44634fba7a 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -1,2176 +1,2192 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
 #include <crm/crm.h>
 #include <crm/services.h>
 
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 
 /* Start-delay threshold: 5 * 60 * 1000 (presumably milliseconds, i.e. five
  * minutes - confirm against the users of this macro).  The expression is
  * parenthesized so it expands as a single value inside larger expressions
  * such as "x / START_DELAY_THRESHOLD". */
 #define START_DELAY_THRESHOLD (5 * 60 * 1000)
 /* Limit that do_lrm_control() compares num_lrm_register_fails against
  * before giving up on the LRM connection */
 #define MAX_LRM_REG_FAILS 30
 
 /* Context handed to lrm_remove_deleted_rsc() (as user_data) while
  * delete_rsc_entry() sweeps the deletion_ops table. */
 struct delete_event_s {
     /* result of the delete request; forwarded to notify_deleted() */
     int rc;
     /* id of the resource being deleted; compared against each pending deletion */
     const char *rsc;
     /* connection state forwarded to notify_deleted() */
     lrm_state_t *lrm_state;
 };
 
 /* Forward declarations for functions used before their definition in this file */
 
 /* Non-static: invoked from lrm_op_callback() for completed executions */
 gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op);
 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
 /* GHRFunc-style callback used with g_hash_table_foreach_remove() on pending_ops */
 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
 static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                              const char *user_name);
 
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
                                        const char *rsc_id, const char *operation);
 static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
                           xmlNode * msg, xmlNode * request);
 
 void send_direct_ack(const char *to_host, const char *to_sys,
                      lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
 
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
                                          int log_level);
 
 /*
  * React to the loss of the local LRM connection.
  *
  * When the connection was still flagged as established (R_LRM_CONNECTED)
  * the loss is unexpected: log it as critical, raise I_ERROR and clear the
  * flag.  Otherwise only record that the disconnect happened.
  */
 static void
 lrm_connection_destroy(void)
 {
     if (!is_set(fsa_input_register, R_LRM_CONNECTED)) {
         /* The flag was already cleared, so nothing needs escalating */
         crm_info("LRM Connection disconnected");
         return;
     }
 
     crm_crit("LRM Connection failed");
     register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
     clear_bit(fsa_input_register, R_LRM_CONNECTED);
 }
 
 /*
  * Build the string "<rsc>:<call_id>".
  *
  * Returns a newly allocated string the caller must free(), or NULL if the
  * allocation failed.
  */
 static char *
 make_stop_id(const char *rsc, int call_id)
 {
     /* resource name plus 34 bytes for ':', the decimal call id and the NUL */
     size_t buf_len = strlen(rsc) + 34;
     char *stop_id = calloc(1, buf_len);
 
     if (stop_id == NULL) {
         return NULL;
     }
 
     snprintf(stop_id, buf_len, "%s:%d", rsc, call_id);
     return stop_id;
 }
 
 /*
  * g_hash_table_foreach() callback: duplicate one key/value pair into the
  * hash table passed as user_data, skipping any key that contains the
  * CRM_META "_" marker.
  */
 static void
 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
 {
     const char *name = key;
     const char *setting = value;
     GHashTable *target = user_data;
 
     if (strstr(name, CRM_META "_") != NULL) {
         /* meta attribute - not wanted here */
         return;
     }
 
     g_hash_table_replace(target, strdup(name), strdup(setting));
 }
 
 /*
  * g_hash_table_foreach() callback: duplicate one key/value pair into the
  * hash table passed as user_data, but only when the key contains the
  * CRM_META "_" marker.
  */
 static void
 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
 {
     const char *name = key;
     const char *setting = value;
     GHashTable *target = user_data;
 
     if (strstr(name, CRM_META "_") == NULL) {
         /* not a meta attribute - skip it */
         return;
     }
 
     g_hash_table_replace(target, strdup(name), strdup(setting));
 }
 
 /*
  * Record the outcome of an operation in the per-resource history cache
  * (lrm_state->resource_history).
  *
  * lrm_state: connection whose history table is updated
  * rsc:       agent details used to create a new history entry; may be NULL,
  *            in which case an unknown resource is not added to the cache
  * op:        the operation result to record
  *
  * Summary of the bookkeeping:
  *  - op->rsc_deleted: the resource's status is purged from the CIB instead
  *  - notify operations are never recorded
  *  - cancelled recurring ops are dropped from recurring_op_list
  *  - failed ops replace entry->failed
  *  - successful non-recurring ops replace entry->last (and, for start and
  *    status ops carrying parameters, refresh entry->stop_params)
  *  - recurring ops are (re)inserted at the head of recurring_op_list
  *  - any other non-recurring op that is not RSC_STATUS flushes
  *    recurring_op_list
  */
 static void
 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
     int target_rc = 0;
     rsc_history_t *entry = NULL;
 
     if (op->rsc_deleted) {
         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
         delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
         return;
     }
 
     if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
         return;
     }
 
     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 
     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
     if (entry == NULL && rsc) {
         entry = calloc(1, sizeof(rsc_history_t));
         entry->id = strdup(op->rsc_id);
         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 
         /* rsc.id shares the entry's own id string rather than a second copy */
         entry->rsc.id = entry->id;
         entry->rsc.type = strdup(rsc->type);
         entry->rsc.class = strdup(rsc->class);
         if (rsc->provider) {
             entry->rsc.provider = strdup(rsc->provider);
         } else {
             entry->rsc.provider = NULL;
         }
 
     } else if (entry == NULL) {
         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
         return;
     }
 
     entry->last_callid = op->call_id;
     target_rc = rsc_op_expected_rc(op);
     if (op->op_status == PCMK_LRM_OP_CANCELLED) {
         if (op->interval > 0) {
             GList *gIter, *gIterNext;
 
             crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type,
                       op->interval);
 
             for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIterNext) {
                 lrmd_event_data_t *existing = gIter->data;
 
                 /* Fetch the successor first: the current link may be deleted below */
                 gIterNext = gIter->next;
 
                 if (crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
                     && safe_str_eq(op->op_type, existing->op_type)
                     && op->interval == existing->interval) {
                     lrmd_free_event(existing);
                     entry->recurring_op_list = g_list_delete_link(entry->recurring_op_list, gIter);
                 }
             }
             return;
 
         } else {
             crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval,
                       op->rc, op->op_status);
         }
 
     } else if (did_rsc_op_fail(op, target_rc)) {
         /* We must store failed monitors here
          * - otherwise the block below will cause them to be forgotten when a stop happens
          */
         if (entry->failed) {
             lrmd_free_event(entry->failed);
         }
         entry->failed = lrmd_copy_event(op);
 
     } else if (op->interval == 0) {
         if (entry->last) {
             lrmd_free_event(entry->last);
         }
         entry->last = lrmd_copy_event(op);
 
         if (op->params &&
             (safe_str_eq(CRMD_ACTION_START, op->op_type) ||
              safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
 
             /* Rebuild stop_params from the non-meta parameters of this op */
             if (entry->stop_params) {
                 g_hash_table_destroy(entry->stop_params);
             }
             entry->stop_params = g_hash_table_new_full(crm_str_hash,
                                                        g_str_equal, g_hash_destroy_str,
                                                        g_hash_destroy_str);
 
             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
         }
     }
 
     if (op->interval > 0) {
         GListPtr iter = NULL;
 
         for(iter = entry->recurring_op_list; iter; iter = iter->next) {
             lrmd_event_data_t *o = iter->data;
 
             /* op->rsc_id is implied */
             if(op->interval == o->interval && strcmp(op->op_type, o->op_type) == 0) {
                 crm_trace("Removing existing recurring op entry: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
                 /* The list holds copies made with lrmd_copy_event(), so the
                  * superseded copy must be released along with its link or
                  * it is leaked.  The loop exits right after, so dropping
                  * the current link is safe. */
                 entry->recurring_op_list = g_list_delete_link(entry->recurring_op_list, iter);
                 lrmd_free_event(o);
                 break;
             }
         }
 
         crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 
     } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
         GList *gIter = entry->recurring_op_list;
 
         crm_trace("Dropping %d recurring ops because of: %s_%s_%d",
                   g_list_length(gIter), op->rsc_id, op->op_type, op->interval);
         for (; gIter != NULL; gIter = gIter->next) {
             lrmd_free_event(gIter->data);
         }
         g_list_free(entry->recurring_op_list);
         entry->recurring_op_list = NULL;
     }
 }
 
 /*
  * Event callback for lrmd connections.
  *
  * A disconnect event on the local connection is turned into
  * lrm_connection_destroy(); execution results are passed on to
  * process_lrm_event(); every other event type is ignored.
  */
 void
 lrm_op_callback(lrmd_event_data_t * op)
 {
     const char *nodename = fsa_our_uname;
     lrm_state_t *lrm_state = NULL;
 
     CRM_CHECK(op != NULL, return);
 
     /* Events from a remote connection name their node; all others are ours */
     if (op->remote_nodename) {
         nodename = op->remote_nodename;
     }
 
     if (op->type == lrmd_event_disconnect && safe_str_eq(nodename, fsa_our_uname)) {
         /* The local lrmd ipc connection went down: update the crmd's state bits */
         lrm_connection_destroy();
         return;
     }
 
     if (op->type != lrmd_event_exec_complete) {
         /* Only execution results need processing */
         return;
     }
 
     lrm_state = lrm_state_find(nodename);
     CRM_ASSERT(lrm_state != NULL);
 
     process_lrm_event(lrm_state, op);
 }
 
 /*
  * FSA action handler for A_LRM_CONNECT and A_LRM_DISCONNECT.
  *
  * Only the local lrmd connection is managed here.  Remote connections are
  * handled as resources within the pengine, and connecting to or
  * disconnecting from remote lrmd instances is handled differently.
  */
 void
 do_lrm_control(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     lrm_state_t *lrm_state = NULL;
 
     if (fsa_our_uname == NULL) {
         /* Nothing to do */
         return;
     }
 
     lrm_state = lrm_state_find_or_create(fsa_our_uname);
     if (lrm_state == NULL) {
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
         return;
     }
 
     if (action & A_LRM_DISCONNECT) {
         /* The verification always runs; its result only stalls the FSA
          * when disconnecting is the sole requested action */
         if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE
             && action == A_LRM_DISCONNECT) {
             crmd_fsa_stall(FALSE);
             return;
         }
 
         clear_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("Disconnecting from the LRM");
         lrm_state_disconnect(lrm_state);
         lrm_state_reset_tables(lrm_state);
         crm_notice("Disconnected from the LRM");
     }
 
     if (action & A_LRM_CONNECT) {
         int ret = pcmk_ok;
 
         crm_debug("Connecting to the LRM");
         ret = lrm_state_ipc_connect(lrm_state);
 
         if (ret != pcmk_ok) {
             if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
                 /* Still below the limit: start wait_timer and stall the FSA.
                  * NOTE(review): the failure counter is not modified in this
                  * function - presumably lrm_state_ipc_connect() maintains
                  * it; confirm. */
                 crm_warn("Failed to sign on to the LRM %d"
                          " (%d max) times", lrm_state->num_lrm_register_fails, MAX_LRM_REG_FAILS);
 
                 crm_timer_start(wait_timer);
                 crmd_fsa_stall(FALSE);
                 return;
             }
 
             /* Limit reached: give up and report an error to the FSA */
             crm_err("Failed to sign on to the LRM %d" " (max) times",
                     lrm_state->num_lrm_register_fails);
             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
             return;
         }
 
         set_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("LRM connection established");
     }
 
     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
         crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
     }
 }
 
 /*
  * Check whether anything is still pending or active on this connection
  * before it is torn down.
  *
  * lrm_state: connection to inspect
  * cur_state: current FSA state; S_TERMINATE raises the log level to LOG_ERR
  * log_level: level used for the "pending operations" messages
  *
  * Returns FALSE only when non-recurring operations are still pending, the
  * FSA is not in S_TERMINATE and R_SENT_RSC_STOP is set; TRUE in every other
  * case.  Resources that are still active are logged but do not change the
  * result.
  */
 static gboolean
 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
 {
     int counter = 0;
     gboolean rc = TRUE;
     const char *when = "lrm disconnect";
 
     GHashTableIter gIter;
     const char *key = NULL;
     rsc_history_t *entry = NULL;
     struct recurring_op_s *pending = NULL;
 
     crm_debug("Checking for active resources before exit");
 
     /* Pick the wording (and severity) used in the messages below */
     if (cur_state == S_TERMINATE) {
         log_level = LOG_ERR;
         when = "shutdown";
 
     } else if (is_set(fsa_input_register, R_SHUTDOWN)) {
         when = "shutdown... waiting";
     }
 
     /* While still connected, run stop_recurring_actions() over every pending
      * op; entries for which it returns TRUE are removed from the table */
     if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
         guint removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_actions, lrm_state);
 
         crm_notice("Stopped %u recurring operations at %s (%u ops remaining)",
                    removed, when, g_hash_table_size(lrm_state->pending_ops));
     }
 
     /* Count the non-recurring operations that are still outstanding */
     if (lrm_state->pending_ops) {
         g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
             /* Ignore recurring actions in the shutdown calculations */
             if (pending->interval == 0) {
                 counter++;
             }
         }
     }
 
     if (counter > 0) {
         do_crm_log(log_level, "%d pending LRM operations at %s", counter, when);
 
         if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
             /* Not going to wait: list every pending action (recurring ones
              * included) and report TRUE */
             g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
             }
 
         } else {
             /* The only path that returns FALSE */
             rc = FALSE;
         }
         return rc;
     }
 
     if (lrm_state->resource_history == NULL) {
         return rc;
     }
 
     if (cur_state == S_TERMINATE || is_set(fsa_input_register, R_SHUTDOWN)) {
         /* At this point we're not waiting, we're just shutting down */
         when = "shutdown";
     }
 
     /* Report resources that is_rsc_active() still considers active, together
      * with any pending operation that belongs to them */
     counter = 0;
     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
             continue;
         }
 
         counter++;
         crm_trace("Found %s active", entry->id);
         if (lrm_state->pending_ops) {
             /* Separate iterator: gIter is still walking resource_history */
             GHashTableIter hIter;
 
             g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
                 if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
                     crm_notice("%sction %s (%s) incomplete at %s",
                                pending->interval == 0 ? "A" : "Recurring a",
                                key, pending->op_key, when);
                 }
             }
         }
     }
 
     if (counter) {
         crm_err("%d resources were active at %s.", counter, when);
     }
 
     return rc;
 }
 
 /*
  * Fetch the metadata of a resource agent through the local lrmd connection.
  *
  * A NULL provider is treated as "heartbeat".  Returns a copy of the
  * metadata made with strdup() (so the caller releases it with free()), or
  * NULL if none could be obtained.
  */
 static char *
 get_rsc_metadata(const char *type, const char *class, const char *provider)
 {
     int rc = 0;
     char *raw = NULL;
     char *result = NULL;
 
     /* Always use a local connection for this operation */
     lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname);
 
     CRM_CHECK(type != NULL, return NULL);
     CRM_CHECK(class != NULL, return NULL);
     CRM_CHECK(lrm_state != NULL, return NULL);
 
     if (provider == NULL) {
         provider = "heartbeat";
     }
 
     crm_trace("Retreiving metadata for %s::%s:%s", type, class, provider);
     rc = lrm_state_get_metadata(lrm_state, class, provider, type, &raw, 0);
 
     if (raw == NULL) {
         crm_warn("No metadata found for %s::%s:%s: %s (%d)", type, class, provider, pcmk_strerror(rc), rc);
         return NULL;
     }
 
     /* The lrmd-provided buffer is released with g_free(); hand back a copy
      * allocated with strdup() instead */
     result = strdup(raw);
     g_free(raw);
 
     return result;
 }
 
 /* Per-agent reload information cached by get_rsc_restart_list() */
 typedef struct reload_data_s {
     /* "type::class:provider" string; also used as the entry's hash key */
     char *key;
     /* agent metadata text as returned by get_rsc_metadata() */
     char *metadata;
     /* time() at which this entry was created */
     time_t last_query;
     /* TRUE when the metadata lists an action named "reload" */
     gboolean can_reload;
     /* names of the parameters logged as "not reloadable" (strdup'd strings) */
     GListPtr restart_list;
 } reload_data_t;
 
 /*
  * Value destructor for reload_hash: release a reload_data_t together with
  * the strings and the parameter list it owns.
  */
 static void
 g_hash_destroy_reload(gpointer data)
 {
     reload_data_t *doomed = data;
 
     /* each list element is a strdup()'d string, so free() is the element destructor */
     g_list_free_full(doomed->restart_list, free);
     free(doomed->metadata);
     free(doomed->key);
     free(doomed);
 }
 
 /* Cache of reload_data_t entries keyed by "type::class:provider".
  * The table has no key destructor: every key string is owned by its
  * reload_data_t value and released in g_hash_destroy_reload(). */
 GHashTable *reload_hash = NULL;
 
 /*
  * Return the names of the agent parameters that are not reloadable, i.e.
  * those whose "unique" attribute is true in the agent's metadata.
  *
  * rsc: agent details (type, class, optional provider - NULL means "heartbeat")
  * op:  operation being recorded; a start operation forces a fresh metadata
  *      query when the cached entry is more than 9 seconds old
  *
  * The result is cached per agent in reload_hash.  The returned list belongs
  * to the cache entry and must not be freed by the caller.  NULL is returned
  * when the metadata is unavailable or invalid, when the agent has no
  * "reload" action, when no unique parameters exist, or on allocation
  * failure.
  */
 static GListPtr
 get_rsc_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
     int len = 0;
     char *key = NULL;
     char *copy = NULL;
     const char *value = NULL;
     const char *provider = NULL;
 
     xmlNode *param = NULL;
     xmlNode *params = NULL;
     xmlNode *actions = NULL;
     xmlNode *metadata = NULL;
 
     time_t now = time(NULL);
     reload_data_t *reload = NULL;
 
     if (reload_hash == NULL) {
         reload_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_reload);
     }
 
     provider = rsc->provider;
     if (provider == NULL) {
         provider = "heartbeat";
     }
 
     /* "::" + ":" + terminating NUL account for the extra 4 bytes */
     len = strlen(rsc->type) + strlen(rsc->class) + strlen(provider) + 4;
     key = malloc(len);
     if (key == NULL) {
         /* Without a key the cache can be neither consulted nor updated */
         return NULL;
     }
     snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider);
     reload = g_hash_table_lookup(reload_hash, key);
 
     if (reload && ((now - 9) > reload->last_query)
         && safe_str_eq(op->op_type, RSC_START)) {
         reload = NULL;          /* re-query */
     }
 
     if (reload == NULL) {
         xmlNode *action = NULL;
 
         reload = calloc(1, sizeof(reload_data_t));
         if (reload == NULL) {
             free(key);
             return NULL;
         }
 
         /* Replaces (and thereby destroys) any stale entry for the same agent */
         g_hash_table_replace(reload_hash, key, reload);
 
         reload->last_query = now;
         reload->key = key;
         key = NULL;             /* ownership moved to the cache entry */
         reload->metadata = get_rsc_metadata(rsc->type, rsc->class, provider);
 
         if(reload->metadata == NULL) {
             goto cleanup;
         }
 
         metadata = string2xml(reload->metadata);
         if (metadata == NULL) {
             /* Use the defaulted provider: rsc->provider may be NULL */
             crm_err("Metadata for %s::%s:%s is not valid XML",
                     rsc->type, rsc->class, provider);
             goto cleanup;
         }
 
         actions = find_xml_node(metadata, "actions", TRUE);
 
         for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) {
             if (crm_str_eq((const char *)action->name, "action", TRUE)) {
                 value = crm_element_value(action, "name");
                 if (safe_str_eq("reload", value)) {
                     reload->can_reload = TRUE;
                     break;
                 }
             }
         }
 
         if (reload->can_reload == FALSE) {
             goto cleanup;
         }
 
         params = find_xml_node(metadata, "parameters", TRUE);
         for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) {
             if (crm_str_eq((const char *)param->name, "parameter", TRUE)) {
                 value = crm_element_value(param, "unique");
                 if (crm_is_true(value)) {
                     value = crm_element_value(param, "name");
                     if (value == NULL) {
                         /* 'key' was handed to the cache entry above, so
                          * report the entry's copy of it */
                         crm_err("%s: NULL param", reload->key);
                         continue;
                     }
                     crm_debug("Attr %s is not reloadable", value);
                     copy = strdup(value);
                     CRM_LOG_ASSERT(copy != NULL);
                     if(copy == NULL) { continue; };
                     reload->restart_list = g_list_append(reload->restart_list, copy);
                 }
             }
         }
     }
 
   cleanup:
     /* key is non-NULL only when an existing cache entry was reused */
     free(key);
     free_xml(metadata);
     return reload->restart_list;
 }
 
 /*
  * Add the reload-related attributes (XML_LRM_ATTR_OP_RESTART and
  * XML_LRM_ATTR_RESTART_DIGEST) to an operation update.
  *
  * rsc:     agent details; nothing is added when NULL
  * op:      the operation being recorded
  * update:  XML element that receives the attributes
  * version: feature-set version of the caller; versions older than 1.0.8
  *          get no reload attributes
  *
  * Nothing is added for recurring operations, operations without
  * parameters, stop operations, or agents for which
  * get_rsc_restart_list() returns NULL.
  */
 static void
 append_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, xmlNode * update,
                     const char *version)
 {
     int len = 0;
     char *list = NULL;
     char *digest = NULL;
     const char *value = NULL;
     xmlNode *restart = NULL;
     GListPtr restart_list = NULL;
     GListPtr lpc = NULL;
 
     if (op->interval > 0) {
         /* monitors are not reloadable */
         return;
 
     } else if (op->params == NULL) {
         crm_debug("%s has no parameters", ID(update));
         return;
 
     } else if (rsc == NULL) {
         return;
 
     } else if (crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) {
         /* Stopped resources don't need to be reloaded */
         return;
 
     } else if (compare_version("1.0.8", version) > 0) {
         /* Caller version does not support reloads */
         return;
     }
 
     restart_list = get_rsc_restart_list(rsc, op);
     if (restart_list == NULL) {
         /* Resource does not support reloads */
         return;
     }
 
     restart = create_xml_node(NULL, XML_TAG_PARAMS);
     for (lpc = restart_list; lpc != NULL; lpc = lpc->next) {
         const char *param = (const char *)lpc->data;
         char *grown = NULL;
 
         int start = len;
 
         CRM_LOG_ASSERT(param != NULL);
         if(param == NULL) {  continue; };
 
         value = g_hash_table_lookup(op->params, param);
         if (value != NULL) {
             crm_xml_add(restart, param, value);
         }
 
         /* Each name is stored as " name ": two extra characters per entry,
          * plus one byte for the terminating NUL */
         len += strlen(param) + 2;
         grown = realloc(list, len + 1);
         if (grown == NULL) {
             /* realloc() left the old buffer untouched; release it and give
              * up rather than writing through a NULL pointer */
             crm_err("Could not allocate restart list for %s", rsc->id);
             free(list);
             free_xml(restart);
             return;
         }
         list = grown;
         sprintf(list + start, " %s ", param);
     }
 
     digest = calculate_operation_digest(restart, version);
     crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list);
     crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
 
     crm_trace("%s: %s, %s", rsc->id, digest, list);
     crm_log_xml_trace(restart, "restart digest source");
 
     free_xml(restart);
     free(digest);
     free(list);
 }
 
 /*
  * Append the XML record of one operation to 'parent'.
  *
  * parent: XML element the operation entry is created under
  * rsc:    agent details, forwarded to append_restart_list()
  * op:     operation to record; NULL makes the call a no-op
  * src:    origin label passed through to create_operation_update()
  *
  * Returns FALSE when op is NULL, TRUE otherwise (including when no XML
  * entry could be created).
  */
 static gboolean
 build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
                        const char *src)
 {
     int target_rc = 0;
     xmlNode *xml_op = NULL;
     const char *caller_version = CRM_FEATURE_SET;
 
     if (op == NULL) {
         return FALSE;
 
     } else if (AM_I_DC) {
         /* keep our own feature set */
 
     } else if (fsa_our_dc_version != NULL) {
         caller_version = fsa_our_dc_version;
     } else if (op->params == NULL) {
         /* fsa_our_dc_version is NULL on this path */
         caller_version = fsa_our_dc_version;
     } else {
         /* there is a small risk in formerly mixed clusters that
          *   it will be sub-optimal.
          * however with our upgrade policy, the update we send
          *   should still be completely supported anyway
          */
         caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
         crm_debug("Falling back to operation originator version: %s", caller_version);
     }
 
     target_rc = rsc_op_expected_rc(op);
     xml_op = create_operation_update(parent, op, caller_version, target_rc, src, LOG_DEBUG);
 
     if (xml_op) {
         /* Only touch the entry once it is known to exist */
         crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, fsa_our_uname); /* For context during triage */
         append_restart_list(rsc, op, xml_op, caller_version);
     }
     return TRUE;
 }
 
 /*
  * Decide from the cached history whether a resource counts as active.
  *
  * Returns FALSE when there is no history entry or no recorded last
  * operation, and when that last operation shows the resource is not
  * running (see the individual checks); TRUE otherwise.
  */
 static gboolean
 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
 {
     rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
     lrmd_event_data_t *last = NULL;
 
     if (entry == NULL || entry->last == NULL) {
         return FALSE;
     }
     last = entry->last;
 
     crm_trace("Processing %s: %s.%d=%d",
               rsc_id, last->op_type, last->interval, last->rc);
 
     if (last->rc == PCMK_OCF_OK) {
         /* A successful stop leaves the resource inactive.  A successful
          * migrate is treated the same way: a stricter check is too
          * complex... leave that to the PE */
         if (safe_str_eq(last->op_type, CRMD_ACTION_STOP)
             || safe_str_eq(last->op_type, CRMD_ACTION_MIGRATE)) {
             return FALSE;
         }
     }
 
     if (last->rc == PCMK_OCF_NOT_RUNNING) {
         return FALSE;
     }
 
     if (last->interval == 0 && last->rc == PCMK_OCF_NOT_CONFIGURED) {
         /* Badly configured resources can't be reliably stopped */
         return FALSE;
     }
 
     return TRUE;
 }
 
 /*
  * Create one XML_LRM_TAG_RESOURCE element under rsc_list for every entry in
  * the history cache, including the entry's failed, last and recurring
  * operations.
  *
  * Always returns FALSE.
  */
 static gboolean
 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
 {
     GHashTableIter history_iter;
     rsc_history_t *entry = NULL;
 
     g_hash_table_iter_init(&history_iter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&history_iter, NULL, (void **)&entry)) {
         GList *recurring = NULL;
         const char *container = NULL;
         xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
 
         /* Identify the resource and its agent */
         crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
         crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
 
         /* The container, if any, is read from the last operation's meta attributes */
         if (entry->last && entry->last->params) {
             container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
         }
         if (container) {
             crm_trace("Resource %s is a part of container resource %s", entry->id, container);
             crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
         }
 
         /* Operations are recorded in this order: failed, last, recurring */
         build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__);
         build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__);
 
         recurring = entry->recurring_op_list;
         while (recurring != NULL) {
             build_operation_update(xml_rsc, &(entry->rsc), recurring->data, __FUNCTION__);
             recurring = recurring->next;
         }
     }
 
     return FALSE;
 }
 
 /*
  * Build the node-state XML describing the resources known to one lrmd
  * connection.
  *
  * lrm_state:  connection whose history is exported
  * is_replace: not used by this function
  *
  * Returns the newly created state element.
  */
 xmlNode *
 do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace)
 {
     const char *uuid = NULL;
     xmlNode *xml_state = NULL;
     xmlNode *lrm_section = NULL;
     xmlNode *resources = NULL;
 
     if (!safe_str_eq(lrm_state->node_name, fsa_our_uname)) {
         /* Not our own name: flag the node as remote and use its name as the id */
         xml_state = create_xml_node(NULL, XML_CIB_TAG_STATE);
         crm_xml_add(xml_state, XML_NODE_IS_REMOTE, "true");
         crm_xml_add(xml_state, XML_ATTR_ID, lrm_state->node_name);
         crm_xml_add(xml_state, XML_ATTR_UNAME, lrm_state->node_name);
         uuid = lrm_state->node_name;
 
     } else {
         crm_node_t *peer = crm_get_peer(0, lrm_state->node_name);
 
         xml_state = do_update_node_cib(peer, node_update_cluster|node_update_peer, NULL, __FUNCTION__);
         /* The next two lines shouldn't be necessary for newer DCs */
         crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
         crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
         uuid = fsa_our_uuid;
     }
 
     lrm_section = create_xml_node(xml_state, XML_CIB_TAG_LRM);
     crm_xml_add(lrm_section, XML_ATTR_ID, uuid);
     resources = create_xml_node(lrm_section, XML_LRM_TAG_RESOURCES);
 
     /* Build a list of active (not always running) resources */
     build_active_RAs(lrm_state, resources);
 
     crm_log_xml_trace(xml_state, "Current state of the LRM");
 
     return xml_state;
 }
 
 /*
  * Look up the lrmd connection state for node_name and export it with
  * do_lrm_query_internal().
  *
  * Returns NULL (after logging an error) when no state exists for the node.
  */
 xmlNode *
 do_lrm_query(gboolean is_replace, const char *node_name)
 {
     lrm_state_t *lrm_state = lrm_state_find(node_name);
 
     if (lrm_state != NULL) {
         return do_lrm_query_internal(lrm_state, is_replace);
     }
 
     crm_err("Could not query lrm state for lrmd node %s", node_name);
     return NULL;
 }
 
 /*
  * Tell the requester of a resource deletion how it went.
  *
  * A delete operation result is built from the original request and sent
  * straight back to the sender.  When the request did not come from the
  * transition engine, the "last-lrm-refresh" attribute is also updated so
  * that a new transition is triggered.
  */
 static void
 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
 {
     lrmd_event_data_t *op = NULL;
     gboolean deleted = (rc == pcmk_ok);
     const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
 
     crm_info("Notifying %s on %s that %s was%s deleted",
              from_sys, from_host, rsc_id, deleted ? "" : " not");
 
     op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
     CRM_ASSERT(op != NULL);
 
     /* Translate the delete result into an operation status and return code */
     op->op_status = deleted ? PCMK_LRM_OP_DONE : PCMK_LRM_OP_ERROR;
     op->rc = deleted ? PCMK_OCF_OK : PCMK_OCF_UNKNOWN_ERROR;
 
     send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
     lrmd_free_event(op);
 
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
         /* this isn't expected - trigger a new transition */
         char *now_s = crm_itoa(time(NULL));
 
         crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id);
 
         update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                              "last-lrm-refresh", now_s, FALSE, NULL, NULL);
 
         free(now_s);
     }
 }
 
 /*
  * g_hash_table_foreach_remove() callback for the deletion_ops table.
  *
  * user_data is a struct delete_event_s.  A pending deletion for the same
  * resource is answered via notify_deleted() and then removed from the
  * table (TRUE); all other entries are kept (FALSE).
  */
 static gboolean
 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     struct pending_deletion_op_s *deletion = value;
     struct delete_event_s *event = user_data;
 
     if (!crm_str_eq(event->rsc, deletion->rsc, TRUE)) {
         return FALSE;
     }
 
     notify_deleted(event->lrm_state, deletion->input, event->rsc, event->rc);
     return TRUE;
 }
 
 /*
  * g_hash_table_foreach_remove() callback for the pending_ops table.
  *
  * user_data is the id of a deleted resource.  Pending operations that
  * belong to it are logged and removed (TRUE); others are kept (FALSE).
  */
 static gboolean
 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
 {
     struct recurring_op_s *pending = value;
     const char *rsc = user_data;
 
     if (!crm_str_eq(rsc, pending->rsc_id, TRUE)) {
         return FALSE;
     }
 
     crm_info("Removing op %s:%d for deleted resource %s",
              pending->op_key, pending->call_id, rsc);
     return TRUE;
 }
 
 /*
  * Remove the rsc from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
 
 /*
  * Delete one resource's status from the CIB via an XPath built from
  * rsc_template, the connection's node name and the resource id.
  *
  * Returns -ENXIO when rsc_id is NULL, otherwise the result of
  * cib_internal_op().
  */
 static int
 delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                   const char *user_name)
 {
     int rc = pcmk_ok;
     int xpath_len = 0;
     char *xpath = NULL;
 
     CRM_CHECK(rsc_id != NULL, return -ENXIO);
 
     /* The template's own length already covers the two "%s" placeholders,
      * so this is enough room for the expanded string and the NUL */
     xpath_len = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1;
     xpath = calloc(1, xpath_len);
     snprintf(xpath, xpath_len, rsc_template, lrm_state->node_name, rsc_id);
 
     rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, xpath,
                          NULL, NULL, call_options | cib_xpath, user_name);
 
     free(xpath);
     return rc;
 }
 
 /*
  * Finish the deletion of a resource: drop it from the history cache and the
  * CIB (only when rc is pcmk_ok), then notify everyone waiting for the
  * result.
  *
  * lrm_state: connection the resource belongs to
  * input:     original request, if any; its sender is notified of the result
  * rsc_id:    id of the resource
  * rsc_gIter: when non-NULL, an iterator over resource_history positioned on
  *            the entry to remove (used instead of a lookup by id)
  * rc:        result of the delete request
  * user_name: forwarded to the CIB delete
  */
 static void
 delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
                  GHashTableIter * rsc_gIter, int rc, const char *user_name)
 {
     struct delete_event_s event;
     char *rsc_id_copy = NULL;
 
     CRM_CHECK(rsc_id != NULL, return);
 
     /* Work on a private copy for the whole function.  NOTE(review): rsc_id
      * may point into the history entry that is removed below (presumably
      * why the copy exists at all - confirm against callers), so the
      * caller's pointer must not be used after the removal either. */
     rsc_id_copy = strdup(rsc_id);
     CRM_CHECK(rsc_id_copy != NULL, return);
 
     if (rc == pcmk_ok) {
         if (rsc_gIter)
             g_hash_table_iter_remove(rsc_gIter);
         else
             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
         crm_debug("sync: Sending delete op for %s", rsc_id_copy);
         delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);
 
         g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
     }
 
     if (input) {
         notify_deleted(lrm_state, input, rsc_id_copy, rc);
     }
 
     /* Answer (and drop) every queued deletion request for this resource */
     event.rc = rc;
     event.rsc = rsc_id_copy;
     event.lrm_state = lrm_state;
     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
 
     free(rsc_id_copy);
 }
 
 /*
  * Remove the op from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 
 #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']"
 #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']"
 
 /*
  * Delete the record of one operation from the CIB.
  *
  * With an 'op', the delete is described by an XML_LRM_TAG_RSC_OP element
  * carrying the call id and transition key (plus the operation key for
  * recurring ops).  Without one, rsc_id and key are both required and the
  * record is addressed by XPath; a positive call_id narrows the match to
  * that call.
  */
 static void
 delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id,
                 const char *key, int call_id)
 {
     xmlNode *xml_top = NULL;
 
     if (op == NULL && (rsc_id == NULL || key == NULL)) {
         crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key);
         return;
     }
 
     if (op != NULL) {
         xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
         crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
         crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
 
         if (op->interval > 0) {
             char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
 
             /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
             crm_xml_add(xml_top, XML_ATTR_ID, op_id);
             free(op_id);
         }
 
         crm_debug("async: Sending delete op for %s_%s_%d (call=%d)",
                   op->rsc_id, op->op_type, op->interval, op->call_id);
 
         fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override);
 
     } else {
         char *op_xpath = NULL;
         /* Length of the three strings substituted into either template */
         int xpath_len = strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key);
 
         if (call_id > 0) {
             /* 10 extra bytes: room for the call id digits and the NUL */
             xpath_len += strlen(op_call_template) + 10;
             op_xpath = calloc(1, xpath_len);
             snprintf(op_xpath, xpath_len, op_call_template, lrm_state->node_name, rsc_id, key,
                      call_id);
 
         } else {
             xpath_len += strlen(op_template) + 1;
             op_xpath = calloc(1, xpath_len);
             snprintf(op_xpath, xpath_len, op_template, lrm_state->node_name, rsc_id, key);
         }
 
         crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id);
         fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath);
 
         free(op_xpath);
     }
 
     /* xml_top is still NULL here when the XPath form was used */
     crm_log_xml_trace(xml_top, "op:cancel");
     free_xml(xml_top);
 }
 
 void
 lrm_clear_last_failure(const char *rsc_id, const char *node_name)
 {
     char *attr = NULL;
     GHashTableIter iter;
     GList *lrm_state_list = lrm_state_get_list();
     GList *state_entry;
     rsc_history_t *entry = NULL;
 
     attr = generate_op_key(rsc_id, "last_failure", 0);
 
     /* This clears last failure for every lrm state that has this rsc.*/
     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
         lrm_state_t *lrm_state = state_entry->data;
 
         if (node_name != NULL) {
             if (strcmp(node_name, lrm_state->node_name) != 0) {
                 /* filter by node_name if node_name is present */
                 continue;
             }
         }
 
         delete_op_entry(lrm_state, NULL, rsc_id, attr, 0);
 
         if (!lrm_state->resource_history) {
             continue;
         }
 
         g_hash_table_iter_init(&iter, lrm_state->resource_history);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
             if (crm_str_eq(rsc_id, entry->id, TRUE)) {
                 lrmd_free_event(entry->failed);
                 entry->failed = NULL;
             }
         }
     }
     free(attr);
     g_list_free(lrm_state_list);
 }
 
 /* Returns: gboolean - cancellation is in progress */
 static gboolean
 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
 {
     int rc = pcmk_ok;
     char *local_key = NULL;
     struct recurring_op_s *pending = NULL;
 
     CRM_CHECK(op != 0, return FALSE);
     CRM_CHECK(rsc_id != NULL, return FALSE);
     if (key == NULL) {
         local_key = make_stop_id(rsc_id, op);
         key = local_key;
     }
     pending = g_hash_table_lookup(lrm_state->pending_ops, key);
 
     if (pending) {
         if (remove && pending->remove == FALSE) {
             pending->remove = TRUE;
             crm_debug("Scheduling %s for removal", key);
         }
 
         if (pending->cancelled) {
             crm_debug("Operation %s already cancelled", key);
             free(local_key);
             return FALSE;
         }
 
         pending->cancelled = TRUE;
 
     } else {
         crm_info("No pending op found for %s", key);
         free(local_key);
         return FALSE;
     }
 
     crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
     rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval);
     if (rc == pcmk_ok) {
         crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
         free(local_key);
         return TRUE;
     }
 
     crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
     /* The caller needs to make sure the entry is
      * removed from the pending_ops list
      *
      * Usually by returning TRUE inside the worker function
      * supplied to g_hash_table_foreach_remove()
      *
      * Not removing the entry from pending_ops will block
      * the node from shutting down
      */
     free(local_key);
     return FALSE;
 }
 
 struct cancel_data {
     gboolean done;
     gboolean remove;
     const char *key;
     lrmd_rsc_info_t *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean
 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     struct cancel_data *data = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (crm_str_eq(op->op_key, data->key, TRUE)) {
         data->done = TRUE;
         remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
     }
     return remove;
 }
 
 static gboolean
 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
 {
     guint removed = 0;
     struct cancel_data data;
 
     CRM_CHECK(rsc != NULL, return FALSE);
     CRM_CHECK(key != NULL, return FALSE);
 
     data.key = key;
     data.rsc = rsc;
     data.done = FALSE;
     data.remove = remove;
     data.lrm_state = lrm_state;
 
     removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data);
     crm_trace("Removed %u op cache entries, new size: %u",
               removed, g_hash_table_size(lrm_state->pending_ops));
     return data.done;
 }
 
 static lrmd_rsc_info_t *
 get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create)
 {
     lrmd_rsc_info_t *rsc = NULL;
     const char *id = ID(resource);
     const char *type = crm_element_value(resource, XML_ATTR_TYPE);
     const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
     const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
     const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
 
     crm_trace("Retrieving %s from the LRM.", id);
     CRM_CHECK(id != NULL, return NULL);
 
     rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
     if (!rsc && long_id) {
         rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0);
     }
 
     if (!rsc && do_create) {
         CRM_CHECK(class != NULL, return NULL);
         CRM_CHECK(type != NULL, return NULL);
 
         crm_trace("Adding rsc %s before operation", id);
 
         lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring);
 
         rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
         if (!rsc) {
             fsa_data_t *msg_data = NULL;
 
-            crm_err("Could not add resource %s to LRM", id);
+            crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name);
             register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
         }
     }
 
     return rsc;
 }
 
 static void
 delete_resource(lrm_state_t * lrm_state,
                 const char *id,
                 lrmd_rsc_info_t * rsc,
                 GHashTableIter * gIter,
-                const char *sys, const char *host, const char *user, ha_msg_input_t * request)
+                const char *sys,
+                const char *host,
+                const char *user,
+                ha_msg_input_t * request,
+                gboolean unregister)
 {
     int rc = pcmk_ok;
 
     crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
 
-    if (rsc) {
+    if (rsc && unregister) {
         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
     }
 
     if (rc == pcmk_ok) {
         crm_trace("Resource '%s' deleted", id);
     } else if (rc == -EINPROGRESS) {
         crm_info("Deletion of resource '%s' pending", id);
         if (request) {
             struct pending_deletion_op_s *op = NULL;
             char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
 
             op = calloc(1, sizeof(struct pending_deletion_op_s));
             op->rsc = strdup(rsc->id);
             op->input = copy_ha_msg_input(request);
             g_hash_table_insert(lrm_state->deletion_ops, ref, op);
         }
         return;
     } else {
         crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
                  id, sys, user ? user : "internal", host, rc);
     }
 
     delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
 }
 
 /*	 A_LRM_INVOKE	*/
 void
 do_lrm_invoke(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     gboolean create_rsc = TRUE;
     lrm_state_t *lrm_state = NULL;
     const char *crm_op = NULL;
     const char *from_sys = NULL;
     const char *from_host = NULL;
     const char *operation = NULL;
     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
     const char *user_name = NULL;
     const char *target_node = NULL;
     gboolean is_remote_node = FALSE;
+    gboolean crm_rsc_delete = FALSE;
 
     if (input->xml != NULL) {
         /* Remote node operations are routed here to their remote connections */
         target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
     }
     if (target_node == NULL) {
         target_node = fsa_our_uname;
     } else if (safe_str_neq(target_node, fsa_our_uname)) {
         is_remote_node = TRUE;
     }
 
     lrm_state = lrm_state_find(target_node);
 
     if (lrm_state == NULL && is_remote_node) {
         crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.",
             target_node, fsa_our_uname);
         return;
     }
 
     CRM_ASSERT(lrm_state != NULL);
 
 #if ENABLE_ACL
     user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
     crm_trace("LRM command from user '%s'", user_name);
 #endif
 
     crm_op = crm_element_value(input->msg, F_CRM_TASK);
     from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
         from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
     }
 
     crm_trace("LRM command from: %s", from_sys);
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
+        /* remember this delete op came from crm_resource */
+        crm_rsc_delete = TRUE;
         operation = CRMD_ACTION_DELETE;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         operation = CRM_OP_LRM_REFRESH;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
         rsc_history_t *entry = NULL;
         lrmd_event_data_t *op = NULL;
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* The lrmd can not fail a resource, it does not understand the
          * concept of success or failure in relation to a resource, it simply
          * executes operations and reports the results. We determine what a failure is.
          * Becaues of this, if we want to fail a resource we have to fake what we
          * understand a failure to look like.
          *
          * To do this we create a fake lrmd operation event for the resource
          * we want to fail.  We then pass that event to the lrmd client callback
          * so it will be processed as if it actually came from the lrmd. */
         op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon");
         CRM_ASSERT(op != NULL);
 
         free((char *)op->user_data);
         op->user_data = NULL;
         entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
         /* Make sure the call id is greater than the last successful operation,
          * otherwise the failure will not result in a possible recovery of the resource
          * as it could appear the failure occurred before the successful start */
         if (entry) {
             op->call_id = entry->last_callid + 1;
             if (op->call_id < 0) {
                 op->call_id = 1;
             }
         }
         op->interval = 0;
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
         op->t_run = time(NULL);
         op->t_rcchange = op->t_run;
 
 #if ENABLE_ACL
         if (user_name && is_privileged(user_name) == FALSE) {
             crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
             return;
         }
 #endif
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
         if (rsc) {
             crm_info("Failing resource %s...", rsc->id);
             process_lrm_event(lrm_state, op);
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
             lrmd_free_rsc_info(rsc);
         } else {
             crm_info("Cannot find/create resource in order to fail it...");
             crm_log_xml_warn(input->msg, "bad input");
         }
 
         send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
         lrmd_free_event(op);
         return;
 
     } else if (input->xml != NULL) {
         operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
     }
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         int rc = pcmk_ok;
         xmlNode *fragment = do_lrm_query_internal(lrm_state, TRUE);
 
         fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
         crm_info("Forced a local LRM refresh: call=%d", rc);
 
         if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, fragment,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
         free_xml(fragment);
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
         xmlNode *data = do_lrm_query_internal(lrm_state, FALSE);
         xmlNode *reply = create_reply(input->msg, data);
 
         if (relay_message(reply, TRUE) == FALSE) {
             crm_err("Unable to route reply");
             crm_log_xml_err(reply, "reply");
         }
         free_xml(reply);
         free_xml(data);
 
     } else if (safe_str_eq(operation, CRM_OP_PROBED)) {
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node);
 
     } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
         GHashTableIter gIter;
         rsc_history_t *entry = NULL;
+        gboolean unregister = TRUE;
 
         crm_notice("Forcing the status of all resources to be redetected");
 
         g_hash_table_iter_init(&gIter, lrm_state->resource_history);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
+            /* Decide per entry: a remote connection resource must stay registered
+             * during a reprobe, or remote-node membership would be terminated */
+            unregister = is_remote_lrmd_ra(NULL, NULL, entry->id) ? FALSE : TRUE;
             delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
-                            user_name, NULL);
+                            user_name, NULL, unregister);
         }
 
         /* Now delete the copy in the CIB */
         erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
 
         /* And finally, _delete_ the value in attrd
          * Setting it to FALSE results in the PE sending us back here again
          */
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
 
         if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0
            && strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, NULL,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
     } else if (operation != NULL) {
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *params = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* only the first 16 chars are used by the LRM */
         params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
 
         if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
             create_rsc = FALSE;
         }
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
 
         if (rsc == NULL && create_rsc) {
             crm_err("Invalid resource definition");
             crm_log_xml_warn(input->msg, "bad input");
 
         } else if (rsc == NULL) {
             lrmd_event_data_t *op = NULL;
 
             crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml));
             delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name);
 
             op = construct_op(lrm_state, input->xml, ID(xml_rsc), operation);
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
             CRM_ASSERT(op != NULL);
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
 
         } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
             char *op_key = NULL;
             char *meta_key = NULL;
             int call = 0;
             const char *call_id = NULL;
             const char *op_task = NULL;
             const char *op_interval = NULL;
             gboolean in_progress = FALSE;
 
             CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL);
             op_interval = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
             op_task = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
             call_id = crm_element_value(params, meta_key);
             free(meta_key);
 
             CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
             CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       return);
 
             op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0"));
 
             crm_debug("PE requested op %s (call=%s) be cancelled",
                       op_key, call_id ? call_id : "NA");
             call = crm_parse_int(call_id, "0");
             if (call == 0) {
                 /* the normal case when the PE cancels a recurring op */
                 in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
 
             } else {
                 /* the normal case when the PE cancels an orphan op */
                 in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
             }
 
             if (in_progress == FALSE) {
                 lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc->id, op_task);
 
                 crm_info("Nothing known about operation %d for %s", call, op_key);
                 delete_op_entry(lrm_state, NULL, rsc->id, op_key, call);
 
                 CRM_ASSERT(op != NULL);
 
                 op->rc = PCMK_OCF_OK;
                 op->op_status = PCMK_LRM_OP_DONE;
                 send_direct_ack(from_host, from_sys, rsc, op, rsc->id);
                 lrmd_free_event(op);
 
                 /* needed?? surely not otherwise the cancel_op_(_key) wouldn't
                  * have failed in the first place
                  */
                 g_hash_table_remove(lrm_state->pending_ops, op_key);
             }
 
             free(op_key);
 
         } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) {
+            gboolean unregister = TRUE;
 
 #if ENABLE_ACL
             int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name);
             if (cib_rc != pcmk_ok) {
                 lrmd_event_data_t *op = NULL;
 
                 crm_err
                     ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s",
                      rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc,
                      pcmk_strerror(cib_rc));
 
                 op = construct_op(lrm_state, input->xml, rsc->id, operation);
                 op->op_status = PCMK_LRM_OP_ERROR;
 
                 if (cib_rc == -EACCES) {
                     op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
                 } else {
                     op->rc = PCMK_OCF_UNKNOWN_ERROR;
                 }
                 send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
                 lrmd_free_event(op);
                 return;
             }
 #endif
-            delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input);
+            if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
+                unregister = FALSE;
+            }
+
+            delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister);
 
         } else if (rsc != NULL) {
             do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
         }
 
         lrmd_free_rsc_info(rsc);
 
     } else {
         crm_err("Operation was neither a lrm_query, nor a rsc op.  %s", crm_str(crm_op));
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     }
 }
 
 static lrmd_event_data_t *
 construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
 {
     lrmd_event_data_t *op = NULL;
     const char *op_delay = NULL;
     const char *op_timeout = NULL;
     const char *op_interval = NULL;
     GHashTable *params = NULL;
 
     const char *transition = NULL;
 
     CRM_ASSERT(rsc_id != NULL);
 
     op = calloc(1, sizeof(lrmd_event_data_t));
     op->type = lrmd_event_exec_complete;
     op->op_type = strdup(operation);
     op->op_status = PCMK_LRM_OP_PENDING;
     op->rc = -1;
     op->rsc_id = strdup(rsc_id);
     op->interval = 0;
     op->timeout = 0;
     op->start_delay = 0;
 
     if (rsc_op == NULL) {
         CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
         op->user_data = NULL;
         /* the stop_all_resources() case
          * by definition there is no DC (or they'd be shutting
          *   us down).
          * So we should put our version here.
          */
         op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
 
         g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
 
         crm_trace("Constructed %s op for %s", operation, rsc_id);
         return op;
     }
 
     params = xml2list(rsc_op);
     g_hash_table_remove(params, CRM_META "_op_target_rc");
 
     op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
     op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
     op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL);
 
     op->interval = crm_parse_int(op_interval, "0");
     op->timeout = crm_parse_int(op_timeout, "0");
     op->start_delay = crm_parse_int(op_delay, "0");
 
     if (safe_str_neq(operation, RSC_STOP)) {
         op->params = params;
 
     } else {
         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 
         /* If we do not have stop parameters cached, use
          * whatever we are given */
         if (!entry || !entry->stop_params) {
             op->params = params;
         } else {
             /* Copy the cached parameter list so that we stop the resource
              * with the old attributes, not the new ones */
             op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                                g_hash_destroy_str, g_hash_destroy_str);
 
             g_hash_table_foreach(params, copy_meta_keys, op->params);
             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
             g_hash_table_destroy(params);
             params = NULL;
         }
     }
 
     /* sanity */
     if (op->interval < 0) {
         op->interval = 0;
     }
     if (op->timeout <= 0) {
         op->timeout = op->interval;
     }
     if (op->start_delay < 0) {
         op->start_delay = 0;
     }
 
     transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
     CRM_CHECK(transition != NULL, return op);
 
     op->user_data = strdup(transition);
 
     if (op->interval != 0) {
         if (safe_str_eq(operation, CRMD_ACTION_START)
             || safe_str_eq(operation, CRMD_ACTION_STOP)) {
             crm_err("Start and Stop actions cannot have an interval: %d", op->interval);
             op->interval = 0;
         }
     }
 
     crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval);
 
     return op;
 }
 
 void
 send_direct_ack(const char *to_host, const char *to_sys,
                 lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
 {
     xmlNode *reply = NULL;
     xmlNode *update, *iter;
     crm_node_t *peer = NULL;
 
     CRM_CHECK(op != NULL, return);
     if (op->rsc_id == NULL) {
         CRM_ASSERT(rsc_id != NULL);
         op->rsc_id = strdup(rsc_id);
     }
     if (to_sys == NULL) {
         to_sys = CRM_SYSTEM_TENGINE;
     }
 
     peer = crm_get_peer(0, fsa_our_uname);
     update = do_update_node_cib(peer, node_update_none, NULL, __FUNCTION__);
 
     iter = create_xml_node(update, XML_CIB_TAG_LRM);
     crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
 
     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(iter, rsc, op, __FUNCTION__);
     reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
 
     crm_log_xml_trace(update, "ACK Update");
 
     crm_debug("ACK'ing resource op %s_%s_%d from %s: %s",
               op->rsc_id, op->op_type, op->interval, op->user_data,
               crm_element_value(reply, XML_ATTR_REFERENCE));
 
     if (relay_message(reply, TRUE) == FALSE) {
         crm_log_xml_err(reply, "Unable to route reply");
     }
 
     free_xml(update);
     free_xml(reply);
 }
 
 gboolean
 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
 {
     gboolean res = TRUE;
     GList *lrm_state_list = lrm_state_get_list();
     GList *state_entry;
 
     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
         lrm_state_t *lrm_state = state_entry->data;
 
         if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
             /* keep iterating through all even when false is returned */
             res = FALSE;
         }
     }
 
     set_bit(fsa_input_register, R_SENT_RSC_STOP);
     g_list_free(lrm_state_list); lrm_state_list = NULL;
     return res;
 }
 
 struct stop_recurring_action_s {
     lrmd_rsc_info_t *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean
 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     struct stop_recurring_action_s *event = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
         crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
         remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
     }
 
     return remove;
 }
 
 static gboolean
 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
 {
     gboolean remove = FALSE;
     lrm_state_t *lrm_state = user_data;
     struct recurring_op_s *op = (struct recurring_op_s *)value;
 
     if (op->interval != 0) {
         crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
         remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
     }
 
     return remove;
 }
 
 static void
 do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
               xmlNode * request)
 {
     int call_id = 0;
     char *op_id = NULL;
     lrmd_event_data_t *op = NULL;
     lrmd_key_value_t *params = NULL;
     fsa_data_t *msg_data = NULL;
     const char *transition = NULL;
 
     CRM_CHECK(rsc != NULL, return);
     CRM_CHECK(operation != NULL, return);
 
     if (msg != NULL) {
         transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
         if (transition == NULL) {
             crm_log_xml_err(msg, "Missing transition number");
         }
     }
 
     op = construct_op(lrm_state, msg, rsc->id, operation);
     CRM_CHECK(op != NULL, return);
 
     /* stop any previous monitor operations before changing the resource state */
     if (op->interval == 0
         && strcmp(operation, CRMD_ACTION_STATUS) != 0
         && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
         guint removed = 0;
         struct stop_recurring_action_s data;
 
         data.rsc = rsc;
         data.lrm_state = lrm_state;
         removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
 
         crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d",
                   removed, rsc->id, operation, op->interval);
     }
 
     /* now do the op */
     crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval);
 
     if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) {
         if (safe_str_neq(operation, "fail")
             && safe_str_neq(operation, CRMD_ACTION_STOP)) {
             crm_info("Discarding attempt to perform action %s on %s in state %s",
                      operation, rsc->id, fsa_state2string(fsa_state));
             op->rc = 99;
             op->op_status = PCMK_LRM_OP_ERROR;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
             lrmd_free_event(op);
             free(op_id);
             return;
         }
     }
 
     op_id = generate_op_key(rsc->id, op->op_type, op->interval);
 
     if (op->interval > 0) {
         /* cancel it so we can then restart it without conflict */
         cancel_op_key(lrm_state, rsc, op_id, FALSE);
     }
 
     if (op->params) {
         char *key = NULL;
         char *value = NULL;
         GHashTableIter iter;
 
         g_hash_table_iter_init(&iter, op->params);
         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
             params = lrmd_key_value_add(params, key, value);
         }
     }
 
     call_id = lrm_state_exec(lrm_state,
                              rsc->id,
                              op->op_type,
                              op->user_data, op->interval, op->timeout, op->start_delay, params);
 
     if (call_id <= 0) {
         crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 
     } else {
         /* record all operations so we can wait
          * for them to complete during shutdown
          */
         char *call_id_s = make_stop_id(rsc->id, call_id);
         struct recurring_op_s *pending = NULL;
 
         pending = calloc(1, sizeof(struct recurring_op_s));
         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
 
         pending->call_id = call_id;
         pending->interval = op->interval;
         pending->op_type = strdup(operation);
         pending->op_key = strdup(op_id);
         pending->rsc_id = strdup(rsc->id);
         g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
 
         if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) {
             char *uuid = NULL;
             int dummy = 0, target_rc = 0;
 
             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
 
             decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
             free(uuid);
 
             op->rc = target_rc;
             op->op_status = PCMK_LRM_OP_DONE;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
         }
     }
 
     free(op_id);
     lrmd_free_event(op);
     return;
 }
 
 /* Call id of the most recent resource status update submitted by
  * do_update_resource(); cib_rsc_callback() resets it to 0 once the callback
  * for that call id has been received. */
 int last_resource_update = 0;
 
 /* CIB callback for resource status updates.
  *
  * Logs the outcome of update call_id (a diff failure/resync result is
  * treated like success and only traced), and, when call_id is the update
  * recorded in last_resource_update, clears that marker and triggers the FSA.
  */
 static void
 cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     gboolean benign = (rc == pcmk_ok)
         || (rc == -pcmk_err_diff_failed)
         || (rc == -pcmk_err_diff_resync);
 
     if (benign) {
         crm_trace("Resource update %d complete: rc=%d", call_id, rc);
     } else {
         crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
     }
 
     if (call_id != last_resource_update) {
         /* not the update we are tracking */
         return;
     }
 
     last_resource_update = 0;
     trigger_fsa(fsa_source);
 }
 
 /* Submit a minimal node_state entry for remote node node_name to the CIB
  * status section, using the CIB call options in call_opt.
  *
  * The update is built locally, handed to fsa_cib_update() and freed again;
  * nothing is returned to the caller, a failed submission is only logged.
  */
 static void
 remote_node_init_status(const char *node_name, int call_opt)
 {
     int call_id = 0;
     xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
 
     simple_remote_node_status(node_name, update,__FUNCTION__);
 
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
 
     /* The value stored by fsa_cib_update() is a call number, not an error
      * code (do_update_resource() treats a positive value as a valid call id),
      * so comparing against pcmk_ok would report every successful submission
      * as a failure.  Presumably only negative values indicate an error -
      * TODO confirm against the fsa_cib_update() definition. */
     if (call_id < 0) {
         crm_debug("Failed to init status section for remote-node %s", node_name);
     }
     free_xml(update);
 }
 
 /* Reset the CIB status of remote node node_name: re-create its node_state
  * entry, then erase its lrm and transient attribute sections.
  * A NULL node_name is ignored.
  */
 static void
 remote_node_clear_status(const char *node_name, int call_opt)
 {
     if (node_name != NULL) {
         remote_node_init_status(node_name, call_opt);
         erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
         erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
     }
 }
 
 /* Record the result of operation op for resource rsc in the CIB status
  * section of the node owning lrm_state.
  *
  * The update is sent asynchronously.  The return value is the CIB call
  * number of the update (also stored in last_resource_update when positive),
  * 0 when op is NULL or the resource no longer exists (in which case a direct
  * ack is sent instead), or -EINVAL when no node uuid could be determined.
  *
  * A successful "start" of a container hosting a remote-node, or of a
  * baremetal remote connection resource, additionally resets the status
  * section of that remote node.
  */
 static int
 do_update_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
 /*
   <status>
   <nodes_status id=uname>
   <lrm>
   <lrm_resources>
   <lrm_resource id=...>
   </...>
 */
     int rc = pcmk_ok;
     xmlNode *update, *iter = NULL;
     int call_opt = cib_quorum_override;
     const char *uuid = NULL;
 
     CRM_CHECK(op != NULL, return 0);
 
     if (fsa_state == S_ELECTION || fsa_state == S_PENDING) {
         crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state));
         call_opt |= cib_scope_local;
     }
 
     /* 'update' keeps the root of the tree, 'iter' walks down as we nest */
     iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
     update = iter;
     iter = create_xml_node(iter, XML_CIB_TAG_STATE);
 
     if (safe_str_eq(lrm_state->node_name, fsa_our_uname)) {
         uuid = fsa_our_uuid;
 
     } else {
         /* remote nodes uuid and uname are equal */
         uuid = lrm_state->node_name;
         crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
     }
 
     CRM_LOG_ASSERT(uuid != NULL);
     if(uuid == NULL) {
         rc = -EINVAL;
         goto done;
     }
 
     crm_xml_add(iter, XML_ATTR_UUID,  uuid);
     crm_xml_add(iter, XML_ATTR_UNAME, lrm_state->node_name);
     crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
 
     iter = create_xml_node(iter, XML_CIB_TAG_LRM);
     crm_xml_add(iter, XML_ATTR_ID, uuid);
 
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(iter, rsc, op, __FUNCTION__);
 
     if (rsc) {
         const char *container = NULL;
 
         crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
         crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
         crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
 
         if (op->params) {
             container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
         }
         if (container) {
             crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
             crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
         }
 
         CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id));
         CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id));
 
         /* check to see if we need to initialize remote-node related status sections */
         if (safe_str_eq(op->op_type, "start") && op->rc == 0 && op->op_status == PCMK_LRM_OP_DONE) {
             const char *remote_node = NULL;
 
             /* op->params may be NULL (it is guarded above as well); only
              * consult the table when it exists */
             if (op->params) {
                 remote_node = g_hash_table_lookup(op->params, CRM_META"_remote_node");
             }
 
             if (remote_node) {
                 /* A container for a remote-node has started, initialize remote-node's status */
                 crm_info("Initalizing lrm status for container remote-node %s. Container successfully started.", remote_node);
                 remote_node_clear_status(remote_node, call_opt);
             } else if (container == NULL && safe_str_eq(rsc->type, "remote") && safe_str_eq(rsc->provider, "pacemaker")) {
                 /* baremetal remote node connection resource has started, initialize remote-node's status */
                 crm_info("Initializing lrm status for baremetal remote-node %s", rsc->id);
                 remote_node_clear_status(rsc->id, call_opt);
             }
         }
 
     } else {
         crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id);
         send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
         goto cleanup;
     }
 
     crm_log_xml_trace(update, __FUNCTION__);
 
     /* make it an asynchronous call and be done with it
      *
      * Best case:
      *   the resource state will be discovered during
      *   the next signup or election.
      *
      * Bad case:
      *   we are shutting down and there is no DC at the time,
      *   but then why were we shutting down then anyway?
      *   (probably because of an internal error)
      *
      * Worst case:
      *   we get shot for having resources "running" when they really weren't
      *
      * the alternative however means blocking here for too long, which
      * isn't acceptable
      */
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
 
     if (rc > 0) {
         /* remember the call so cib_rsc_callback() can wake the FSA */
         last_resource_update = rc;
     }
   done:
     /* the return code is a call number, not an error code */
     crm_trace("Sent resource state update message: %d for %s=%d on %s", rc,
               op->op_type, op->interval, op->rsc_id);
     fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
 
   cleanup:
     free_xml(update);
     return rc;
 }
 
 /* FSA action handler stub: the check below always fails, so every call is
  * flagged by CRM_CHECK and returns without doing any work. */
 void
 do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state,
              enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
 {
     CRM_CHECK(FALSE, return);
 }
 
 /* Process the result event op of an operation executed through lrm_state.
  *
  * Depending on the operation status this records the result in the CIB
  * (do_update_resource), sends a direct ack, or removes the operation's CIB
  * entry; it then drops the matching entry from lrm_state->pending_ops, logs
  * the outcome and any agent output, handles a completed resource deletion,
  * triggers the FSA and updates the history cache.
  *
  * Returns FALSE when op or op->rsc_id is NULL, TRUE otherwise.
  */
 gboolean
 process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op)
 {
     char *op_id = NULL;
     char *op_key = NULL;
 
     int update_id = 0;
     gboolean removed = FALSE;
     lrmd_rsc_info_t *rsc = NULL;
 
     struct recurring_op_s *pending = NULL;
 
     CRM_CHECK(op != NULL, return FALSE);
 
     CRM_CHECK(op->rsc_id != NULL, return FALSE);
     /* op_id is the key used in pending_ops (resource id + call id);
      * op_key identifies the operation itself (resource, type, interval) */
     op_id = make_stop_id(op->rsc_id, op->call_id);
     pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
     op_key = generate_op_key(op->rsc_id, op->op_type, op->interval);
     rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
 
     if (op->op_status == PCMK_LRM_OP_ERROR
         && (op->rc == PCMK_OCF_RUNNING_MASTER || op->rc == PCMK_OCF_NOT_RUNNING)) {
         /* Leave it up to the TE/PE to decide if this is an error */
         op->op_status = PCMK_LRM_OP_DONE;
     }
 
     if (op->op_status != PCMK_LRM_OP_CANCELLED) {
         if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
             /* Keep notify ops out of the CIB */
             send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
         } else {
             update_id = do_update_resource(lrm_state, rsc, op);
         }
     } else if (op->interval == 0) {
         /* This will occur when "crm resource cleanup" is called while actions are in-flight */
         crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else if (pending == NULL) {
         /* We don't need to do anything for cancelled ops
          * that are not in our pending op list. There are no
          * transition actions waiting on these operations. */
 
     } else if (op->user_data == NULL) {
         /* At this point we have a pending entry, but no transition
          * key present in the user_data field. report this */
         crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
 
     } else if (pending->remove) {
         /* The tengine canceled this op, we have been waiting for the cancel to finish. */
         delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id);
 
     } else if (pending && op->rsc_deleted) {
         /* The tengine initiated this op, but it was cancelled outside of the
          * tengine's control during a resource cleanup/re-probe request. The tengine
          * must be alerted that this operation completed, otherwise the tengine
          * will continue waiting for this update to occur until it is timed out.
          * We don't want this update going to the cib though, so use a direct ack. */
         crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else {
         /* Before a stop is called, no need to direct ack */
         crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
     }
 
     /* One-shot operations are confirmed when their pending entry is removed;
      * recurring operations are only dropped (and confirmed) once cancelled */
     if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) {
         removed = TRUE;
         crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
                   op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops));
 
     } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) {
         removed = TRUE;
         g_hash_table_remove(lrm_state->pending_ops, op_id);
     }
 
     /* Log the outcome at a severity matching the operation status */
     switch (op->op_status) {
         case PCMK_LRM_OP_CANCELLED:
             crm_info("Operation %s: %s (node=%s, call=%d, confirmed=%s)",
                      op_key, services_lrm_status_str(op->op_status), lrm_state->node_name,
                      op->call_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_DONE:
             crm_notice("Operation %s: %s (node=%s, call=%d, rc=%d, cib-update=%d, confirmed=%s)",
                        op_key, services_ocf_exitcode_str(op->rc), lrm_state->node_name,
                        op->call_id, op->rc, update_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_TIMEOUT:
             crm_err("Operation %s: %s (node=%s, call=%d, timeout=%dms)",
                     op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, op->call_id, op->timeout);
             break;
 
         default:
             crm_err("Operation %s (node=%s, call=%d, status=%d, cib-update=%d, confirmed=%s) %s",
                     op_key, lrm_state->node_name, op->call_id, op->op_status, update_id, removed ? "true" : "false",
                     services_lrm_status_str(op->op_status));
     }
 
     if (op->output) {
         char *prefix =
             g_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id);
 
         /* Output of operations with a non-zero rc is logged at notice level,
          * everything else only at debug level */
         if (op->rc) {
             crm_log_output(LOG_NOTICE, prefix, op->output);
         } else {
             crm_log_output(LOG_DEBUG, prefix, op->output);
         }
         g_free(prefix);
     }
 
     if (op->rsc_deleted) {
         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
         delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
     }
 
     /* If a shutdown was escalated while operations were pending,
      * then the FSA will be stalled right now... allow it to continue
      */
     mainloop_set_trigger(fsa_source);
     update_history_cache(lrm_state, rsc, op);
 
     lrmd_free_rsc_info(rsc);
     free(op_key);
     free(op_id);
 
     return TRUE;
 }
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
index 98f59c80cc..f3dedeb359 100644
--- a/crmd/remote_lrmd_ra.c
+++ b/crmd/remote_lrmd_ra.c
@@ -1,793 +1,800 @@
 /* 
  * Copyright (C) 2013 David Vossel <dvossel@redhat.com>
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  * 
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <limits.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 #include <crm/lrmd.h>
 #include <crm/services.h>
 
 /* Agent type name identifying a remote lrmd connection resource; compared
  * against the agent in is_remote_lrmd_ra() and reported as the resource type
  * by remote_ra_get_rsc_info() */
 #define REMOTE_LRMD_RA "remote"
 
 /* The max start timeout before cmd retry: handle_remote_ra_start() never
  * passes more than this many milliseconds to a single connection attempt */
 #define MAX_START_TIMEOUT_MS 10000
 
 /*! A single action (start, stop, monitor, migrate_to, migrate_from) queued
  *  for a remote lrmd connection resource. Released with free_cmd(). */
 typedef struct remote_ra_cmd_s {
     /*! the local node the cmd is issued from */
     char *owner;
     /*! the remote node the cmd is executed on */
     char *rsc_id;
     /*! the action to execute */
     char *action;
     /*! some string the client wants us to give it back */
     char *userdata;
     /*! start delay in ms */
     int start_delay;
     /*! timer id used for start delay. */
     int delay_id;
     /*! timeout in ms for cmd */
     int timeout;
     /*! ms of the timeout still left, as computed by update_remaining_timeout() */
     int remaining_timeout;
     /*! recurring interval in ms */
     int interval;
     /*! interval timer id */
     int interval_id;
     /*! non-zero once a successful monitor result has been reported */
     int reported_success;
     /*! timer id of the pending monitor (poke) response timeout */
     int monitor_timeout_id;
     /*! timer id for the takeover wait; cleared by
      *  connection_takeover_timeout_cb() and removed in free_cmd() */
     int takeover_timeout_id;
     /*! action parameters */
     lrmd_key_value_t *params;
     /*! executed rc */
     int rc;
     /*! operation status reported together with rc */
     int op_status;
     /*! call id reported with the result; recurring_helper() assigns a new
      *  one from generate_callid() for each repetition */
     int call_id;
     /*! reference time used by update_remaining_timeout() to compute the
      *  elapsed part of the timeout */
     time_t start_time;
     /*! when TRUE, a monitor is not rescheduled after a successful poke and
      *  a disconnect during it is not reported as a failure */
     gboolean cancel;
 } remote_ra_cmd_t;
 
 /* Migration state of a remote connection (0 means no migration in progress;
  * the value is reset to 0 when a start or migrate_from is executed) */
 enum remote_migration_status {
     /* set after a migrate_to: another node is expected to take over */
     expect_takeover = 1,
     /* set when a new-client event arrives while a takeover was expected */
     takeover_complete,
 };
 
 /* Per-connection state of the remote resource agent, stored in
  * lrm_state->remote_ra_data */
 typedef struct remote_ra_data_s {
     /* mainloop trigger that runs handle_remote_ra_exec() */
     crm_trigger_t *work;
     /* command waiting for an asynchronous event, or NULL */
     remote_ra_cmd_t *cur_cmd;
     /* commands queued for execution */
     GList *cmds;
     /* recurring monitors waiting for their interval timer to fire */
     GList *recurring_cmds;
 
     enum remote_migration_status migrate_status;
 
     /* TRUE after a successful connect, FALSE again once stopped */
     gboolean active;
 } remote_ra_data_t;
 
 /* Forward declarations for helpers that are used before their definition */
 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
 static GList *fail_all_monitor_cmds(GList * list);
 
 /* Destroy a remote_ra_cmd_t: cancel every timer it still owns, then release
  * its strings, parameter list and the structure itself.
  * Accepts NULL; has the gpointer signature needed by g_list_free_full().
  */
 static void
 free_cmd(gpointer user_data)
 {
     remote_ra_cmd_t *doomed = user_data;
 
     if (doomed == NULL) {
         return;
     }
 
     /* a timer id of 0 means "no timer pending" */
     if (doomed->delay_id != 0) {
         g_source_remove(doomed->delay_id);
     }
     if (doomed->interval_id != 0) {
         g_source_remove(doomed->interval_id);
     }
     if (doomed->monitor_timeout_id != 0) {
         g_source_remove(doomed->monitor_timeout_id);
     }
     if (doomed->takeover_timeout_id != 0) {
         g_source_remove(doomed->takeover_timeout_id);
     }
 
     free(doomed->owner);
     free(doomed->rsc_id);
     free(doomed->action);
     free(doomed->userdata);
     lrmd_key_value_freeall(doomed->params);
     free(doomed);
 }
 
 /* Return the next call id for a remote connection command.
  *
  * Ids start at 1, increase by one per call and are always positive; after
  * INT_MAX the sequence restarts at 1.
  */
 static int
 generate_callid(void)
 {
     static int remote_ra_callid = 0;
 
     /* Wrap explicitly: incrementing a signed int past INT_MAX is undefined
      * behaviour, so a "<= 0" test after the increment cannot be relied on */
     if (remote_ra_callid == INT_MAX) {
         remote_ra_callid = 1;
     } else {
         remote_ra_callid++;
     }
 
     return remote_ra_callid;
 }
 
 /* Interval timer callback for a recurring command.
  *
  * Moves the command from the connection's recurring list back onto the
  * execution queue under a new call id and wakes the worker.  Nothing is
  * queued when the connection state can no longer be found.  Always returns
  * FALSE, so the timer fires only once.
  */
 static gboolean
 recurring_helper(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *connection_rsc = NULL;
     remote_ra_data_t *ra_data = NULL;
 
     /* the timer that called us is finished */
     cmd->interval_id = 0;
 
     connection_rsc = lrm_state_find(cmd->rsc_id);
     if (connection_rsc == NULL || connection_rsc->remote_ra_data == NULL) {
         return FALSE;
     }
     ra_data = connection_rsc->remote_ra_data;
 
     ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
     cmd->call_id = generate_callid();
     ra_data->cmds = g_list_append(ra_data->cmds, cmd);
 
     mainloop_set_trigger(ra_data->work);
     return FALSE;
 }
 
 /* Start-delay timer callback: marks the delay as elapsed and wakes the
  * connection's worker so the command can be executed.
  * Always returns FALSE, so the timer fires only once.
  */
 static gboolean
 start_delay_helper(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *connection_rsc = NULL;
 
     cmd->delay_id = 0;
 
     connection_rsc = lrm_state_find(cmd->rsc_id);
     if (connection_rsc == NULL || connection_rsc->remote_ra_data == NULL) {
         return FALSE;
     }
 
     mainloop_set_trigger(((remote_ra_data_t *) connection_rsc->remote_ra_data)->work);
     return FALSE;
 }
 
 /* Report the result stored in cmd through lrm_op_callback().
  *
  * A temporary exec-complete event is built from the command; its parameter
  * table holds copies of the command's parameters and is destroyed again
  * after the callback returns.  The command itself is not modified or freed.
  */
 static void
 report_remote_ra_result(remote_ra_cmd_t * cmd)
 {
     lrmd_event_data_t event = { 0, };
     lrmd_key_value_t *param = NULL;
 
     event.type = lrmd_event_exec_complete;
     event.call_id = cmd->call_id;
     event.remote_nodename = cmd->owner;
     event.rsc_id = cmd->rsc_id;
     event.op_type = cmd->action;
     event.user_data = cmd->userdata;
     event.timeout = cmd->timeout;
     event.interval = cmd->interval;
     event.rc = cmd->rc;
     event.op_status = cmd->op_status;
 
     if (cmd->params != NULL) {
         event.params = g_hash_table_new_full(crm_str_hash,
                                              g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
 
         param = cmd->params;
         while (param != NULL) {
             g_hash_table_insert(event.params, strdup(param->key), strdup(param->value));
             param = param->next;
         }
     }
 
     lrm_op_callback(&event);
 
     if (event.params != NULL) {
         g_hash_table_destroy(event.params);
     }
 }
 
 /* Recompute cmd->remaining_timeout (ms): the command's timeout, truncated
  * to whole seconds, minus the seconds elapsed since cmd->start_time. */
 static void
 update_remaining_timeout(remote_ra_cmd_t * cmd)
 {
     time_t elapsed_s = time(NULL) - cmd->start_time;
 
     cmd->remaining_timeout = ((cmd->timeout / 1000) - elapsed_s) * 1000;
 }
 
 /* Timer callback that retries a pending "start" on a remote connection.
  *
  * If the current command is still a start and some of its timeout remains,
  * another connection attempt is begun.  If no attempt could be begun, the
  * command is reported as failed, released, and the worker is woken for any
  * queued commands.  Always returns FALSE, so the timer fires only once.
  */
 static gboolean
 retry_start_cmd_cb(gpointer data)
 {
     lrm_state_t *lrm_state = data;
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
     remote_ra_cmd_t *start_cmd = NULL;
     int connect_rc = -1;
 
     if (ra_data == NULL || ra_data->cur_cmd == NULL) {
         return FALSE;
     }
 
     start_cmd = ra_data->cur_cmd;
     if (safe_str_neq(start_cmd->action, "start")) {
         return FALSE;
     }
 
     update_remaining_timeout(start_cmd);
     if (start_cmd->remaining_timeout > 0) {
         connect_rc = handle_remote_ra_start(lrm_state, start_cmd, start_cmd->remaining_timeout);
     }
 
     if (connect_rc == 0) {
         /* wait for connection event */
         return FALSE;
     }
 
     /* out of time, or the attempt could not even be started */
     start_cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
     start_cmd->op_status = PCMK_LRM_OP_ERROR;
     report_remote_ra_result(start_cmd);
 
     if (ra_data->cmds) {
         mainloop_set_trigger(ra_data->work);
     }
     ra_data->cur_cmd = NULL;
     free_cmd(start_cmd);
 
     return FALSE;
 }
 
 
 /* Timer callback used when a stop has waited long enough for a takeover
  * event: performs the stop on the command's connection, then releases the
  * command.  Always returns FALSE, so the timer fires only once.
  */
 static gboolean
 connection_takeover_timeout_cb(gpointer data)
 {
     remote_ra_cmd_t *stop_cmd = data;
 
     crm_debug("takeover event timed out for node %s", stop_cmd->rsc_id);
     stop_cmd->takeover_timeout_id = 0;
 
     /* the connection state is looked up by the command's resource id */
     handle_remote_ra_stop(lrm_state_find(stop_cmd->rsc_id), stop_cmd);
     free_cmd(stop_cmd);
 
     return FALSE;
 }
 
 /* Timer callback for a poke (monitor) whose response never arrived.
  *
  * The command is marked as timed out, detached from the connection if it is
  * the current command, reported and released; the worker is woken when more
  * commands are queued.  Always returns FALSE, so the timer fires only once.
  */
 static gboolean
 monitor_timeout_cb(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *lrm_state = NULL;
     remote_ra_data_t *ra_data = NULL;
 
     crm_debug("Poke async response timed out for node %s", cmd->rsc_id);
     cmd->monitor_timeout_id = 0;
     cmd->op_status = PCMK_LRM_OP_TIMEOUT;
     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
 
     lrm_state = lrm_state_find(cmd->rsc_id);
     if (lrm_state != NULL) {
         ra_data = lrm_state->remote_ra_data;
     }
 
     if (ra_data != NULL) {
         if (ra_data->cur_cmd == cmd) {
             ra_data->cur_cmd = NULL;
         }
         if (ra_data->cmds) {
             mainloop_set_trigger(ra_data->work);
         }
     }
 
     report_remote_ra_result(cmd);
     free_cmd(cmd);
     return FALSE;
 }
 
 /* Create a node_state element for remote node node_name below parent.
  *
  * The element is flagged as remote, uses node_name as both uuid and uname,
  * and records source as its origin.  Returns the new element.
  */
 xmlNode *
 simple_remote_node_status(const char *node_name, xmlNode * parent, const char *source)
 {
     xmlNode *node_state = NULL;
 
     node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
 
     crm_xml_add(node_state, XML_NODE_IS_REMOTE, "true");
     crm_xml_add(node_state, XML_ATTR_UUID,  node_name);
     crm_xml_add(node_state, XML_ATTR_UNAME, node_name);
     crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
 
     return node_state;
 }
 
 /* Event callback for a remote lrmd connection.
  *
  * Looks up the connection state by op->remote_nodename and then:
  *   - records or rejects a takeover by another client,
  *   - forwards exec-complete events to lrm_op_callback() unless the
  *     connection has been taken over,
  *   - fails all queued and recurring monitors on an unexpected disconnect,
  *   - otherwise tries to match the event against the command currently
  *     waiting for an asynchronous result (start/migrate_from, monitor, stop)
  *     and completes that command.
  */
 void
 remote_lrm_op_callback(lrmd_event_data_t * op)
 {
     gboolean cmd_handled = FALSE;
     lrm_state_t *lrm_state = NULL;
     remote_ra_data_t *ra_data = NULL;
     remote_ra_cmd_t *cmd = NULL;
 
     crm_debug("remote connection event - event_type:%s node:%s action:%s rc:%s op_status:%s",
               lrmd_event_type2str(op->type),
               op->remote_nodename,
               op->op_type ? op->op_type : "none",
               services_ocf_exitcode_str(op->rc), services_lrm_status_str(op->op_status));
 
     lrm_state = lrm_state_find(op->remote_nodename);
     if (!lrm_state || !lrm_state->remote_ra_data) {
         crm_debug("lrm_state info not found for remote lrmd connection event");
         return;
     }
     ra_data = lrm_state->remote_ra_data;
 
     /* Another client has connected to the remote daemon,
      * determine if this is expected. */
     if (op->type == lrmd_event_new_client) {
         /* great, we knew this was coming */
         if (ra_data->migrate_status == expect_takeover) {
             ra_data->migrate_status = takeover_complete;
         } else {
             crm_err("Unexpected pacemaker_remote client takeover. Disconnecting");
             lrm_state_disconnect(lrm_state);
         }
         return;
     }
 
     /* filter all EXEC events up */
     if (op->type == lrmd_event_exec_complete) {
         if (ra_data->migrate_status == takeover_complete) {
             crm_debug("ignoring event, this connection is taken over by another node");
         } else {
             lrm_op_callback(op);
         }
         return;
     }
 
     /* A disconnect while active and with no command in flight is unexpected:
      * fail every recurring and queued monitor */
     if ((op->type == lrmd_event_disconnect) &&
         (ra_data->cur_cmd == NULL) &&
         (ra_data->active == TRUE)) {
 
         crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
         ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
         ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
         return;
     }
 
     if (!ra_data->cur_cmd) {
         crm_debug("no event to match");
         return;
     }
 
     cmd = ra_data->cur_cmd;
 
     /* Start actions and migrate from actions complete after connection
      * comes back to us. */
     if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") ||
                                            safe_str_eq(cmd->action, "migrate_from"))) {
 
         if (op->connection_rc < 0) {
             update_remaining_timeout(cmd);
             /* There isn't much of a reason to reschedule if the timeout is too small */
             if (cmd->remaining_timeout > 3000) {
                 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                 return;
             } else {
                 crm_trace("can't reschedule start, remaining timeout too small %d",
                           cmd->remaining_timeout);
             }
             cmd->op_status = PCMK_LRM_OP_TIMEOUT;
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
 
         } else {
+
+            if (safe_str_eq(cmd->action, "start")) {
+                /* clear PROBED value if it happens to be set after start completes. */
+                update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
+            }
             lrm_state_reset_tables(lrm_state);
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             ra_data->active = TRUE;
         }
 
         crm_debug("remote lrmd connect event matched %s action. ", cmd->action);
         report_remote_ra_result(cmd);
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {
 
         /* the response arrived, so the timeout guarding it is obsolete */
         if (cmd->monitor_timeout_id) {
             g_source_remove(cmd->monitor_timeout_id);
             cmd->monitor_timeout_id = 0;
         }
 
         /* Only report success the first time, after that only worry about failures.
          * For this function, if we get the poke back, it is always a success. Pokes
          * only fail if the send fails, or the response times out. */
         if (!cmd->reported_success) {
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
             cmd->reported_success = 1;
         }
 
         crm_debug("remote lrmd poke event matched %s action. ", cmd->action);
 
         /* success, keep rescheduling if interval is present. */
         if (cmd->interval && (cmd->cancel == FALSE)) {
             ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
             cmd->interval_id = g_timeout_add(cmd->interval, recurring_helper, cmd);
             cmd = NULL;         /* prevent free */
         }
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
 
         /* only an active connection with a non-cancelled monitor reports
          * the disconnect as a failure */
         if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
             cmd->op_status = PCMK_LRM_OP_ERROR;
             report_remote_ra_result(cmd);
             crm_err("remote-node %s unexpectedly disconneced during monitor operation", lrm_state->node_name);
         }
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {
 
         /* NOTE(review): every lrmd_event_new_client event already returns
          * from the takeover check near the top of this function, so this
          * branch looks unreachable - confirm the intended behaviour */
         handle_remote_ra_stop(lrm_state, cmd);
         cmd_handled = TRUE;
 
     } else {
         crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
     }
 
     if (cmd_handled) {
         ra_data->cur_cmd = NULL;
         if (ra_data->cmds) {
             mainloop_set_trigger(ra_data->work);
         }
         /* cmd is NULL here when a recurring monitor was rescheduled above;
          * free_cmd() ignores NULL */
         free_cmd(cmd);
     }
 }
 
 /* Execute a stop on a remote connection.
  *
  * Unless the stop follows a completed takeover, the remote node's attrd
  * state is removed.  The connection is marked inactive and disconnected,
  * every queued and recurring command is discarded, and cmd is reported as
  * successfully done.  cmd itself is not freed here.
  */
 static void
 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
 {
     remote_ra_data_t *ra_data = NULL;
 
     CRM_ASSERT(lrm_state);
     ra_data = lrm_state->remote_ra_data;
 
     /* only clear the status if this stop is not a part of a successful migration */
     if (ra_data->migrate_status != takeover_complete) {
         update_attrd_remote_node_removed(lrm_state->node_name, NULL);
     }
 
     ra_data->active = FALSE;
     lrm_state_disconnect(lrm_state);
 
     cmd->rc = PCMK_OCF_OK;
     cmd->op_status = PCMK_LRM_OP_DONE;
 
     /* drop everything still waiting on this connection */
     if (ra_data->cmds != NULL) {
         g_list_free_full(ra_data->cmds, free_cmd);
         ra_data->cmds = NULL;
     }
     if (ra_data->recurring_cmds != NULL) {
         g_list_free_full(ra_data->recurring_cmds, free_cmd);
         ra_data->recurring_cmds = NULL;
     }
     ra_data->cur_cmd = NULL;
 
     report_remote_ra_result(cmd);
 }
 
 /* Begin an asynchronous connection attempt to the remote node.
  *
  * The server ("addr" or "server") and "port" are taken from the command's
  * parameters; when a key occurs more than once the last value wins.  The
  * timeout handed to the attempt is timeout_ms capped at MAX_START_TIMEOUT_MS.
  * Returns the result of lrm_state_remote_connect_async().
  */
 static int
 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
 {
     const char *host = NULL;
     int tcp_port = 0;
     int connect_timeout = timeout_ms;
     lrmd_key_value_t *param = cmd->params;
 
     if (connect_timeout > MAX_START_TIMEOUT_MS) {
         connect_timeout = MAX_START_TIMEOUT_MS;
     }
 
     while (param != NULL) {
         if (safe_str_eq(param->key, "addr") || safe_str_eq(param->key, "server")) {
             host = param->value;
         }
         if (safe_str_eq(param->key, "port")) {
             tcp_port = atoi(param->value);
         }
         param = param->next;
     }
 
     return lrm_state_remote_connect_async(lrm_state, host, tcp_port, connect_timeout);
 }
 
 /* Worker for a remote connection: executes queued commands in order.
  *
  * user_data is the lrm_state of the connection.  Commands are taken from
  * the head of ra_data->cmds until the queue is empty, a command still waits
  * for its start delay, or a command has to wait for an asynchronous event
  * (in which case it becomes ra_data->cur_cmd and processing stops).
  * Commands that complete immediately are reported and freed here.
  * Always returns TRUE.
  */
 static gboolean
 handle_remote_ra_exec(gpointer user_data)
 {
     int rc = 0;
     lrm_state_t *lrm_state = user_data;
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
     remote_ra_cmd_t *cmd;
     GList *first = NULL;
 
     if (ra_data->cur_cmd) {
         /* still waiting on previous cmd */
         return TRUE;
     }
 
     while (ra_data->cmds) {
         first = ra_data->cmds;
         cmd = first->data;
         if (cmd->delay_id) {
             /* still waiting for start delay timer to trip */
             return TRUE;
         }
 
         /* unlink the command; from here on it is owned by this function
          * (or by cur_cmd when we return early) */
         ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
         g_list_free_1(first);
 
         if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
             ra_data->migrate_status = 0;
             rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
             if (rc == 0) {
                 /* take care of this later when we get async connection result */
                 crm_debug("began remote lrmd connect, waiting for connect event.");
                 ra_data->cur_cmd = cmd;
                 return TRUE;
             } else {
                 crm_debug("connect failed, not expecting to match any connection event later");
                 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                 cmd->op_status = PCMK_LRM_OP_ERROR;
             }
             report_remote_ra_result(cmd);
 
         } else if (!strcmp(cmd->action, "monitor")) {
 
             if (lrm_state_is_connected(lrm_state) == TRUE) {
                 rc = lrm_state_poke_connection(lrm_state);
                 if (rc < 0) {
                     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                     cmd->op_status = PCMK_LRM_OP_ERROR;
                 }
             } else {
                 /* not connected: report "not running" without poking */
                 rc = -1;
                 cmd->op_status = PCMK_LRM_OP_DONE;
                 cmd->rc = PCMK_OCF_NOT_RUNNING;
             }
 
             if (rc == 0) {
                 crm_debug("poked remote lrmd at node %s, waiting for async response.", cmd->rsc_id);
                 ra_data->cur_cmd = cmd;
                 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                 return TRUE;
             }
             report_remote_ra_result(cmd);
 
         } else if (!strcmp(cmd->action, "stop")) {
 
             if (ra_data->migrate_status == expect_takeover) {
                 /* briefly wait on stop for the takeover event to occur. If the
                  * takeover event does not occur during the wait period, that's fine.
                  * It just means that the remote-node's lrm_status section is going to get
                  * cleared which will require all the resources running in the remote-node
                  * to be explicitly re-detected via probe actions.  If the takeover does occur
                  * successfully, then we can leave the status section intact. */
 
                 /* Store the id in takeover_timeout_id: that is the field
                  * connection_takeover_timeout_cb() clears before the command
                  * is freed, so free_cmd() does not remove the timer that is
                  * currently being dispatched. */
                 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                 ra_data->cur_cmd = cmd;
                 return TRUE;
             }
 
             handle_remote_ra_stop(lrm_state, cmd);
 
         } else if (!strcmp(cmd->action, "migrate_to")) {
             /* nothing to do locally; remember that another node is expected
              * to take over this connection */
             ra_data->migrate_status = expect_takeover;
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
         }
 
         free_cmd(cmd);
     }
 
     return TRUE;
 }
 
 /* Attach remote resource agent state to lrm_state if it has none yet:
  * a zeroed remote_ra_data_t whose work trigger runs handle_remote_ra_exec()
  * with lrm_state as its argument. */
 static void
 remote_ra_data_init(lrm_state_t * lrm_state)
 {
     if (lrm_state->remote_ra_data == NULL) {
         remote_ra_data_t *fresh = calloc(1, sizeof(remote_ra_data_t));
 
         fresh->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
         lrm_state->remote_ra_data = fresh;
     }
 }
 
 /*
  * Release everything remote_ra_data_init() attached to lrm_state.
  *
  * Queued and recurring commands are freed with free_cmd, the mainloop
  * trigger is destroyed, and the remote_ra_data pointer is reset to NULL.
  * Safe to call when no remote-RA data was ever created.
  */
 void
 remote_ra_cleanup(lrm_state_t * lrm_state)
 {
     remote_ra_data_t *data = lrm_state->remote_ra_data;
 
     if (data == NULL) {
         return;
     }
 
     if (data->cmds != NULL) {
         g_list_free_full(data->cmds, free_cmd);
     }
     if (data->recurring_cmds != NULL) {
         g_list_free_full(data->recurring_cmds, free_cmd);
     }
 
     mainloop_destroy_trigger(data->work);
     free(data);
     lrm_state->remote_ra_data = NULL;
 }
 
 /*
  * Decide whether a resource refers to a remote-lrmd connection.
  *
  * Returns TRUE when agent/provider are REMOTE_LRMD_RA/"pacemaker", or,
  * failing that, when 'id' names an existing lrm_state entry.  Any of the
  * arguments may be NULL.
  */
 gboolean
 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
 {
     gboolean agent_matches = (agent != NULL)
         && (provider != NULL)
         && (strcmp(agent, REMOTE_LRMD_RA) == 0)
         && (strcmp(provider, "pacemaker") == 0);
 
     if (agent_matches) {
         return TRUE;
     }
 
     /* Only consult the lrm_state table when the agent check did not match */
     return ((id != NULL) && (lrm_state_find(id) != NULL)) ? TRUE : FALSE;
 }
 
 /*
  * Build a resource-info record for a remote connection resource.
  *
  * Returns NULL when rsc_id has no lrm_state entry; otherwise a newly
  * allocated lrmd_rsc_info_t describing an ocf:pacemaker:REMOTE_LRMD_RA
  * resource with that id.  The lrm_state argument is not consulted.
  */
 lrmd_rsc_info_t *
 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
 {
     lrmd_rsc_info_t *result = NULL;
 
     if (lrm_state_find(rsc_id) == NULL) {
         return NULL;
     }
 
     result = calloc(1, sizeof(*result));
     result->id = strdup(rsc_id);
     result->type = strdup(REMOTE_LRMD_RA);
     result->class = strdup("ocf");
     result->provider = strdup("pacemaker");
 
     return result;
 }
 
 /*
  * Report whether 'action' is one of the operations the remote connection
  * agent implements: start, stop, migrate_to, migrate_from or monitor.
  * A NULL action is never supported.
  */
 static gboolean
 is_remote_ra_supported_action(const char *action)
 {
     static const char *const supported[] = {
         "start",
         "stop",
         "migrate_to",
         "migrate_from",
         "monitor",
     };
     size_t idx = 0;
 
     if (action == NULL) {
         return FALSE;
     }
 
     for (idx = 0; idx < sizeof(supported) / sizeof(supported[0]); idx++) {
         if (strcmp(action, supported[idx]) == 0) {
             return TRUE;
         }
     }
 
     return FALSE;
 }
 
 /*
  * Fail and discard every recurring monitor command found in 'list'.
  *
  * Each command with interval > 0 and action "monitor" is reported with
  * PCMK_OCF_UNKNOWN_ERROR / PCMK_LRM_OP_ERROR, unlinked from 'list' and freed.
  *
  * Returns the (possibly new) head of 'list'; callers must store it back.
  */
 static GList *
 fail_all_monitor_cmds(GList * list)
 {
     GList *rm_list = NULL;
     remote_ra_cmd_t *cmd = NULL;
     GListPtr gIter = NULL;
 
     /* First pass: collect the matches so 'list' is not modified while it is walked */
     for (gIter = list; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
         if (cmd->interval > 0 && safe_str_eq(cmd->action, "monitor")) {
             rm_list = g_list_append(rm_list, cmd);
         }
     }
 
     /* Second pass: report the failure, then unlink and free each collected cmd */
     for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
 
         cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
         cmd->op_status = PCMK_LRM_OP_ERROR;
         report_remote_ra_result(cmd);
 
         list = g_list_remove(list, cmd);
         free_cmd(cmd);
     }
 
     /* frees only the temporary list nodes; the cmds themselves were freed above */
     g_list_free(rm_list);
     return list;
 }
 
 /*
  * Drop the first command in 'list' whose action and interval both match.
  *
  * The matching command, if any, is unlinked and released with free_cmd.
  * Returns the (possibly new) head of the list.
  */
 static GList *
 remove_cmd(GList * list, const char *action, int interval)
 {
     GList *node = NULL;
 
     for (node = list; node != NULL; node = node->next) {
         remote_ra_cmd_t *candidate = node->data;
 
         if ((candidate->interval == interval) && safe_str_eq(candidate->action, action)) {
             /* Stop right after unlinking: 'node' is no longer valid */
             list = g_list_remove(list, candidate);
             free_cmd(candidate);
             break;
         }
     }
 
     return list;
 }
 
 /*
  * Cancel an operation on the remote connection resource 'rsc_id'.
  *
  * A matching entry is removed from both the pending and the recurring
  * command lists; if the command currently executing also matches, it is
  * only flagged with cancel = TRUE.
  *
  * Returns 0 on success, or -EINVAL when rsc_id has no lrm_state entry or
  * that entry has no remote-RA data.  The lrm_state argument is not consulted.
  */
 int
 remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval)
 {
     lrm_state_t *conn = lrm_state_find(rsc_id);
     remote_ra_data_t *data = NULL;
     remote_ra_cmd_t *running = NULL;
 
     if ((conn == NULL) || (conn->remote_ra_data == NULL)) {
         return -EINVAL;
     }
     data = conn->remote_ra_data;
 
     data->cmds = remove_cmd(data->cmds, action, interval);
     data->recurring_cmds = remove_cmd(data->recurring_cmds, action, interval);
 
     running = data->cur_cmd;
     if ((running != NULL)
         && (running->interval == interval)
         && safe_str_eq(running->action, action)) {
 
         running->cancel = TRUE;
     }
 
     return 0;
 }
 
 int
 remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval,     /* ms */
                int timeout,     /* ms */
                int start_delay, /* ms */
                lrmd_key_value_t * params)
 {
     int rc = 0;
     lrm_state_t *connection_rsc = NULL;
     remote_ra_cmd_t *cmd = NULL;
     remote_ra_data_t *ra_data = NULL;
 
     if (is_remote_ra_supported_action(action) == FALSE) {
         rc = -EINVAL;
         goto exec_done;
     }
 
     connection_rsc = lrm_state_find(rsc_id);
     if (!connection_rsc) {
         rc = -EINVAL;
         goto exec_done;
     }
 
     remote_ra_data_init(connection_rsc);
 
     cmd = calloc(1, sizeof(remote_ra_cmd_t));
     cmd->owner = strdup(lrm_state->node_name);
     cmd->rsc_id = strdup(rsc_id);
     cmd->action = strdup(action);
     cmd->userdata = strdup(userdata);
     cmd->interval = interval;
     cmd->timeout = timeout;
     cmd->start_delay = start_delay;
     cmd->params = params;
     cmd->start_time = time(NULL);
 
     cmd->call_id = generate_callid();
 
     if (cmd->start_delay) {
         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
     }
     ra_data = connection_rsc->remote_ra_data;
 
     ra_data->cmds = g_list_append(ra_data->cmds, cmd);
     mainloop_set_trigger(ra_data->work);
 
     return cmd->call_id;
   exec_done:
 
     lrmd_key_value_freeall(params);
     return rc;
 }
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
index 926996b87a..a3aa78b6ea 100644
--- a/crmd/te_actions.c
+++ b/crmd/te_actions.c
@@ -1,715 +1,734 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 #include <crm/cib.h>
 #include <crm/msg_xml.h>
 
 #include <crm/common/xml.h>
 #include <tengine.h>
 
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crm/cluster.h>
 #include <throttle.h>
 
 /* Passed as the final argument of every generate_transition_key() call in this file */
 char *te_uuid = NULL;
 /* Node name -> struct te_peer_s job counters; created lazily by te_reset_job_counts() */
 GHashTable *te_targets = NULL;
 void send_rsc_command(crm_action_t * action);
 /* Forward declaration: used by te_rsc_command() before its definition */
 static void te_update_job_count(crm_action_t * action, int offset);
 
 /*
  * Arm the timeout timer for an in-flight graph action.
  *
  * A new crm_action_timer_t is attached to the action and scheduled to fire
  * action_timer_callback after the action's timeout plus the graph's
  * network delay.  Asserts that the timer source was created.
  */
 static void
 te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
 {
     crm_action_timer_t *timer = calloc(1, sizeof(crm_action_timer_t));
 
     action->timer = timer;
 
     timer->timeout = action->timeout;
     timer->reason = timeout_action;
     timer->action = action;
     timer->source_id = g_timeout_add(timer->timeout + graph->network_delay,
                                      action_timer_callback, (void *)timer);
 
     CRM_ASSERT(timer->source_id != 0);
 }
 
 /*
  * Graph callback for pseudo actions: nothing is sent anywhere, so the
  * action is confirmed immediately and the graph is re-evaluated.
  *
  * Always returns TRUE.
  */
 static gboolean
 te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
 {
     crm_debug("Pseudo action %d fired and confirmed", pseudo->id);
     /* Confirm first so update_graph() sees the action as complete */
     te_action_confirmed(pseudo);
     update_graph(graph, pseudo);
     trigger_graph();
     return TRUE;
 }
 
 /*
  * Push a node-state update for a fencing target into the CIB and erase the
  * target's LRM and transient-attribute status sections.
  *
  * action - not referenced by this function
  * target - name of the fenced node (required)
  * uuid   - node id to record for the target (required)
  *
  * Returns early, without touching the CIB, if target or uuid is NULL or no
  * peer entry can be obtained for the target.
  */
 void
 send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
 {
     int rc = pcmk_ok;
     crm_node_t *peer = NULL;
 
     /* zero out the node-status & remove all LRM status info */
     xmlNode *node_state = NULL;
 
     CRM_CHECK(target != NULL, return);
     CRM_CHECK(uuid != NULL, return);
 
     /* Make sure the membership and join caches are accurate */
     peer = crm_get_peer_full(0, target, CRM_GET_PEER_CLUSTER | CRM_GET_PEER_REMOTE);
 
     CRM_CHECK(peer != NULL, return);
 
     if (peer->uuid == NULL) {
         crm_info("Recording uuid '%s' for node '%s'", uuid, target);
         peer->uuid = strdup(uuid);
     }
 
     crmd_peer_down(peer, TRUE);
     node_state =
         do_update_node_cib(peer,
                            node_update_cluster | node_update_peer | node_update_join |
                            node_update_expected, NULL, __FUNCTION__);
 
     /* Force our known ID */
     crm_xml_add(node_state, XML_ATTR_UUID, uuid);
 
     rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
                                     cib_quorum_override | cib_scope_local | cib_can_create);
 
     /* Delay processing the trigger until the update completes */
     crm_debug("Sending fencing update %d for %s", rc, target);
     /* NOTE(review): the strdup'd target is presumably owned (and freed) by the
      * callback machinery -- confirm against fsa_register_cib_callback() */
     fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
 
     /* Make sure it sticks */
     /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local);    */
 
     erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
     erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
 
     free_xml(node_state);
     return;
 }
 
 /*
  * Graph callback for fencing actions: ask stonith to fence the action's
  * target and register tengine_stonith_callback for the result.
  *
  * Returns FALSE (after logging the action XML) when the action lacks an id,
  * target, target uuid or "stonith_action" meta value; TRUE otherwise.
  * The return code of the fence request itself is only used as the call id
  * for the callback registration.
  */
 static gboolean
 te_fence_node(crm_graph_t * graph, crm_action_t * action)
 {
     int rc = 0;
     const char *id = NULL;
     const char *uuid = NULL;
     const char *target = NULL;
     const char *type = NULL;
     gboolean invalid_action = FALSE;
     enum stonith_call_options options = st_opt_none;
 
     id = ID(action->xml);
     target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
     type = crm_meta_value(action->params, "stonith_action");
 
     CRM_CHECK(id != NULL, invalid_action = TRUE);
     CRM_CHECK(uuid != NULL, invalid_action = TRUE);
     CRM_CHECK(type != NULL, invalid_action = TRUE);
     CRM_CHECK(target != NULL, invalid_action = TRUE);
 
     if (invalid_action) {
         crm_log_xml_warn(action->xml, "BadAction");
         return FALSE;
     }
 
     crm_notice("Executing %s fencing operation (%s) on %s (timeout=%d)",
                type, id, target, transition_graph->stonith_timeout);
 
     /* Passing NULL means block until we can connect... */
     te_connect_stonith(NULL);
 
     /* Self-fencing is only permitted when exactly one node has confirmed its join */
     if (crmd_join_phase_count(crm_join_confirmed) == 1) {
         options |= st_opt_allow_suicide;
     }
 
     /* NOTE(review): the /1000 suggests stonith_timeout is in ms and the API
      * expects seconds -- confirm against the stonith API */
     rc = stonith_api->cmds->fence(stonith_api, options, target, type,
                                   transition_graph->stonith_timeout / 1000, 0);
 
     /* The freshly generated transition key is handed over as callback user data */
     stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
                                          st_opt_timeout_updates,
                                          generate_transition_key(transition_graph->id, action->id,
                                                                  0, te_uuid),
                                          "tengine_stonith_callback", tengine_stonith_callback);
 
     return TRUE;
 }
 
 /*
  * Fetch the expected return code of an action from its
  * XML_ATTR_TE_TARGET_RC meta value; defaults to 0 when the value is absent.
  */
 static int
 get_target_rc(crm_action_t * action)
 {
     const char *rc_text = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);
 
     if (rc_text == NULL) {
         return 0;
     }
 
     return crm_parse_int(rc_text, "0");
 }
 
 /*
  * Graph callback for crm-event actions: send the action's task to the crmd
  * on the node that routes for the action's target.
  *
  * A shutdown addressed to the local node is not sent; it is recorded as the
  * graph's completion action and confirmed immediately.  For a remote
  * shutdown the peer's expected state is set to CRMD_JOINSTATE_DOWN before
  * the request goes out.
  *
  * Returns FALSE if the action names no target node or the cluster message
  * could not be sent, TRUE otherwise.
  */
 static gboolean
 te_crm_command(crm_graph_t * graph, crm_action_t * action)
 {
     char *counter = NULL;
     xmlNode *cmd = NULL;
     gboolean is_local = FALSE;
 
     const char *id = NULL;
     const char *task = NULL;
     const char *value = NULL;
     const char *on_node = NULL;
     const char *router_node = NULL;
 
     gboolean rc = TRUE;
     gboolean no_wait = FALSE;
 
     id = ID(action->xml);
     task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
     on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
 
     /* Without an explicit router the message goes straight to the target */
     if (!router_node) {
         router_node = on_node;
     }
 
     CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
               crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task));
               return FALSE);
 
     if (safe_str_eq(router_node, fsa_our_uname)) {
         is_local = TRUE;
     }
 
     value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
     if (crm_is_true(value)) {
         no_wait = TRUE;
     }
 
     /* Log only once is_local and no_wait are known, so the suffixes are accurate */
     crm_info("Executing crm-event (%s): %s on %s%s%s",
              crm_str(id), crm_str(task), on_node,
              is_local ? " (local)" : "", no_wait ? " - no waiting" : "");
 
     if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
         /* defer until everything else completes */
         crm_info("crm-event (%s) is a local shutdown", crm_str(id));
         graph->completion_action = tg_shutdown;
         graph->abort_reason = "local shutdown";
         te_action_confirmed(action);
         update_graph(graph, action);
         trigger_graph();
         return TRUE;
 
     } else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) {
         crm_node_t *peer = crm_get_peer(0, router_node);
         crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
     }
 
     cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
 
     counter =
         generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
     crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
 
     rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
     free(counter);
     free_xml(cmd);
 
     if (rc == FALSE) {
         crm_err("Action %d failed: send", action->id);
         return FALSE;
 
     } else if (no_wait) {
         /* Nobody will report back: confirm now and move the graph along */
         te_action_confirmed(action);
         update_graph(graph, action);
         trigger_graph();
 
     } else {
         if (action->timeout <= 0) {
             crm_err("Action %d: %s on %s had an invalid timeout (%dms).  Using %dms instead",
                     action->id, task, on_node, action->timeout, graph->network_delay);
             action->timeout = graph->network_delay;
         }
         te_start_action_timer(graph, action);
     }
 
     return TRUE;
 }
 
 /*
  * Record the state of a graph action in the CIB status section.
  *
  * action - graph action whose XML carries the task, target and resource
  * status - operation status to record; PCMK_LRM_OP_PENDING is logged as a
  *          pending operation, any other value is logged as a timeout
  * op_rc  - operation return code to record
  *
  * Returns FALSE if the action XML has no resource element, the resource has
  * no id, or the CIB update call returns an error; TRUE otherwise.
  * action->sent_update is set once the update has been issued, whatever
  * its result.
  */
 gboolean
 cib_action_update(crm_action_t * action, int status, int op_rc)
 {
     lrmd_event_data_t *op = NULL;
     xmlNode *state = NULL;
     xmlNode *rsc = NULL;
     xmlNode *xml_op = NULL;
     xmlNode *action_rsc = NULL;
 
     int rc = pcmk_ok;
 
     const char *name = NULL;
     const char *value = NULL;
     const char *rsc_id = NULL;
     const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
     const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
     const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
 
     int call_options = cib_quorum_override | cib_scope_local;
     int target_rc = get_target_rc(action);
 
     if (status == PCMK_LRM_OP_PENDING) {
         crm_debug("%s %d: Recording pending operation %s on %s",
                   crm_element_name(action->xml), action->id, task_uuid, target);
     } else {
         crm_warn("%s %d: %s on %s timed out",
                  crm_element_name(action->xml), action->id, task_uuid, target);
     }
 
     action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
     if (action_rsc == NULL) {
         return FALSE;
     }
 
     rsc_id = ID(action_rsc);
     CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action");
               return FALSE);
 
 /*
   update the CIB
 
 <node_state id="hadev">
       <lrm>
         <lrm_resources>
           <lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
 */
 
     state = create_xml_node(NULL, XML_CIB_TAG_STATE);
 
     crm_xml_add(state, XML_ATTR_UUID, target_uuid);
     crm_xml_add(state, XML_ATTR_UNAME, target);
 
     /* 'rsc' walks down the nested elements and ends up on the resource entry */
     rsc = create_xml_node(state, XML_CIB_TAG_LRM);
     crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
 
     rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
     rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
     crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
 
     /* Copy type, class and provider over from the action's resource element */
     name = XML_ATTR_TYPE;
     value = crm_element_value(action_rsc, name);
     crm_xml_add(rsc, name, value);
     name = XML_AGENT_ATTR_CLASS;
     value = crm_element_value(action_rsc, name);
     crm_xml_add(rsc, name, value);
     name = XML_AGENT_ATTR_PROVIDER;
     value = crm_element_value(action_rsc, name);
     crm_xml_add(rsc, name, value);
 
     /* Synthesize an operation event with call id -1 and a fresh transition key */
     op = convert_graph_action(NULL, action, status, op_rc);
     op->call_id = -1;
     op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid);
 
     xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, __FUNCTION__, LOG_INFO);
     crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, target); /* For context during triage */
     lrmd_free_event(op);
 
     crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s",
               status < 0 ? "new action" : XML_ATTR_TIMEOUT,
               crm_element_name(action->xml), crm_str(task), rsc_id, target);
     crm_log_xml_trace(xml_op, "Op");
 
     rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
 
     crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)",
               services_lrm_status_str(status), action->id, task_uuid, target, rc);
 
     fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
     free_xml(state);
 
     action->sent_update = TRUE;
 
     if (rc < pcmk_ok) {
         return FALSE;
     }
 
     return TRUE;
 }
 
 /*
  * Graph callback for resource actions: hand the operation to the LRM on
  * the node that routes for the action's target.
  *
  * When the router node is this node the request is passed directly to
  * do_lrm_invoke(); otherwise it is sent as a cluster message.  Unless the
  * action is flagged no-wait or is already confirmed, the job count is
  * bumped and a timeout timer started.  If the action's XML_OP_ATTR_PENDING
  * meta value is true, a pending entry is also written to the CIB (except
  * for cancel and delete tasks).
  *
  * Returns FALSE if the action names no target node or the cluster message
  * could not be sent, TRUE otherwise.
  */
 static gboolean
 te_rsc_command(crm_graph_t * graph, crm_action_t * action)
 {
     /* never overwrite stop actions in the CIB with
      *   anything other than completed results
      *
      * Writing pending stops makes it look like the
      *   resource is running again
      *
      * NOTE(review): the pending-op check at the end of this function only
      * excludes cancel and delete tasks -- confirm stops are filtered elsewhere
      */
     xmlNode *cmd = NULL;
     xmlNode *rsc_op = NULL;
 
     gboolean rc = TRUE;
     gboolean no_wait = FALSE;
     gboolean is_local = FALSE;
 
     char *counter = NULL;
     const char *task = NULL;
     const char *value = NULL;
     const char *on_node = NULL;
     const char *router_node = NULL;
     const char *task_uuid = NULL;
 
     CRM_ASSERT(action != NULL);
     CRM_ASSERT(action->xml != NULL);
 
     action->executed = FALSE;
 
     /* Read the task before validating the node so the error below can name it */
     rsc_op = action->xml;
     task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
     on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
 
     CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
               crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task));
               return FALSE);
 
     task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
     router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
 
     /* Without an explicit router the request goes straight to the target */
     if (!router_node) {
         router_node = on_node;
     }
 
     counter =
         generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
     crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
 
     if (safe_str_eq(router_node, fsa_our_uname)) {
         is_local = TRUE;
     }
 
     value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
     if (crm_is_true(value)) {
         no_wait = TRUE;
     }
 
     crm_notice("Initiating action %d: %s %s on %s%s%s",
                action->id, task, task_uuid, on_node,
                is_local ? " (local)" : "", no_wait ? " - no waiting" : "");
 
     cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
                          CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
 
     if (is_local) {
         /* shortcut local resource commands */
         ha_msg_input_t data = {
             .msg = cmd,
             .xml = rsc_op,
         };
 
         fsa_data_t msg = {
             .id = 0,
             .data = &data,
             .data_type = fsa_dt_ha_msg,
             .fsa_input = I_NULL,
             .fsa_cause = C_FSA_INTERNAL,
             .actions = A_LRM_INVOKE,
             .origin = __FUNCTION__,
         };
 
         do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
 
     } else {
         rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
     }
 
     free(counter);
     free_xml(cmd);
 
     action->executed = TRUE;
 
     if (rc == FALSE) {
         crm_err("Action %d failed: send", action->id);
         return FALSE;
 
     } else if (no_wait) {
         crm_info("Action %d confirmed - no wait", action->id);
         action->confirmed = TRUE; /* Just mark confirmed.
                                    * Don't bump the job count only to immediately decrement it
                                    */
         update_graph(transition_graph, action);
         trigger_graph();
 
     } else if (action->confirmed == TRUE) {
         crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
                   action->id, task, task_uuid, on_node, action->timeout);
     } else {
         if (action->timeout <= 0) {
             crm_err("Action %d: %s %s on %s had an invalid timeout (%dms).  Using %dms instead",
                     action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
             action->timeout = graph->network_delay;
         }
         te_update_job_count(action, 1);
         te_start_action_timer(graph, action);
     }
 
     value = crm_meta_value(action->params, XML_OP_ATTR_PENDING);
     if (crm_is_true(value)
         && safe_str_neq(task, CRMD_ACTION_CANCEL)
         && safe_str_neq(task, CRMD_ACTION_DELETE)) {
         /* write a "pending" entry to the CIB, inhibit notification */
         crm_debug("Recording pending op %s in the CIB", task_uuid);
         cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_OCF_UNKNOWN);
     }
 
     return TRUE;
 }
 
 /* Per-node job accounting; these are the values stored in te_targets */
 struct te_peer_s
 {
         char *name;         /* node name; also used as the hash key, freed by te_peer_free() */
         int jobs;           /* running total adjusted by te_update_job_count_on() */
         int migrate_jobs;   /* subset of 'jobs' that were counted as migrations */
 };
 
 /* Value destructor for te_targets: releases the entry and the name it owns */
 static void te_peer_free(gpointer p)
 {
     struct te_peer_s *entry = (struct te_peer_s *) p;
 
     free(entry->name);
     free(entry);
 }
 
 void te_reset_job_counts(void)
 {
     GHashTableIter iter;
     struct te_peer_s *peer = NULL;
 
     if(te_targets == NULL) {
         te_targets = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, te_peer_free);
     }
 
     g_hash_table_iter_init(&iter, te_targets);
     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
         peer->jobs = 0;
         peer->migrate_jobs = 0;
     }
 }
 
 /*
  * Add 'offset' to the job count kept for 'target', creating the entry on
  * first use.  When 'migrate' is set the migration count is adjusted by the
  * same amount.  Does nothing if target is NULL or te_targets has not been
  * created.
  */
 static void
 te_update_job_count_on(const char *target, int offset, bool migrate)
 {
     struct te_peer_s *entry = NULL;
 
     if (target == NULL) {
         return;
     }
     if (te_targets == NULL) {
         return;
     }
 
     entry = g_hash_table_lookup(te_targets, target);
     if (entry == NULL) {
         /* The table key aliases entry->name; te_peer_free() releases both */
         entry = calloc(1, sizeof(*entry));
         entry->name = strdup(target);
         g_hash_table_insert(te_targets, entry->name, entry);
     }
 
     if (migrate) {
         entry->migrate_jobs += offset;
     }
     entry->jobs += offset;
 
     crm_trace("jobs[%s] = %d", target, entry->jobs);
 }
 
 /*
  * Adjust the job counters of the node(s) charged with a resource action.
  *
  * Non-resource actions and actions without a target are not counted.
  * An action with a router node is charged to that router node as an
  * ordinary job.  Without a router node, migrate operations are charged as
  * migration jobs to both the migration source and target; anything else is
  * charged to the action's target.
  */
 static void
 te_update_job_count(crm_action_t * action, int offset)
 {
     const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
     const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
 
     if (action->type != action_type_rsc || target == NULL) {
         /* No limit on these */
         return;
     }
 
-    if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
+    /* if we have a router node, this means the action is performing
+     * on a remote node. For now, we count all actions occurring on a
+     * remote node against the job list on the cluster node hosting
+     * the connection resources */
+    target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+    if ((target == NULL) &&
+        (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
+
         const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
 
         te_update_job_count_on(t1, offset, TRUE);
         te_update_job_count_on(t2, offset, TRUE);
-
-    } else {
-
-        te_update_job_count_on(target, offset, FALSE);
+        return;
+    } else if (target == NULL) {
+        target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     }
+
+    te_update_job_count_on(target, offset, FALSE);
 }
 
 /*
  * Decide whether 'target' has capacity to take on 'action' right now.
  *
  * Returns TRUE when target is NULL (no limit applies), FALSE when the
  * te_targets table has not been created, FALSE when the node's job count
  * has reached throttle_get_job_limit(), and FALSE for migrate operations
  * once the node has reached the graph's migration limit.  A counter entry
  * is created for the node on first use.
  */
 static gboolean
 te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target)
 {
     int limit = 0;
     struct te_peer_s *r = NULL;
     const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
     const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
 
     if(target == NULL) {
         /* No limit on these */
         return TRUE;
 
     } else if(te_targets == NULL) {
         return FALSE;
     }
 
     r = g_hash_table_lookup(te_targets, target);
     limit = throttle_get_job_limit(target);
 
     if(r == NULL) {
         /* The table key aliases r->name */
         r = calloc(1, sizeof(struct te_peer_s));
         r->name = strdup(target);
         g_hash_table_insert(te_targets, r->name, r);
     }
 
     if(limit <= r->jobs) {
         crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
                   target, limit, r->jobs, id);
         return FALSE;
 
     } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
         /* The migration limit only holds back migrate operations */
         if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
             crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
                       target, graph->migration_limit, r->migrate_jobs, id);
             return FALSE;
         }
     }
 
+    crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
+
     return TRUE;
 }
 
 /*
  * Graph callback deciding whether an action may be started now.
  *
  * Non-resource actions are always allowed.  A resource action with a router
  * node is checked against that router node.  Without a router node, migrate
  * operations must pass the check on both the migration source and target;
  * anything else is checked against the action's target.
  */
 static gboolean
 te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
 {
     const char *target = NULL;
     const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
 
     if (action->type != action_type_rsc) {
         /* No limit on these */
         return TRUE;
     }
 
-    if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
+    /* if we have a router node, this means the action is performing
+     * on a remote node. For now, we count all actions occurring on a
+     * remote node against the job list on the cluster node hosting
+     * the connection resources */
+    target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+    if ((target == NULL) &&
+        (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
+
         target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
         if(te_should_perform_action_on(graph, action, target) == FALSE) {
             return FALSE;
         }
 
         target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
 
-    } else {
+    } else if (target == NULL) {
         target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     }
 
     return te_should_perform_action_on(graph, action, target);
 }
 
 /*
  * Mark a graph action as confirmed.
  *
  * The first confirmation of a resource action that has a target also
  * takes one job off the counters via te_update_job_count().
  */
 void
 te_action_confirmed(crm_action_t * action)
 {
     const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     gboolean was_counted = (action->confirmed == FALSE)
         && (action->type == action_type_rsc)
         && (node != NULL);
 
     if (was_counted) {
         te_update_job_count(action, -1);
     }
 
     action->confirmed = TRUE;
 }
 
 
 /*
  * Callback table exposing this file's action handlers.
  * Positional initializer: the order must match the member order of
  * crm_graph_functions_t, which is declared elsewhere.
  */
 crm_graph_functions_t te_graph_fns = {
     te_pseudo_action,
     te_rsc_command,
     te_crm_command,
     te_fence_node,
     te_should_perform_action,
 };
 
 /*
  * Turn the completion of a transition graph into an FSA input.
  *
  * The graph's completion_action selects the event (if any) to register;
  * afterwards the abort reason and completion action are reset and the
  * R_IN_TRANSITION bit is cleared.  When no event was selected, fsa_source
  * is triggered instead, if it is set.
  */
 void
 notify_crmd(crm_graph_t * graph)
 {
     const char *type = "unknown";
     enum crmd_fsa_input event = I_NULL;
 
     crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
 
     /* Log if the graph is not marked complete, then force the flag */
     CRM_CHECK(graph->complete, graph->complete = TRUE);
 
     switch (graph->completion_action) {
         case tg_stop:
             type = "stop";
             if (fsa_state == S_TRANSITION_ENGINE) {
                 event = I_TE_SUCCESS;
             }
             break;
         case tg_done:
             type = "done";
             if (fsa_state == S_TRANSITION_ENGINE) {
                 event = I_TE_SUCCESS;
             }
             break;
 
         case tg_restart:
             type = "restart";
             if (fsa_state == S_TRANSITION_ENGINE) {
                 if (too_many_st_failures() == FALSE) {
                     /* With a transition timer configured, restart it rather
                      * than requesting a new calculation immediately */
                     if (transition_timer->period_ms > 0) {
                         crm_timer_stop(transition_timer);
                         crm_timer_start(transition_timer);
                     } else {
                         event = I_PE_CALC;
                     }
                 } else {
                     event = I_TE_SUCCESS;
                 }
 
             } else if (fsa_state == S_POLICY_ENGINE) {
                 register_fsa_action(A_PE_INVOKE);
             }
             break;
 
         case tg_shutdown:
             type = "shutdown";
             if (is_set(fsa_input_register, R_SHUTDOWN)) {
                 event = I_STOP;
 
             } else {
                 crm_err("We didn't ask to be shut down, yet our" " PE is telling us too.");
                 event = I_TERMINATE;
             }
     }
 
     crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason));
 
     /* Reset the graph's completion state for the next transition */
     graph->abort_reason = NULL;
     graph->completion_action = tg_done;
     clear_bit(fsa_input_register, R_IN_TRANSITION);
 
     if (event != I_NULL) {
         register_fsa_input(C_FSA_INTERNAL, event, NULL);
 
     } else if (fsa_source) {
         mainloop_set_trigger(fsa_source);
     }
 }
diff --git a/crmd/te_events.c b/crmd/te_events.c
index afe3072e02..b81a13e0a0 100644
--- a/crmd/te_events.c
+++ b/crmd/te_events.c
@@ -1,594 +1,590 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 #include <crm/cib.h>
 #include <crm/msg_xml.h>
 
 #include <crm/common/xml.h>
 #include <tengine.h>
 
 #include <crmd_fsa.h>
 
 /*
  * Fail-count values that update_failcount() considers for failed stop and
  * start operations.  That function defaults each one to INFINITY_S while it
  * is still NULL, and only uses the value verbatim when it equals INFINITY_S
  * (any other value results in the fail-count being incremented instead).
  */
 char *failed_stop_offset = NULL;
 char *failed_start_offset = NULL;
 
 /* Forward declaration; the definition appears later in this file. */
 int match_graph_event(int action_id, xmlNode * event, const char *event_node,
                       int op_status, int op_rc, int target_rc);
 
 /*
  * Mark as failed every outstanding action in the graph that can no longer
  * complete because the node identified by down_node went away.
  *
  * An action is affected when either its target UUID or the UUID of its
  * router node equals down_node.  Pseudo actions, already confirmed actions
  * and fencing operations (CRM_OP_FENCE) are left untouched, as are all
  * actions belonging to an already confirmed synapse.
  *
  * graph      transition graph to scan; NULL or a completed graph is a no-op
  * down_node  UUID of the node that is no longer available
  *
  * Returns TRUE if at least one action was failed (the transition is then
  * aborted with tg_restart), FALSE otherwise.
  */
 gboolean
 fail_incompletable_actions(crm_graph_t * graph, const char *down_node)
 {
     const char *target_uuid = NULL;
     const char *router = NULL;
     const char *router_uuid = NULL;
     xmlNode *last_action = NULL;
 
     GListPtr gIter = NULL;
     GListPtr gIter2 = NULL;
 
     if (graph == NULL || graph->complete) {
         return FALSE;
     }
 
     gIter = graph->synapses;
     for (; gIter != NULL; gIter = gIter->next) {
         synapse_t *synapse = (synapse_t *) gIter->data;
 
         if (synapse->confirmed) {
             continue;
         }
 
         gIter2 = synapse->actions;
         for (; gIter2 != NULL; gIter2 = gIter2->next) {
             crm_action_t *action = (crm_action_t *) gIter2->data;
 
             if (action->type == action_type_pseudo || action->confirmed) {
                 continue;
             } else if (action->type == action_type_crm) {
                 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
 
                 if (safe_str_eq(task, CRM_OP_FENCE)) {
                     continue;
                 }
             }
 
             target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
             router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
 
             /* Start from a clean slate for every action: without this reset,
              * the router UUID resolved for an earlier action would leak into
              * later actions that have no router (or whose router is unknown)
              * and could fail them by mistake. */
             router_uuid = NULL;
             if (router) {
                 crm_node_t *node = crm_get_peer(0, router);
                 if (node) {
                     router_uuid = node->uuid;
                 }
             }
 
             if (safe_str_eq(target_uuid, down_node) || safe_str_eq(router_uuid, down_node)) {
                 action->failed = TRUE;
                 synapse->failed = TRUE;
                 last_action = action->xml;
                 stop_te_timer(action->timer);
                 update_graph(graph, action);
 
                 if (synapse->executed) {
                     crm_notice("Action %d (%s) was pending on %s (offline)",
                                action->id, ID(action->xml), down_node);
                 } else {
                     crm_notice("Action %d (%s) is scheduled for %s (offline)",
                                action->id, ID(action->xml), down_node);
                 }
             }
         }
     }
 
     if (last_action != NULL) {
         /* At least one action can never run: ask for a fresh transition */
         crm_warn("Node %s shutdown resulted in un-runnable actions", down_node);
         abort_transition(INFINITY, tg_restart, "Node failure", last_action);
         return TRUE;
     }
 
     return FALSE;
 }
 
 /*
  * Record a failed operation by updating the resource's "fail-count" and
  * "last-failure" attributes (via update_attrd) for the node it ran on.
  *
  * event            operation result XML; supplies the operation key and origin
  * event_node_uuid  UUID of the node the event came from
  * rc               result code the operation returned
  * target_rc        result code the transition expected
  * do_update        TRUE to force an update regardless of the task type
  *
  * Returns FALSE when the event is not treated as a failure here (rc is 99,
  * rc equals target_rc, or the event originated from "build_active_RAs").
  * Every other path returns TRUE, including the ones where nothing was
  * updated (unknown node name, unparsable operation key, or a task that
  * does not count).
  */
 static gboolean
 update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc, gboolean do_update)
 {
     int interval = 0;
 
     char *task = NULL;
     char *rsc_id = NULL;
     char *attr_name = NULL;
 
     const char *value = NULL;
     const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
     const char *on_uname = crm_peer_uname(event_node_uuid);
     const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
 
     if (rc == 99) {
         /* this is an internal code for "we're busy, try again" */
         return FALSE;
 
     } else if (rc == target_rc) {
         return FALSE;
     }
 
     if (safe_str_eq(origin, "build_active_RAs")) {
         crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
                   id, rc, on_uname);
         return FALSE;
     }
 
     CRM_CHECK(on_uname != NULL, return TRUE);
 
     /* Lazily default both offsets to INFINITY_S the first time through */
     if (failed_stop_offset == NULL) {
         failed_stop_offset = strdup(INFINITY_S);
     }
 
     if (failed_start_offset == NULL) {
         failed_start_offset = strdup(INFINITY_S);
     }
 
     CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event));
               goto bail);
     CRM_CHECK(task != NULL, goto bail);
     CRM_CHECK(rsc_id != NULL, goto bail);
 
     /* Decide whether this failure counts: a forced update or a recurring
      * operation (interval > 0) always does; otherwise only start, stop,
      * promote and demote failures do.  Only start and stop pick up one of
      * the configurable offsets as the candidate value. */
     if (do_update || interval > 0) {
         do_update = TRUE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_START)) {
         do_update = TRUE;
         value = failed_start_offset;
 
     } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
         do_update = TRUE;
         value = failed_stop_offset;
 
-    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
-        do_update = TRUE;
-        value = failed_stop_offset;
-
     } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
         do_update = TRUE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
         do_update = TRUE;
     }
 
     /* Anything other than an explicit INFINITY_S offset is replaced by the
      * XML_NVPAIR_ATTR_VALUE "++" expression (presumably understood by attrd
      * as "increment the current value" -- confirm). */
     if (value == NULL || safe_str_neq(value, INFINITY_S)) {
         value = XML_NVPAIR_ATTR_VALUE "++";
     }
 
     if (do_update) {
         char *now = crm_itoa(time(NULL));
         gboolean is_remote_node = FALSE;
 
         if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
             is_remote_node = TRUE;
         }
 
         crm_warn("Updating failcount for %s on %s after failed %s:"
                  " rc=%d (update=%s, time=%s)", rsc_id, on_uname, task, rc, value, now);
 
         attr_name = crm_concat("fail-count", rsc_id, '-');
         update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
         free(attr_name);
 
         /* last-failure is stamped with the current time taken above */
         attr_name = crm_concat("last-failure", rsc_id, '-');
         update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
         free(attr_name);
 
         free(now);
     }
 
   bail:
     /* Reached on success and on parse failures alike; both return TRUE */
     free(rsc_id);
     free(task);
     return TRUE;
 }
 
 /*
  * Derive the effective operation status by comparing the result code an
  * action produced (rc) with the one the transition expected (target_rc).
  *
  * A match always yields PCMK_LRM_OP_DONE; a mismatch always yields
  * PCMK_LRM_OP_ERROR.  Mismatches are logged as warnings unless rc is 99,
  * the code used for direct nacks.
  */
 static int
 status_from_rc(crm_action_t * action, int orig_status, int rc, int target_rc)
 {
     const char *op_key = NULL;
     const char *node_name = NULL;
     int result = PCMK_LRM_OP_ERROR;
 
     if (rc == target_rc) {
         crm_trace("Target rc: == %d", rc);
         if (orig_status != PCMK_LRM_OP_DONE) {
             crm_trace("Re-mapping op status to" " PCMK_LRM_OP_DONE for rc=%d", rc);
         }
         return PCMK_LRM_OP_DONE;
     }
 
     /* Direct nacks (rc 99) are expected and therefore not worth a warning */
     if (rc == 99) {
         return result;
     }
 
     op_key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
     node_name = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s",
              action->id, op_key, node_name, target_rc, rc, services_lrm_status_str(result));
 
     return result;
 }
 
 /*
  * Detect the moment a remote-node is (re)integrated into the cluster and
  * abort the transition if that remote-node is currently in the "lost"
  * state, so that a new transition can place resources on it.
  *
  * Only confirmed, non-failed "start" resource actions are examined, and
  * only while the remote peer cache is non-empty.  The first resource
  * element found beneath the action decides the outcome.
  */
 static void
 process_remote_node_action(crm_action_t *action, xmlNode *event)
 {
     xmlNode *rsc_xml = NULL;
     const char *agent_provider = NULL;
     const char *agent_type = NULL;
     const char *rsc_id = NULL;
     crm_node_t *peer = NULL;
 
     if (crm_remote_peer_cache_size() == 0
         || action->type != action_type_rsc
         || action->failed
         || action->confirmed == FALSE
         || safe_str_neq(crm_element_value(action->xml, XML_LRM_ATTR_TASK), "start")) {
         return;
     }
 
     /* Skip ahead to the first child describing a resource */
     rsc_xml = __xml_first_child(action->xml);
     while (rsc_xml != NULL && safe_str_neq(crm_element_name(rsc_xml), XML_CIB_TAG_RESOURCE)) {
         rsc_xml = __xml_next(rsc_xml);
     }
 
     if (rsc_xml == NULL) {
         return;
     }
 
     agent_provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
     agent_type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
     rsc_id = ID(rsc_xml);
 
     /* Only pacemaker:remote resources represent remote-node connections */
     if (safe_str_neq(agent_provider, "pacemaker")
         || safe_str_neq(agent_type, "remote")
         || rsc_id == NULL) {
         return;
     }
 
     peer = crm_get_peer_full(0, rsc_id, CRM_GET_PEER_REMOTE);
     if (peer == NULL) {
         return;
     }
 
     /* A remote node is placed in the "lost" state after it has been
      * successfully fenced.  Having just connected to it again, abort the
      * transition so resources can be placed on the re-integrated node. */
     if (safe_str_eq(peer->state, CRM_NODE_LOST)) {
         abort_transition(INFINITY, tg_restart, "Remote-node re-discovered.", event);
     }
 }
 
 /*
  * Apply an operation result to the matching action of the current
  * transition graph.
  *
  * action_id   id of the graph action the event belongs to
  * event       operation result XML
  * event_node  UUID of the node the event came from (used for fail-counts)
  * op_status   operation status as reported; re-derived from the result codes
  * op_rc       result code the operation returned
  * target_rc   result code the transition expected
  *
  * returns the ID of the action if a match is found
  * returns -1 if a match was not found
  *
  * NOTE(review): the historical header also documented a -2 result for
  * "matched but failed"; no code path in this function returns -2.
  */
 int
 match_graph_event(int action_id, xmlNode * event, const char *event_node,
                   int op_status, int op_rc, int target_rc)
 {
     const char *target = NULL;
     const char *allow_fail = NULL;
     const char *this_event = NULL;
     crm_action_t *action = NULL;
 
     action = get_action(action_id, FALSE);
     if (action == NULL) {
         return -1;
     }
 
     op_status = status_from_rc(action, op_status, op_rc, target_rc);
     if (op_status != PCMK_LRM_OP_DONE) {
         update_failcount(event, event_node, op_rc, target_rc, FALSE);
     }
 
     /* Process OP status */
     switch (op_status) {
         case PCMK_LRM_OP_PENDING:
             crm_debug("Ignoring pending operation");
             return action->id;
         case PCMK_LRM_OP_DONE:
             break;
         case PCMK_LRM_OP_ERROR:
         case PCMK_LRM_OP_TIMEOUT:
         case PCMK_LRM_OP_NOTSUPPORTED:
             action->failed = TRUE;
             break;
         case PCMK_LRM_OP_CANCELLED:
             /* do nothing?? */
             crm_err("Dont know what to do for cancelled ops yet");
             break;
         default:
             action->failed = TRUE;
             crm_err("Unsupported action result: %d", op_status);
     }
 
     /* stop this event's timer if it had one */
     stop_te_timer(action->timer);
     te_action_confirmed(action);
 
     update_graph(transition_graph, action);
     trigger_graph();
 
     /* A failure the graph explicitly tolerates does not abort the transition */
     if (action->failed) {
         allow_fail = crm_meta_value(action->params, XML_ATTR_TE_ALLOWFAIL);
         if (crm_is_true(allow_fail)) {
             action->failed = FALSE;
         }
     }
 
     if (action->failed) {
         abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event);
     }
 
     this_event = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
     target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     /* Report the operation's result code: the message labels the value as
      * "rc", so printing the derived op status here would be misleading. */
     crm_info("Action %s (%d) confirmed on %s (rc=%d)",
              crm_str(this_event), action->id, crm_str(target), op_rc);
 
     /* determine if this action affects a remote-node's online/offline status */
     process_remote_node_action(action, event);
     return action->id;
 }
 
 /*
  * Look up an action of the current transition graph by its id.
  *
  * id         action id to search for
  * confirmed  if TRUE, the action's timer is stopped and the action is
  *            marked confirmed before it is returned
  *
  * Returns the action, or NULL when no synapse contains one with that id.
  */
 crm_action_t *
 get_action(int id, gboolean confirmed)
 {
     GListPtr syn_item = NULL;
 
     for (syn_item = transition_graph->synapses; syn_item != NULL; syn_item = syn_item->next) {
         synapse_t *synapse = (synapse_t *) syn_item->data;
         GListPtr act_item = NULL;
 
         for (act_item = synapse->actions; act_item != NULL; act_item = act_item->next) {
             crm_action_t *candidate = (crm_action_t *) act_item->data;
 
             if (candidate->id != id) {
                 continue;
             }
 
             if (confirmed) {
                 stop_te_timer(candidate->timer);
                 te_action_confirmed(candidate);
             }
             return candidate;
         }
     }
 
     return NULL;
 }
 
 /*
  * Find the cancel action in the current transition graph that refers to
  * the operation key id.
  *
  * id    operation key the cancel action must carry
  * node  if non-NULL, the target UUID the cancel action must be addressed to
  *
  * Returns the first matching action, or NULL if there is none.
  */
 crm_action_t *
 get_cancel_action(const char *id, const char *node)
 {
     GListPtr syn_item = NULL;
 
     for (syn_item = transition_graph->synapses; syn_item != NULL; syn_item = syn_item->next) {
         synapse_t *synapse = (synapse_t *) syn_item->data;
         GListPtr act_item = NULL;
 
         for (act_item = synapse->actions; act_item != NULL; act_item = act_item->next) {
             crm_action_t *candidate = (crm_action_t *) act_item->data;
             const char *op_name = crm_element_value(candidate->xml, XML_LRM_ATTR_TASK);
             const char *op_key = NULL;
             const char *target_uuid = NULL;
 
             /* Only cancel operations are of interest */
             if (safe_str_neq(CRMD_ACTION_CANCEL, op_name)) {
                 continue;
             }
 
             op_key = crm_element_value(candidate->xml, XML_LRM_ATTR_TASK_KEY);
             if (safe_str_neq(op_key, id)) {
                 crm_trace("Wrong key %s for %s on %s", op_key, id, node);
                 continue;
             }
 
             target_uuid = crm_element_value(candidate->xml, XML_LRM_ATTR_TARGET_UUID);
             if (node && safe_str_neq(target_uuid, node)) {
                 crm_trace("Wrong node %s for %s on %s", target_uuid, id, node);
                 continue;
             }
 
             crm_trace("Found %s on %s", id, node);
             return candidate;
         }
     }
 
     return NULL;
 }
 
 /*
  * Locate the action in the current transition graph that accounts for a
  * node going down.
  *
  * id      if positive, an action with exactly this id matches immediately
  * target  node UUID the action must be addressed to
  * filter  if non-NULL, only CRM actions whose task equals it are considered
  * quiet   if TRUE, suppress the warning logged when nothing matched and no
  *         positive id was given
  *
  * Returns the matching action, or NULL if none exists.
  */
 crm_action_t *
 match_down_event(int id, const char *target, const char *filter, bool quiet)
 {
     crm_action_t *found = NULL;
     GListPtr syn_item = NULL;
 
     for (syn_item = transition_graph->synapses; syn_item != NULL && found == NULL;
          syn_item = syn_item->next) {
         synapse_t *synapse = (synapse_t *) syn_item->data;
         GListPtr act_item = NULL;
 
         for (act_item = synapse->actions; act_item != NULL; act_item = act_item->next) {
             crm_action_t *candidate = (crm_action_t *) act_item->data;
             const char *op_name = NULL;
             const char *node_uuid = NULL;
 
             /* An explicit id wins over every other criterion */
             if (id > 0 && candidate->id == id) {
                 found = candidate;
                 break;
             }
 
             op_name = crm_element_value(candidate->xml, XML_LRM_ATTR_TASK);
 
             if (candidate->type != action_type_crm
                 || safe_str_eq(op_name, CRM_OP_LRM_REFRESH)
                 || (filter != NULL && safe_str_neq(op_name, filter))) {
                 continue;
             }
 
             node_uuid = crm_element_value(candidate->xml, XML_LRM_ATTR_TARGET_UUID);
 
             if (node_uuid == NULL) {
                 crm_log_xml_err(candidate->xml, "No node uuid");
             }
 
             if (safe_str_neq(node_uuid, target)) {
                 crm_debug("Action %d : Node mismatch: %s", candidate->id, node_uuid);
                 continue;
             }
 
             found = candidate;
             id = candidate->id;
             break;
         }
     }
 
     if (found != NULL) {
         crm_debug("Match found for action %d: %s on %s", id,
                   crm_element_value(found->xml, XML_LRM_ATTR_TASK_KEY), target);
 
     } else if (id > 0) {
         crm_err("No match for action %d", id);
 
     } else if(quiet == FALSE) {
         crm_warn("No match for shutdown action on %s", target);
     }
 
     return found;
 }
 
 /*
  * Process an operation result (lrm_rsc_op) seen in the CIB and reconcile it
  * with the current transition graph.
  *
  * The event's transition key is decoded to determine which transition and
  * action it belongs to.  Events that cannot be attributed to an action of
  * the active graph abort the transition; attributable ones are handed to
  * match_graph_event().  Results that did not pass are offered to
  * update_failcount().
  *
  * event       operation result XML; must not be NULL
  * event_node  UUID of the node the event came from
  *
  * Returns TRUE only for events initiated by a different node or belonging
  * to an older transition that update_failcount() did not treat as a
  * failure; returns FALSE in all other cases.
  */
 gboolean
 process_graph_event(xmlNode * event, const char *event_node)
 {
     int rc = -1;
     int status = -1;
     int callid = -1;
 
     int action = -1;
     int target_rc = -1;
     int transition_num = -1;
     char *update_te_uuid = NULL;
 
     gboolean stop_early = FALSE;
     gboolean passed = FALSE;
     const char *id = NULL;
     const char *desc = NULL;
     const char *magic = NULL;
 
     CRM_ASSERT(event != NULL);
 
 /*
 <lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1355361636" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
 */
 
     id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
     crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
     crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
     crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
 
     magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
     if (magic == NULL) {
         /* non-change */
         return FALSE;
     }
 
     if (decode_transition_key(magic, &update_te_uuid, &transition_num, &action, &target_rc) ==
         FALSE) {
         /* id is optional in the XML, so guard it before handing it to %s */
         crm_err("Invalid event %s.%d detected: %s", crm_str(id), callid, magic);
         abort_transition(INFINITY, tg_restart, "Bad event", event);
         return FALSE;
     }
 
     if (status == PCMK_LRM_OP_PENDING) {
         goto bail;
     }
 
     if (transition_num == -1) {
         desc = "initiated outside of the cluster";
         abort_transition(INFINITY, tg_restart, "Unexpected event", event);
 
     } else if (action < 0 || crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE) {
         desc = "initiated by a different node";
         abort_transition(INFINITY, tg_restart, "Foreign event", event);
         stop_early = TRUE;      /* This could be an lrm status refresh */
 
     } else if (transition_graph->id != transition_num) {
         desc = "arrived really late";
         abort_transition(INFINITY, tg_restart, "Old event", event);
         stop_early = TRUE;      /* This could be an lrm status refresh */
 
     } else if (transition_graph->complete) {
         desc = "arrived late";
         abort_transition(INFINITY, tg_restart, "Inactive graph", event);
 
     } else if (match_graph_event(action, event, event_node, status, rc, target_rc) < 0) {
         desc = "unknown";
         abort_transition(INFINITY, tg_restart, "Unknown event", event);
 
     } else if (rc == target_rc) {
         passed = TRUE;
         crm_trace("Processed update to %s: %s", id, magic);
     }
 
     if (passed == FALSE) {
         if (update_failcount(event, event_node, rc, target_rc, transition_num == -1)) {
             /* Turns out this wasn't an lrm status refresh update after all */
             stop_early = FALSE;
             desc = "failed";
         }
         /* desc stays NULL when the event matched an action but
          * update_failcount() declined it (e.g. rc 99), and id may be absent
          * from the XML; passing NULL to %s is undefined, so wrap both. */
         crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num, action, crm_str(id),
                  callid, services_ocf_exitcode_str(rc), crm_str(desc));
     }
 
   bail:
     free(update_te_uuid);
     return stop_early;
 }
diff --git a/cts/CTS.py b/cts/CTS.py
index 676b2b3eb3..f4198c441e 100644
--- a/cts/CTS.py
+++ b/cts/CTS.py
@@ -1,1015 +1,1018 @@
 '''CTS: Cluster Testing System: Main module
 
 Classes related to testing high-availability clusters...
  '''
 
 __copyright__ = '''
 Copyright (C) 2000, 2001 Alan Robertson <alanr@unix.sh>
 Licensed under the GNU GPL.
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 import types, string, select, sys, time, re, os, struct, signal
 import time, syslog, random, traceback, base64, pickle, binascii, fcntl
 
 
 from socket import gethostbyname_ex
 from UserDict import UserDict
 from subprocess import Popen,PIPE
 from threading import Thread
 
 from cts.CTSvars     import *
 from cts.logging     import LogFactory
 from cts.watcher     import LogWatcher
 from cts.remote      import RemoteFactory
 from cts.environment import EnvFactory
 from cts.patterns    import PatternSelector
 
 # Bookkeeping for the load-statistics helper script.
 # has_log_stats starts out empty; presumably it records, per node, whether
 # the helper has been installed -- confirm against its users.
 # log_stats_bin is the path, below the CRM daemon directory, of the helper
 # whose shell source is held in the log_stats string that follows.
 has_log_stats = {}
 log_stats_bin = CTSvars.CRM_DAEMON_DIR + "/cts_log_stats.sh"
 log_stats = """
 #!/bin/bash
 # Tool for generating system load reports while CTS runs
 
 trap "" 1
 
 f=$1; shift
 action=$1; shift
 base=`basename $0`
 
 if [ ! -e $f ]; then
     echo "Time, Load 1, Load 5, Load 15, Test Marker" > $f
 fi
 
 function killpid() {
     if [ -e $f.pid ]; then
        kill -9 `cat $f.pid`
        rm -f $f.pid
     fi
 }
 
 function status() {
     if [ -e $f.pid ]; then
        kill -0 `cat $f.pid`
        return $?
     else
        return 1
     fi
 }
 
 function start() {
     # Is it already running?
     if
 	status
     then
         return
     fi
 
     echo Active as $$
     echo $$ > $f.pid
 
     while [ 1 = 1 ]; do
         uptime | sed s/up.*:/,/ | tr '\\n' ',' >> $f
         #top -b -c -n1 | grep -e usr/libexec/pacemaker | grep -v -e grep -e python | head -n 1 | sed s@/usr/libexec/pacemaker/@@ | awk '{print " 0, "$9", "$10", "$12}' | tr '\\n' ',' >> $f
         echo 0 >> $f
         sleep 5
     done
 }
 
 case $action in
     start)
         start
         ;;
     start-bg|bg)
         # Use c --ssh -- ./stats.sh file start-bg
         nohup $0 $f start >/dev/null 2>&1 </dev/null &
         ;;
     stop)
 	killpid
 	;;
     delete)
 	killpid
 	rm -f $f
 	;;
     mark)
 	uptime | sed s/up.*:/,/ | tr '\\n' ',' >> $f
 	echo " $*" >> $f
         start
 	;;
     *)
 	echo "Unknown action: $action."
 	;;
 esac
 """
 
 class CtsLab:
     '''This class defines the Lab Environment for the Cluster Test System.
     It defines those things which are expected to change from test
     environment to test environment for the same cluster manager.
 
     It is where you define the set of nodes that are in your test lab
     what kind of reset mechanism you use, etc.
 
     This class offers a dictionary-style interface (item access and
     has_key are delegated to the environment object held in self.Env)
     because we hold many different parameters of different kinds, and this
     provides a uniform and extensible interface useful for any kind of
     communication between the user/administrator/tester and CTS.
 
     At this point in time, it is the intent of this class to model static
     configuration and/or environmental data about the environment which
     doesn't change as the tests proceed.
 
     Well-known names (keys) are an important concept in this class.
     The HasMinimalKeys member function knows the minimal set of
     well-known names for the class.
 
     The following names are standard (well-known) at this time:
 
         nodes           An array of the nodes in the cluster
         reset           A ResetMechanism object
         logger          An array of objects that log strings...
         CMclass         The type of ClusterManager we are running
                         (This is a class object, not a class instance)
         RandSeed        Random seed.  It is a triple of bytes. (optional)
 
     The CTS code ignores names it doesn't know about/need.
     The individual tests have access to this information, and it is
     perfectly acceptable to provide hints, tweaks, fine-tuning
     directions or other information to the tests through this mechanism.
     '''
 
     def __init__(self, args=None):
         '''Set up the environment (built from args), logger and remote shell.'''
         self.Env = EnvFactory().getInstance(args)
         self.Scenario = None
         self.logger = LogFactory()
         self.rsh = RemoteFactory().getInstance()
 
     def dump(self):
         '''Dump the environment settings.'''
         self.Env.dump()
 
     def has_key(self, key):
         '''Return TRUE if the environment defines the given key'''
         return self.Env.has_key(key)
 
     def __getitem__(self, key):
         return self.Env[key]
 
     def __setitem__(self, key, value):
         self.Env[key] = value
 
     def HasMinimalKeys(self):
         'Return TRUE if our object has the minimal set of keys/values in it'
         # NOTE(review): MinimalKeys is not defined anywhere in this class as
         # shown here; confirm it is provided elsewhere before relying on
         # this method.
         result = 1
         for key in self.MinimalKeys:
             if not self.has_key(key):
                 result = None
         return result
 
     def run(self, Scenario, Iterations):
         '''Run the given scenario for the requested number of iterations.
 
         Returns 0 on success.  Returns 1 when no scenario was given, when
         set-up failed, when the scenario raised, or when the number of
         successes does not equal Iterations; otherwise returns the
         scenario's failure count when it is greater than zero.
         '''
         if not Scenario:
             self.logger.log("No scenario was defined")
             return 1
 
         self.logger.log("Cluster nodes: ")
         for node in self.Env["nodes"]:
             self.logger.log("    * %s" % (node))
 
         if not Scenario.SetUp():
             return 1
 
         try :
             Scenario.run(Iterations)
         except :
             # Deliberately broad: whatever interrupted the run, still
             # summarize and tear the scenario down before giving up.
             self.logger.log("Exception by %s" % sys.exc_info()[0])
             self.logger.traceback(traceback)
 
             Scenario.summarize()
             Scenario.TearDown()
             return 1
 
         #ClusterManager.oprofileSave(Iterations)
         Scenario.TearDown()
 
         Scenario.summarize()
         if Scenario.Stats["failure"] > 0:
             return Scenario.Stats["failure"]
 
         elif Scenario.Stats["success"] != Iterations:
             self.logger.log("No failure count but success != requested iterations")
             return 1
 
         return 0
 
     def IsValidNode(self, node):
         'Return TRUE if the given node is valid'
         # The node list lives in the environment under "nodes" (see run());
         # this class has no self.Nodes attribute, so looking the node up
         # there raised AttributeError on every call.
         return node in self.Env["nodes"]
 
     def __CheckNode(self, node):
         "Raise a ValueError if the given node isn't valid"
 
         if not self.IsValidNode(node):
             raise ValueError("Invalid node [%s] in CheckNode" % node)
 
 class NodeStatus:
     '''Helpers for probing whether cluster nodes are reachable.
 
     Every probe is a command run through the RemoteFactory instance and is
     considered successful when that command exits with status 0.
     '''
 
     def __init__(self, env):
-        pass
+        self.Env = env
 
     def IsNodeBooted(self, node):
         '''Return TRUE if the given node is booted (its docker container is
         running when the "docker" environment setting is set, otherwise it
         responds to pings)'''
+        if self.Env["docker"]:
+            return RemoteFactory().getInstance()("localhost", "docker inspect --format {{.State.Running}} %s | grep -q true" % node, silent=True) == 0
+
         return RemoteFactory().getInstance()("localhost", "ping -nq -c1 -w1 %s" % node, silent=True) == 0
 
     def IsSshdUp(self, node):
         '''Return TRUE if running "true" on the node through the remote shell succeeds'''
         rc = RemoteFactory().getInstance()(node, "true", silent=True)
         return rc == 0
 
     def WaitForNodeToComeUp(self, node, Timeout=300):
         '''Return TRUE when given node comes up.
 
         Timeout is the number of tries, not seconds; each failed try is
         followed by a 30 second sleep.  Once all tries are used up the
         operator is asked whether to continue: answering "n" raises
         ValueError, any other answer makes the method return None.
         '''
         timeout = Timeout
         anytimeouts = 0
         while timeout > 0:
             if self.IsNodeBooted(node) and self.IsSshdUp(node):
                 if anytimeouts:
                      # Fudge to wait for the system to finish coming up
                      time.sleep(30)
                      LogFactory().debug("Node %s now up" % node)
                 return 1
 
             time.sleep(30)
             # Announce the wait only once, after the first failed try
             if (not anytimeouts):
                 LogFactory().debug("Waiting for node %s to come up" % node)
 
             anytimeouts = 1
             timeout = timeout - 1
 
         LogFactory().log("%s did not come up within %d tries" % (node, Timeout))
         answer = raw_input('Continue? [nY]')
         if answer and answer == "n":
             raise ValueError("%s did not come up within %d tries" % (node, Timeout))
 
     def WaitForAllNodesToComeUp(self, nodes, timeout=300):
         '''Return TRUE when all nodes come up, or None as soon as waiting
         for one of them does not succeed.  timeout is passed on to
         WaitForNodeToComeUp for each node.'''
 
         for node in nodes:
             if not self.WaitForNodeToComeUp(node, timeout):
                 return None
         return 1
 
 
 class ClusterManager(UserDict):
     '''The Cluster Manager class.
     This is a subclass of the Python dictionary class.
     (this is because it contains lots of {name,value} pairs,
     not because its behavior is that terribly similar to a
     dictionary in other ways.)
 
     This is an abstract class which implements high-level
     operations on the cluster and/or its cluster managers.
     Actual cluster manager classes are subclassed from this type.
 
     One of the things we do is track the state we think every node should
     be in.
     '''
 
     def __InitialConditions(self):
         #if os.geteuid() != 0:
         #  raise ValueError("Must Be Root!")
         None
 
     def _finalConditions(self):
         for key in self.keys():
             if self[key] == None:
                 raise ValueError("Improper derivation: self[" + key +   "] must be overridden by subclass.")
 
     def __init__(self, Environment, randseed=None):
         '''Initialise the manager from the shared environment instance.
 
         The Environment and randseed arguments are accepted but not used
         by this constructor; settings come from EnvFactory().getInstance().
         '''
         # Environment and the name-specific pattern templates
         self.Env = EnvFactory().getInstance()
         self.templates = PatternSelector(self.Env["Name"])
         self.__InitialConditions()
 
         # Logging and general bookkeeping
         self.logger = LogFactory()
         self.clear_cache = 0
         self.TestLoggingLevel = 0
         self.data = {}
         self.name = self.Env["Name"]
 
         # Remote access and per-node state tracking
         self.rsh = RemoteFactory().getInstance()
         self.ShouldBeStatus = {}
         self.ns = NodeStatus(self.Env)
         self.OurNode = os.uname()[1].lower()
         self.__instance_errorstoignore = []
 
     def __getitem__(self, key):
         if key == "Name":
             return self.name
 
         print "FIXME: Getting %s from %s" % (key, repr(self))
         if self.data.has_key(key):
             return self.data[key]
 
         return self.templates.get_patterns(self.Env["Name"], key)
 
     def __setitem__(self, key, value):
         print "FIXME: Setting %s=%s on %s" % (key, value, repr(self))
         self.data[key] = value
 
     def key_for_node(self, node):
         '''Return the key identifying node; here simply the node itself.'''
         key = node
         return key
 
     def instance_errorstoignore_clear(self):
         '''Allows the test scenario to reset instance errors to ignore on each iteration.'''
         self.__instance_errorstoignore = []
 
     def instance_errorstoignore(self):
         '''Return list of errors which are 'normal' for a specific test instance'''
         return self.__instance_errorstoignore
 
     def errorstoignore(self):
         '''Return the errors that are 'normal' and should be ignored; this
         base implementation has none.'''
         return list()
 
     def log(self, args):
         self.logger.log(args)
 
     def debug(self, args):
         self.logger.debug(args)
 
     def prepare(self):
         '''Finish the Initialization process. Prepare to test...'''
 
         print repr(self)+"prepare"
         for node in self.Env["nodes"]:
             if self.StataCM(node):
                 self.ShouldBeStatus[node] = "up"
             else:
                 self.ShouldBeStatus[node] = "down"
 
             self.unisolate_node(node)
 
     def upcount(self):
         '''How many nodes are up?'''
         count = 0
         for node in self.Env["nodes"]:
           if self.ShouldBeStatus[node] == "up":
             count = count + 1
         return count
 
     def install_helper(self, filename, destdir=None, nodes=None, sourcedir=None):
         if sourcedir == None:
             sourcedir = CTSvars.CTS_home
         file_with_path = "%s/%s" % (sourcedir, filename)
         if not nodes:
             nodes = self.Env["nodes"]
 
         if not destdir:
             destdir = CTSvars.CTS_home
 
         self.debug("Installing %s to %s on %s" % (filename, destdir, repr(self.Env["nodes"])))
         for node in nodes:
             self.rsh(node, "mkdir -p %s" % destdir)
             self.rsh.cp(file_with_path, "root@%s:%s/%s" % (node, destdir, filename))
         return file_with_path
 
     def install_config(self, node):
         '''Placeholder for installing configuration on node; this base
         implementation does nothing and returns None.'''
         result = None
         return result
 
     def clear_all_caches(self):
         '''When clear_cache is set, remove the hostcache file on every node expected to be down.'''
         if not self.clear_cache:
             return
         for member in self.Env["nodes"]:
             if self.ShouldBeStatus[member] != "down":
                 self.debug("NOT Removing cache file on: "+member)
                 continue
             self.debug("Removing cache file on: "+member)
             self.rsh(member, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache")
 
     def prepare_fencing_watcher(self, name):
         '''Create a log watcher for fencing that may be triggered by starting a node.
         If we don't have quorum now but get it as a result of starting this node,
         then a bunch of nodes might get fenced.  Returns an armed LogWatcher for the
         relevant fencing patterns, or None when the cluster already has quorum or a
         fencing pattern is not defined.  The `name` argument is not used.
         '''
         if self.HasQuorum(None):
             self.debug("Have quorum")
             return None
 
         if not self.templates["Pat:Fencing_start"]:
             print("No start pattern")
             return None
 
         if not self.templates["Pat:Fencing_ok"]:
             print("No ok pattern")
             return None
 
         stonithPats = []
         for peer in self.Env["nodes"]:
             # Nodes that are not up are always watched.  With CMAN there is a delay
             # between gaining quorum and fencing starting, which can mean that even
             # nodes that are fully up get fenced; there is no use fighting it, so
             # look for everyone so that CTS doesn't get confused.
             if self.ShouldBeStatus[peer] != "up" or self.Env["Stack"] == "corosync (cman)":
                 stonithPats.append(self.templates["Pat:Fencing_ok"] % peer)
                 stonithPats.append(self.templates["Pat:Fencing_start"] % peer)
 
         stonith = LogWatcher(self.Env["LogFileName"], stonithPats, "StartupFencing", 0, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"])
         stonith.setwatch()
         return stonith
 
     def fencing_cleanup(self, node, stonith):
         '''Account for peers that were fenced as a result of `node` starting.
         node    - the node that was just started
         stonith - the watcher returned by prepare_fencing_watcher(), or None
         Returns the list of peers found in fencing messages, or None if a fenced
         peer failed to restart while Env["at-boot"] is set.
         '''
         peer_list = []
         peer_state = {}
 
         self.debug("Looking for nodes that were fenced as a result of %s starting" % node)
 
         # If we just started a node, we may now have quorum (and permission to fence)
         if not stonith:
             self.debug("Nothing to do")
             return peer_list
 
         q = self.HasQuorum(None)
         if not q and len(self.Env["nodes"]) > 2:
             # We didn't gain quorum - we shouldn't have shot anyone
             self.debug("Quorum: %d Len: %d" % (q, len(self.Env["nodes"])))
             return peer_list
 
         for n in self.Env["nodes"]:
             peer_state[n] = "unknown"
 
         # Now see if any states need to be updated
         self.debug("looking for: " + repr(stonith.regexes))
         shot = stonith.look(0)
         while shot:
             line = repr(shot)
             self.debug("Found: " + line)
             del stonith.regexes[stonith.whichmatch]
 
             # Forget the peer from any previous line: without this an unmatched
             # first line raises UnboundLocalError and a later unmatched line is
             # silently attributed to the previously matched peer
             peer = None
 
             # Extract node name
             for n in self.Env["nodes"]:
                 if re.search(self.templates["Pat:Fencing_ok"] % n, shot):
                     peer = n
                     peer_state[peer] = "complete"
                     self.__instance_errorstoignore.append(self.templates["Pat:Fencing_ok"] % peer)
 
                 elif peer_state[n] != "complete" and re.search(self.templates["Pat:Fencing_start"] % n, shot):
                     # TODO: Correctly detect multiple fencing operations for the same host
                     peer = n
                     peer_state[peer] = "in-progress"
                     self.__instance_errorstoignore.append(self.templates["Pat:Fencing_start"] % peer)
 
             if not peer:
                 self.logger.log("ERROR: Unknown stonith match: %s" % line)
 
             elif not peer in peer_list:
                 self.debug("Found peer: " + peer)
                 peer_list.append(peer)
 
             # Get the next one
             shot = stonith.look(60)
 
         for peer in peer_list:
 
             self.debug("   Peer %s was fenced as a result of %s starting: %s" % (peer, node, peer_state[peer]))
             if self.Env["at-boot"]:
                 self.ShouldBeStatus[peer] = "up"
             else:
                 self.ShouldBeStatus[peer] = "down"
 
             if peer_state[peer] == "in-progress":
                 # Wait for any in-progress operations to complete
                 shot = stonith.look(60)
                 while len(stonith.regexes) and shot:
                     line = repr(shot)
                     self.debug("Found: " + line)
                     del stonith.regexes[stonith.whichmatch]
                     shot = stonith.look(60)
 
             # Now make sure the node is alive too
             self.ns.WaitForNodeToComeUp(peer, self.Env["DeadTime"])
 
             # Poll until it comes up
             if self.Env["at-boot"]:
                 if not self.StataCM(peer):
                     time.sleep(self.Env["StartTime"])
 
                 if not self.StataCM(peer):
                     self.logger.log("ERROR: Peer %s failed to restart after being fenced" % peer)
                     return None
 
         return peer_list
 
     def StartaCM(self, node, verbose=False):
         '''Start up the cluster manager on a given node.
         Returns 1 if the node is not expected to be down, was already running,
         or started successfully; returns None if the start command or the
         startup checks failed.
         '''
         if verbose: self.logger.log("Starting %s on node %s" % (self.templates["Name"], node))
         else: self.debug("Starting %s on node %s" % (self.templates["Name"], node))
 
         # A node we have no record of is treated as down
         if node not in self.ShouldBeStatus:
             self.ShouldBeStatus[node] = "down"
 
         if self.ShouldBeStatus[node] != "down":
             return 1
 
         patterns = []
         # Technically we should always be able to notice ourselves starting
         patterns.append(self.templates["Pat:Local_started"] % node)
         if self.upcount() == 0:
             patterns.append(self.templates["Pat:Master_started"] % node)
         else:
             patterns.append(self.templates["Pat:Slave_started"] % node)
 
         watch = LogWatcher(
             self.Env["LogFileName"], patterns, "StartaCM", self.Env["StartTime"]+10, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"])
 
         self.install_config(node)
 
         self.ShouldBeStatus[node] = "any"
         if self.StataCM(node) and self.cluster_stable(self.Env["DeadTime"]):
             self.logger.log ("%s was already started" % (node))
             return 1
 
         # Clear out the host cache so autojoin can be exercised
         if self.clear_cache:
             self.debug("Removing cache file on: "+node)
             self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache")
 
         if not(self.Env["valgrind-tests"]):
             startCmd = self.templates["StartCmd"]
         else:
             if self.Env["valgrind-prefix"]:
                 prefix = self.Env["valgrind-prefix"]
             else:
                 prefix = "cts"
 
             startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % (
                 self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, """%p""", self.templates["StartCmd"])
 
         # Must be set up before the start command runs so fencing messages are seen
         stonith = self.prepare_fencing_watcher(node)
 
         watch.setwatch()
 
         if self.rsh(node, startCmd) != 0:
             self.logger.log ("Warn: Start command failed on node %s" % (node))
             self.fencing_cleanup(node, stonith)
             return None
 
         self.ShouldBeStatus[node] = "up"
         watch_result = watch.lookforall()
 
         if watch.unmatched:
             for regex in watch.unmatched:
                 self.logger.log ("Warn: Startup pattern not found: %s" % (regex))
 
         if watch_result and self.cluster_stable(self.Env["DeadTime"]):
             #self.debug("Found match: "+ repr(watch_result))
             self.fencing_cleanup(node, stonith)
             return 1
 
         elif self.StataCM(node) and self.cluster_stable(self.Env["DeadTime"]):
             self.fencing_cleanup(node, stonith)
             return 1
 
         self.logger.log ("Warn: Start failed for node %s" % (node))
         return None
 
     def StartaCMnoBlock(self, node, verbose=False):
         '''Start up the cluster manager on a given node in non-blocking mode.
         The start command is issued with synchronous=0, the node is recorded as
         "up" and 1 is returned.
         '''
         announce = "Starting %s on node %s" % (self["Name"], node)
         if verbose:
             self.logger.log(announce)
         else:
             self.debug(announce)
 
         # Clear out the host cache so autojoin can be exercised
         if self.clear_cache:
             self.debug("Removing cache file on: "+node)
             self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache")
 
         self.install_config(node)
         if self.Env["valgrind-tests"]:
             # Wrap the start command so the daemons run under valgrind
             prefix = self.Env["valgrind-prefix"] or "cts"
             startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % (
                 self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, """%p""", self.templates["StartCmd"])
         else:
             startCmd = self.templates["StartCmd"]
 
         self.rsh(node, startCmd, synchronous=0)
         self.ShouldBeStatus[node] = "up"
         return 1
 
     def StopaCM(self, node, verbose=False, force=False):
         '''Stop the cluster manager on a given node.
         Returns 1 if the node was not expected to be up (and force is False) or the
         stop command succeeded; logs an error and returns None otherwise.
         '''
         announce = "Stopping %s on node %s" % (self["Name"], node)
         if verbose:
             self.logger.log(announce)
         else:
             self.debug(announce)
 
         if self.ShouldBeStatus[node] != "up" and force == False:
             return 1
 
         if self.rsh(node, self.templates["StopCmd"]) != 0:
             self.logger.log ("ERROR: Could not stop %s on node %s" % (self["Name"], node))
             return None
 
         # Stop command succeeded: record the new state, then pause via cluster_stable()
         self.ShouldBeStatus[node] = "down"
         self.cluster_stable(self.Env["DeadTime"])
         return 1
 
     def StopaCMnoBlock(self, node):
         '''Stop the cluster manager on a given node in non-blocking mode.
         The stop command is issued with synchronous=0, the node is recorded as
         "down" and 1 is returned.
         '''
         announce = "Stopping %s on node %s" % (self["Name"], node)
         self.debug(announce)
         stop_cmd = self.templates["StopCmd"]
         self.rsh(node, stop_cmd, synchronous=0)
         self.ShouldBeStatus[node] = "down"
         return 1
 
     def cluster_stable(self, timeout = None):
         '''Sleep for Env["StableTime"] seconds and return 1; `timeout` is not used here.'''
         settle = self.Env["StableTime"]
         time.sleep(settle)
         return 1
 
     def node_stable(self, node):
         '''Report `node` as stable; this implementation always returns 1.'''
         stable = 1
         return stable
 
     def RereadCM(self, node):
         '''Force the cluster manager on a given node to reread its config.
            This may be a no-op on certain cluster managers.
            Returns 1 when the command exits with 0, otherwise logs and returns None.
         '''
         if self.rsh(node, self.templates["RereadCmd"]) == 0:
             return 1
         self.logger.log ("Could not force %s on node %s to reread its config"
         %        (self["Name"], node))
         return None
 
     def StataCM(self, node):
         '''Report the status of the cluster manager on a given node.
         Runs the status command on the node and treats output that does not contain
         'stopped' as "up".  If that contradicts the recorded expectation a message is
         logged; the observed state is then stored in ShouldBeStatus.
         Returns True when the manager is up, False otherwise.
         '''
         out = self.rsh(node, self.templates["StatusCmd"] % node, 1)
         # str.find() replaces the deprecated string.find() module function
         ret = (out.find('stopped') == -1)
         observed = "up" if ret else "down"
         contradicted = "down" if ret else "up"
 
         # A node with no recorded expectation yet is simply recorded below
         if self.ShouldBeStatus.get(node) == contradicted:
             self.logger.log(
             "Node status for %s is %s but we think it should be %s"
             %        (node, observed, contradicted))
 
         self.ShouldBeStatus[node] = observed
         return ret
 
     def startall(self, nodelist=None, verbose=False, quick=False):
         '''Start the cluster manager on the nodes of the cluster.
         We can do it on a subset of the cluster if nodelist is not None.
         With quick set, the first node is started synchronously and then every
         node is started without blocking while the logs are watched.
         Returns 1 on success and 0 on failure.
         '''
         if not nodelist:
             nodelist = self.Env["nodes"]
 
         for node in nodelist:
             if self.ShouldBeStatus[node] == "down":
                 self.ns.WaitForAllNodesToComeUp(nodelist, 300)
 
         if not quick:
             # NOTE(review): `node` is left over from the loop above, so only the
             # last node of nodelist is started here - confirm this is intended
             if not self.StartaCM(node, verbose=verbose):
                 return 0
             return 1
 
         # Approximation of SimulStartList for --boot 
         watchpats = [ ]
         watchpats.append(self.templates["Pat:DC_IDLE"])
         for node in nodelist:
             watchpats.append(self.templates["Pat:Local_started"] % node)
             watchpats.append(self.templates["Pat:InfraUp"] % node)
             watchpats.append(self.templates["Pat:PacemakerUp"] % node)
 
         #   Start all the nodes - at about the same time...
         watch = LogWatcher(self.Env["LogFileName"], watchpats, "fast-start", self.Env["DeadTime"]+10, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"])
         watch.setwatch()
 
         if not self.StartaCM(nodelist[0], verbose=verbose):
             return 0
         for node in nodelist:
             self.StartaCMnoBlock(node, verbose=verbose)
 
         watch.lookforall()
         if watch.unmatched:
             for regex in watch.unmatched:
                 self.logger.log ("Warn: Startup pattern not found: %s" % (regex))
 
         if not self.cluster_stable():
             self.logger.log("Cluster did not stabilize")
             return 0
 
         return 1
 
     def stopall(self, nodelist=None, verbose=False, force=False):
         '''Stop the cluster managers on every node in the cluster.
         We can do it on a subset of the cluster if nodelist is not None.
         Nodes not expected to be up are skipped unless force is set.
         Returns 1 if every attempted stop succeeded, otherwise 0.
         '''
         ret = 1
         if not nodelist:
             nodelist = self.Env["nodes"]
         # Honour the requested subset instead of always walking the whole cluster
         for node in nodelist:
             if self.ShouldBeStatus[node] == "up" or force == True:
                 if not self.StopaCM(node, verbose=verbose, force=force):
                     ret = 0
         return ret
 
     def rereadall(self, nodelist=None):
         '''Force the cluster managers on every node in the cluster
         to reread their config files.  We can do it on a subset of the
         cluster if nodelist is not None.  Only nodes expected to be up are asked.
         '''
         if not nodelist:
             nodelist = self.Env["nodes"]
         # Honour the requested subset instead of always walking the whole cluster
         for node in nodelist:
             if self.ShouldBeStatus[node] == "up":
                 self.RereadCM(node)
 
     def statall(self, nodelist=None):
         '''Return the status of the cluster managers in the cluster as a
         {node: "up" or "down"} dictionary, using StataCM() for each node.
         We can do it on a subset of the cluster if nodelist is not None.
         '''
         targets = nodelist if nodelist else self.Env["nodes"]
         states = {}
         for member in targets:
             states[member] = "up" if self.StataCM(member) else "down"
         return states
 
     def isolate_node(self, target, nodes=None):
         '''Isolate `target` by running the break-communication command on it for
         every other node (all cluster nodes unless `nodes` is given).
         Returns 1 on success, or None as soon as one command fails.
         '''
         peers = nodes or self.Env["nodes"]
         for other in peers:
             if other == target:
                 continue
             rc = self.rsh(target, self.templates["BreakCommCmd"] % self.key_for_node(other))
             if rc != 0:
                 self.logger.log("Could not break the communication between %s and %s: %d" % (target, other, rc))
                 return None
             self.debug("Communication cut between %s and %s" % (target, other))
         return 1
 
     def unisolate_node(self, target, nodes=None):
         '''Fix the communication between `target` and every other node (all
         cluster nodes unless `nodes` is given).  The fix commands are issued
         with synchronous=0 and their results are not checked.
         '''
         if not nodes:
             nodes = self.Env["nodes"]
 
         for node in nodes:
             if node != target:
                 # Limit the amount of time we have asynchronous connectivity for
                 # Restore both sides as simultaneously as possible
                 self.rsh(target, self.templates["FixCommCmd"] % self.key_for_node(node), synchronous=0)
                 self.rsh(node, self.templates["FixCommCmd"] % self.key_for_node(target), synchronous=0)
                 self.debug("Communication restored between %s and %s" % (target, node))
 
     def reducecomm_node(self,node):
         '''Reduce the communication between the nodes by running the reduce command
         on `node` with the configured transmit and receive loss values.
         Returns 1 on success, otherwise logs and returns None.
         '''
         losses = (self.Env["XmitLoss"],self.Env["RecvLoss"])
         if self.rsh(node, self.templates["ReduceCommCmd"]%losses) == 0:
             return 1
         self.logger.log("Could not reduce the communication between the nodes from node: %s" % node)
         return None
 
     def restorecomm_node(self,node):
         '''Restore the saved communication between the nodes.
         The restore command only runs when a transmit or receive loss is configured.
         Returns 1 on success (or when nothing had to run), otherwise logs and returns None.
         '''
         lossy = float(self.Env["XmitLoss"]) != 0 or float(self.Env["RecvLoss"]) != 0
         rc = self.rsh(node, self.templates["RestoreCommCmd"]) if lossy else 0
         if rc != 0:
             self.logger.log("Could not restore the communication between the nodes from node: %s" % node)
             return None
         return 1
 
     def HasQuorum(self, node_list):
         '''Return TRUE if the cluster currently has quorum.
         If we are auditing a partition, then one side will have quorum and the
         other not, so the caller needs to tell us which we are checking.
         If no value for node_list is specified... assume all nodes.
         This implementation is abstract and always raises ValueError.
         '''
         raise ValueError("Abstract Class member (HasQuorum)")
 
     def Components(self):
         '''Abstract member: this implementation always raises ValueError.'''
         raise ValueError("Abstract Class member (Components)")
 
     def oprofileStart(self, node=None):
         '''Set up and start oprofile on `node`, or on every node listed in
         Env["oprofile"] when no node is given.  Nodes not in that list are ignored.
         '''
         profiled = self.Env["oprofile"]
         if not node:
             for member in profiled:
                 self.oprofileStart(member)
             return
         if node not in profiled:
             return
         self.debug("Enabling oprofile on %s" % node)
         for step in ("opcontrol --init",
                      "opcontrol --setup --no-vmlinux --separate=lib --callgraph=20 --image=all",
                      "opcontrol --start",
                      "opcontrol --reset"):
             self.rsh(node, step)
 
     def oprofileSave(self, test, node=None):
         '''Save the oprofile session for test number `test` on `node`, or on every
         node listed in Env["oprofile"] when no node is given, then restart profiling.
         Nodes not in that list are ignored.
         '''
         if not node:
             for n in self.Env["oprofile"]:
                 self.oprofileSave(test, n)
 
         elif node in self.Env["oprofile"]:
             self.rsh(node, "opcontrol --dump")
             self.rsh(node, "opcontrol --save=cts.%d" % test)
             # Read back with: opreport -l session:cts.0 image:/usr/lib/heartbeat/c*
             # Profiling is stopped and started again; the former reset-only
             # alternative sat behind 'if None:' and could never run
             self.oprofileStop(node)
             self.oprofileStart(node)
 
     def oprofileStop(self, node=None):
         '''Reset and shut down oprofile on `node`, or on every node listed in
         Env["oprofile"] when no node is given.  Nodes not in that list are ignored.
         '''
         profiled = self.Env["oprofile"]
         if not node:
             for member in profiled:
                 self.oprofileStop(member)
             return
         if node not in profiled:
             return
         self.debug("Stopping oprofile on %s" % node)
         self.rsh(node, "opcontrol --reset")
         # NOTE(review): '2>&1 > /dev/null' only discards stdout; confirm whether
         # stderr was meant to be silenced as well
         self.rsh(node, "opcontrol --shutdown 2>&1 > /dev/null")
 
 
     def StatsExtract(self):
         '''Collect the statistics file from every node that has the stats helper installed.
         Does nothing unless Env["stats"] is set.  For each such node the helper is
         stopped, the remote file is read and appended to a local
         cts-stats-<node count>-nodes-<host>.csv file, and the remote file is deleted.
         '''
         if not self.Env["stats"]:
             return
 
         # The remote path does not depend on the host, so build it once
         log_stats_file = "%s/cts-stats.csv" % CTSvars.CRM_DAEMON_DIR
         for host in self.Env["nodes"]:
             if host in has_log_stats:
                 self.rsh(host, '''bash %s %s stop''' % (log_stats_bin, log_stats_file))
                 (rc, lines) = self.rsh(host, '''cat %s''' % log_stats_file, stdout=2)
                 self.rsh(host, '''bash %s %s delete''' % (log_stats_bin, log_stats_file))
 
                 fname = "cts-stats-%d-nodes-%s.csv" % (len(self.Env["nodes"]), host)
                 print("Extracted stats: %s" % fname)
                 fd = open(fname, "a")
                 try:
                     fd.writelines(lines)
                 finally:
                     # Close the file even if writing fails
                     fd.close()
 
     def StatsMark(self, testnum):
         '''Mark the test number in the stats log.
         Does nothing unless Env["stats"] is set.  On the first call for a host the
         helper script held in the global log_stats is written to log_stats_bin on
         that host and any existing stats file is deleted; the host is then recorded
         in the global has_log_stats.  Every call asks the helper to mark `testnum`.
         '''
 
         global has_log_stats
         if not self.Env["stats"]:
             return
 
         for host in self.Env["nodes"]:
             log_stats_file = "%s/cts-stats.csv" % CTSvars.CRM_DAEMON_DIR
             if not has_log_stats.has_key(host):
 
                 global log_stats
                 global log_stats_bin
                 script=log_stats
                 # Backslash-escape characters for the double-quoted echo command
                 # below (presumably so the remote shell leaves them alone)
                 #script = re.sub("\\\\", "\\\\", script)
                 script = re.sub('\"', '\\\"', script)
                 # The replacement "\'" is the same string as "'", so this
                 # substitution leaves the script unchanged
                 script = re.sub("'", "\'", script)
                 script = re.sub("`", "\`", script)
                 script = re.sub("\$", "\\\$", script)
 
                 self.debug("Installing %s on %s" % (log_stats_bin, host))
                 self.rsh(host, '''echo "%s" > %s''' % (script, log_stats_bin), silent=True)
                 self.rsh(host, '''bash %s %s delete''' % (log_stats_bin, log_stats_file))
                 # Remember that this host already has the helper installed
                 has_log_stats[host] = 1
 
             # Now mark it
             self.rsh(host, '''bash %s %s mark %s''' % (log_stats_bin, log_stats_file, testnum), synchronous=0)
 
 
 class Resource:
     '''
     This is an HA resource (not a resource group).
     A resource group is just an ordered list of Resource objects.
     IsRunningOn, IsWorkingCorrectly, Start and Stop are abstract here and
     raise ValueError.
     '''
 
     def __init__(self, cm, rsctype=None, instance=None):
         '''Remember the cluster manager, the resource type and the instance name.'''
         self.CM = cm
         self.ResourceType = rsctype
         # NOTE: this attribute shadows the Instance() method defined below, so
         # on an instance `Instance` is always the value passed in here
         self.Instance = instance
         self.needs_quorum = 1
 
     def Type(self):
         '''Return the resource type given at construction.'''
         return self.ResourceType
 
     def Instance(self, nodename):
         '''Unreachable through instances: hidden by the attribute set in __init__.'''
         return self.Instance
 
     def IsRunningOn(self, nodename):
         '''
         This member function returns true if our resource is running
         on the given node in the cluster.
         It is analogous to the "status" operation on SystemV init scripts and
         heartbeat scripts.  FailSafe calls it the "exclusive" operation.
         '''
         raise ValueError("Abstract Class member (IsRunningOn)")
 
     def IsWorkingCorrectly(self, nodename):
         '''
         This member function returns true if our resource is operating
         correctly on the given node in the cluster.
         Heartbeat does not require this operation, but it might be called
         the Monitor operation, which is what FailSafe calls it.
         For remotely monitorable resources (like IP addresses), they *should*
         be monitored remotely for testing.
         '''
         raise ValueError("Abstract Class member (IsWorkingCorrectly)")
 
     def Start(self, nodename):
         '''
         This member function starts or activates the resource.
         '''
         raise ValueError("Abstract Class member (Start)")
 
     def Stop(self, nodename):
         '''
         This member function stops or deactivates the resource.
         '''
         raise ValueError("Abstract Class member (Stop)")
 
     def __repr__(self):
         '''Return "{type::instance}", or just "{type}" when the instance is unset
         or at most one character long.'''
         # %-formatting keeps repr() working when the type is None
         if (self.Instance and len(self.Instance) > 1):
             return "{%s::%s}" % (self.ResourceType, self.Instance)
         else:
             return "{%s}" % (self.ResourceType,)
 
 
 class Component:
     '''Base class for cluster components that can be killed on a node.'''
     def kill(self, node):
         '''Kill the component on `node`; this implementation does nothing.'''
         return None
 
 
 class Process(Component):
     '''A component backed by a named process, killed with "killall -9".'''
     def __init__(self, cm, name, process=None, dc_only=0, pats=[], dc_pats=[], badnews_ignore=[], common_ignore=[], triggersreboot=0):
         '''Store the process description.
         cm             - object whose rsh() and log() methods are used by kill()
         name           - component name, also the process name unless `process` is given
         process        - name of the process to kill (default: `name`)
         badnews_ignore - patterns stored on the instance, followed by `common_ignore`
         The remaining arguments are stored on the instance unchanged.
         '''
         self.name = str(name)
         self.dc_only = dc_only
         self.pats = pats
         self.dc_pats = dc_pats
         self.CM = cm
         # Build a new list: extending the argument in place would modify the
         # caller's list, or the shared default list, on every construction
         self.badnews_ignore = list(badnews_ignore) + list(common_ignore)
         self.triggersreboot = triggersreboot
 
         if process:
             self.proc = str(process)
         else:
             self.proc = str(name)
         self.KillCmd = "killall -9 " + self.proc
 
     def kill(self, node):
         '''Run the kill command on `node`; return 1 on success, or log an error and return None.'''
         if self.CM.rsh(node, self.KillCmd) != 0:
             self.CM.log ("ERROR: Kill %s failed on node %s" % (self.name,node))
             return None
         return 1
diff --git a/cts/CTSlab.py b/cts/CTSlab.py
index 314c347c8c..9b336a5beb 100755
--- a/cts/CTSlab.py
+++ b/cts/CTSlab.py
@@ -1,165 +1,165 @@
 #!/usr/bin/python
 
 '''CTS: Cluster Testing System: Lab environment module
  '''
 
 __copyright__ = '''
 Copyright (C) 2001,2005 Alan Robertson <alanr@unix.sh>
 Licensed under the GNU GPL.
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 from UserDict import UserDict
 import sys, types, string, string, signal, os, socket
 
 pdir = os.path.dirname(sys.path[0])
 sys.path.insert(0, pdir) # So that things work from the source directory
 
 try:
     from cts.CTSvars    import *
     from cts.CM_ais     import *
     from cts.CM_lha     import crm_lha
     from cts.CTSaudits  import AuditList
     from cts.CTStests   import TestList
     from cts.CTSscenarios import *
     from cts.logging      import LogFactory
 
 except ImportError:
     sys.stderr.write("abort: couldn't find cts libraries in [%s]\n" %
                      ' '.join(sys.path))
     sys.stderr.write("(check your install and PYTHONPATH)\n")
 
     # Now do it again to get more details
     from cts.CTSvars    import *
     from cts.CM_ais     import *
     from cts.CM_lha     import crm_lha
     from cts.CTSaudits  import AuditList
     from cts.CTStests   import TestList
     from cts.CTSscenarios import *
     from cts.logging      import LogFactory
     sys.exit(-1)
 
 cm = None
 scenario = None
 
 LogFactory().add_stderr()
 def sig_handler(signum, frame) :
     '''Log the received signal and summarize the current scenario, if any.
     For signal 15 the scenario is also torn down and the process exits with 1.
     '''
     LogFactory().log("Interrupted by signal %d"%signum)
     if scenario:
         scenario.summarize()
     if signum != 15:
         return
     if scenario:
         scenario.TearDown()
     sys.exit(1)
 
 # Script entry point: build the environment, pick the cluster manager for the
 # configured stack, select tests and a scenario, then run the scenario.
 if __name__ == '__main__':
 
     Environment = CtsLab(sys.argv[1:])
     NumIter = Environment["iterations"]
     Tests = []
 
     # Set the signal handler
     # (signals are given by number; sig_handler tears down and exits only for 15)
     signal.signal(15, sig_handler)
     signal.signal(10, sig_handler)
 
     # Create the Cluster Manager object
     if Environment["Stack"] == "heartbeat":
         cm = crm_lha(Environment)
 
     elif Environment["Stack"] == "openais (whitetank)":
         cm = crm_whitetank(Environment)
         
     elif Environment["Stack"] == "corosync 2.x":
         cm = crm_mcp(Environment)
         
     elif Environment["Stack"] == "corosync (cman)":
         cm = crm_cman(Environment)
         
     elif Environment["Stack"] == "corosync (plugin v1)":
         cm = crm_cs_v1(Environment)
         
     elif Environment["Stack"] == "corosync (plugin v0)":
         cm = crm_cs_v0(Environment)
     else:
         # NOTE(review): every other lookup uses the key "Stack"; confirm that
         # the lower-case "stack" key exists
         LogFactory().log("Unknown stack: "+Environment["stack"])
         sys.exit(1)
 
     if Environment["TruncateLog"] == 1:
         # NOTE(review): LogFile is not defined in the visible part of this file;
         # presumably it comes from one of the star imports - verify
         Environment.log("Truncating %s" % LogFile)
         lf = open(LogFile, "w");
         if lf != None:
             lf.truncate(0)
             lf.close()
 
     Audits = AuditList(cm)
 
     # Listing mode: print the names of all tests and exit
     if Environment["ListTests"] == 1:
         Tests = TestList(cm, Audits)
-        Environment.log("Total %d tests"%len(Tests))
+        LogFactory().log("Total %d tests"%len(Tests))
         for test in Tests :
-            Environment.log(str(test.name));
+            LogFactory().log(str(test.name));
         sys.exit(0)
 
     elif len(Environment["tests"]) == 0:
         Tests = TestList(cm, Audits)
 
     else:
         # Keep only the tests whose names were explicitly chosen
         Chosen = Environment["tests"]
         for TestCase in Chosen:
            match = None
 
            for test in TestList(cm, Audits):
                if test.name == TestCase:
                    match = test
 
            if not match:
                usage("--choose: No applicable/valid tests chosen")
            else:
                Tests.append(match)
 
     # Scenario selection
     if Environment["scenario"] == "basic-sanity":
         scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests)
 
     elif Environment["scenario"] == "all-once":
         NumIter = len(Tests)
         scenario = AllOnce(
             cm, [ BootCluster(Environment), PacketLoss(Environment) ], Audits, Tests)
     elif Environment["scenario"] == "sequence":
         scenario = Sequence(
             cm, [ BootCluster(Environment), PacketLoss(Environment) ], Audits, Tests)
     elif Environment["scenario"] == "boot":
         scenario = Boot(cm, [ LeaveBooted(Environment)], Audits, [])
     else:
         scenario = RandomTests(
             cm, [ BootCluster(Environment), PacketLoss(Environment) ], Audits, Tests)
 
     # Log a summary of the run configuration before starting
     LogFactory().log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ")
     LogFactory().log("Stack:                  %s (%s)" % (Environment["Stack"], Environment["Name"]))
     LogFactory().log("Schema:                 %s" % Environment["Schema"])
     LogFactory().log("Scenario:               %s" % scenario.__doc__)
     LogFactory().log("CTS Master:             %s" % Environment["cts-master"])
     LogFactory().log("CTS Logfile:            %s" % Environment["OutputFile"])
     LogFactory().log("Random Seed:            %s" % Environment["RandSeed"])
     LogFactory().log("Syslog variant:         %s" % Environment["syslogd"].strip())
     LogFactory().log("System log files:       %s" % Environment["LogFileName"])
 #    Environment.log(" ")
     if Environment.has_key("IPBase"):
         LogFactory().log("Base IP for resources:  %s" % Environment["IPBase"])
     LogFactory().log("Cluster starts at boot: %d" % Environment["at-boot"])
 
     Environment.dump()
     # The value returned by the run is used as the process exit status
     rc = Environment.run(scenario, NumIter)
     sys.exit(rc)
diff --git a/cts/CTStests.py b/cts/CTStests.py
index 6fc5238b97..cd5b7ce365 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,2905 +1,2912 @@
 '''CTS: Cluster Testing System: Tests module
 
 There are a few things we want to do here:
 
  '''
 
 __copyright__ = '''
 Copyright (C) 2000, 2001 Alan Robertson <alanr@unix.sh>
 Licensed under the GNU GPL.
 
 Add RecourceRecover testcase Zhao Kai <zhaokai@cn.ibm.com>
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 #
 #        SPECIAL NOTE:
 #
 #        Tests may NOT implement any cluster-manager-specific code in them.
 #        EXTEND the ClusterManager object to provide the base capabilities
 #        the test needs if you need to do something that the current CM classes
 #        do not.  Otherwise you screw up the whole point of the object structure
 #        in CTS.
 #
 #                Thank you.
 #
 
 import time, os, re, types, string, tempfile, sys
 from stat import *
 from cts import CTS
 from cts.CTSaudits import *
 from cts.CTSvars   import *
 from cts.patterns  import PatternSelector
 from cts.logging   import LogFactory
 from cts.remote    import RemoteFactory
 from cts.watcher   import LogWatcher
 from cts.environment import EnvFactory
 
 AllTestClasses = [ ]
 
 
 class CTSTest:
     '''
     A Cluster test.
     We implement the basic set of properties and behaviors for a generic
     cluster test.
 
     Cluster tests track their own statistics.
     We keep each of the kinds of counts we track as separate {name,value}
     pairs.
     '''
 
     def __init__(self, cm):
         #self.name="the unnamed test"
         self.Stats = {"calls":0
         ,        "success":0
         ,        "failure":0
         ,        "skipped":0
         ,        "auditfail":0}
 
 #        if not issubclass(cm.__class__, ClusterManager):
 #            raise ValueError("Must be a ClusterManager object")
         self.CM = cm
         self.Env = EnvFactory().getInstance()
         self.rsh = RemoteFactory().getInstance()
         self.logger = LogFactory()
         self.templates = PatternSelector(cm["Name"])
         self.Audits = []
         self.timeout = 120
         self.passed = 1
         self.is_loop = 0
         self.is_unsafe = 0
+        self.is_docker_unsafe = 0
         self.is_experimental = 0
         self.is_container = 0
         self.is_valgrind = 0
         self.benchmark = 0  # which tests to benchmark
         self.timer = {}  # timers
 
     def log(self, args):
         self.logger.log(args)
 
     def debug(self, args):
         self.logger.debug(args)
 
     def has_key(self, key):
         return self.Stats.has_key(key)
 
     def __setitem__(self, key, value):
         self.Stats[key] = value
 
     def __getitem__(self, key):
         return self.Stats[key]
 
     def log_mark(self, msg):
         self.debug("MARK: test %s %s %d" % (self.name,msg,time.time()))
         return
 
     def get_timer(self,key = "test"):
         try: return self.timer[key]
         except: return 0
 
     def set_timer(self,key = "test"):
         self.timer[key] = time.time()
         return self.timer[key]
 
     def log_timer(self,key = "test"):
         elapsed = 0
         if key in self.timer:
             elapsed = time.time() - self.timer[key]
             s = key == "test" and self.name or "%s:%s" % (self.name,key)
             self.debug("%s runtime: %.2f" % (s, elapsed))
             del self.timer[key]
         return elapsed
 
     def incr(self, name):
         '''Increment (or initialize) the value associated with the given name'''
         if not self.Stats.has_key(name):
             self.Stats[name] = 0
         self.Stats[name] = self.Stats[name]+1
 
         # Reset the test passed boolean
         if name == "calls":
             self.passed = 1
 
     def failure(self, reason="none"):
         '''Increment the failure count'''
         self.passed = 0
         self.incr("failure")
         self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason)
         return None
 
     def success(self):
         '''Increment the success count'''
         self.incr("success")
         return 1
 
     def skipped(self):
         '''Increment the skipped count'''
         self.incr("skipped")
         return 1
 
     def __call__(self, node):
         '''Perform the given test'''
         raise ValueError("Abstract Class member (__call__)")
         self.incr("calls")
         return self.failure()
 
     def audit(self):
         passed = 1
         if len(self.Audits) > 0:
             for audit in self.Audits:
                 if not audit():
                     self.logger.log("Internal %s Audit %s FAILED." % (self.name, audit.name()))
                     self.incr("auditfail")
                     passed = 0
         return passed
 
     def setup(self, node):
         '''Setup the given test'''
         return self.success()
 
     def teardown(self, node):
         '''Tear down the given test'''
         return self.success()
 
     def create_watch(self, patterns, timeout, name=None):
         if not name:
             name = self.name
         return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"])
 
     def local_badnews(self, prefix, watch, local_ignore=[]):
         errcount = 0
         if not prefix:
             prefix = "LocalBadNews:"
 
         ignorelist = []
         ignorelist.append(" CTS: ")
         ignorelist.append(prefix)
         ignorelist.extend(local_ignore)
 
         while errcount < 100:
             match = watch.look(0)
             if match:
                add_err = 1
                for ignore in ignorelist:
                    if add_err == 1 and re.search(ignore, match):
                        add_err = 0
                if add_err == 1:
                    self.logger.log(prefix + " " + match)
                    errcount = errcount + 1
             else:
               break
         else:
             self.logger.log("Too many errors!")
 
         watch.end()
         return errcount
 
     def is_applicable(self):
         return self.is_applicable_common()
 
     def is_applicable_common(self):
         '''Return TRUE if we are applicable in the current test configuration'''
         #raise ValueError("Abstract Class member (is_applicable)")
 
         if self.is_loop and not self.Env["loop-tests"]:
             return 0
         elif self.is_unsafe and not self.Env["unsafe-tests"]:
             return 0
         elif self.is_valgrind and not self.Env["valgrind-tests"]:
             return 0
         elif self.is_experimental and not self.Env["experimental-tests"]:
             return 0
+        elif self.is_docker_unsafe and self.Env["docker"]:
+            return 0
         elif self.is_container and not self.Env["container-tests"]:
             return 0
         elif self.Env["benchmark"] and self.benchmark == 0:
             return 0
 
         return 1
 
     def find_ocfs2_resources(self, node):
         self.r_o2cb = None
         self.r_ocfs2 = []
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rtype == "o2cb" and r.parent != "NA":
                     self.debug("Found o2cb: %s" % self.r_o2cb)
                     self.r_o2cb = r.parent
             if re.search("^Constraint", line):
                 c = AuditConstraint(self.CM, line)
                 if c.type == "rsc_colocation" and c.target == self.r_o2cb:
                     self.r_ocfs2.append(c.rsc)
 
         self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2))
         return len(self.r_ocfs2)
 
     def canrunnow(self, node):
         '''Return TRUE if we can meaningfully run right now'''
         return 1
 
     def errorstoignore(self):
         '''Return list of errors which are 'normal' and should be ignored'''
         return []
 
 
 class StopTest(CTSTest):
     '''Shut the cluster manager down on one node and check the logs agree'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Stop"
 
     def __call__(self, node):
         '''Run the 'stop' test against node. '''
         self.incr("calls")
         if self.CM.ShouldBeStatus[node] != "up":
             return self.skipped()
 
         # Technically we should always be able to notice ourselves stopping
         expected = [ self.templates["Pat:We_stopped"] % node ]
 
         #if self.Env["use_logd"]:
         #    patterns.append(self.templates["Pat:Logd_stopped"] % node)
 
         # Every other node that is up has to report the departure
         # NOTE: This wont work if we have multiple partitions
         for peer in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[peer] != "up" or peer == node:
                 continue
             expected.append(self.templates["Pat:They_stopped"] %(peer, self.CM.key_for_node(node)))
 
         watch = self.create_watch(expected, self.Env["DeadTime"])
         watch.setwatch()
 
         # Record which kind of stop this is: ourselves, the last node, or a peer
         if node == self.CM.OurNode:
             counter = "us"
         elif self.CM.upcount() <= 1:
             counter = "all"
         else:
             counter = "them"
         self.incr(counter)
 
         self.CM.StopaCM(node)
         watch.lookforall()
 
         failreason = None
         missing = "||"
         if watch.unmatched:
             # Capture some diagnostics from the node before reporting
             for diag_cmd in ("/bin/ps axf", "/usr/sbin/dlm_tool dump"):
                 (rc, output) = self.rsh(node, diag_cmd, None)
                 for line in output:
                     self.debug(line)
 
             for regex in watch.unmatched:
                 self.logger.log ("ERROR: Shutdown pattern not found: %s" % (regex))
                 missing = missing + regex + "||"
                 failreason = "Missing shutdown pattern"
 
         self.CM.cluster_stable(self.Env["DeadTime"])
 
         if not watch.unmatched or self.CM.upcount() == 0:
             return self.success()
 
         if len(watch.unmatched) >= self.CM.upcount():
             return self.failure("no match against (%s)" % missing)
 
         if failreason is not None:
             return self.failure(failreason)
         return self.success()
 #
 # We don't register StopTest because it's better when called by
 # another test...
 #
 
 
 class StartTest(CTSTest):
     '''Bring the cluster manager up on a node that should currently be down'''
     def __init__(self, cm, debug=None):
         CTSTest.__init__(self,cm)
         self.name = "start"
         # NOTE(review): this attribute shadows the debug() method inherited
         # from CTSTest on StartTest instances -- confirm that is intended.
         self.debug = debug
 
     def __call__(self, node):
         '''Run the 'start' test against node. '''
         self.incr("calls")
 
         # Count whether this is the first node to come up or a later joiner
         if self.CM.upcount() != 0:
             self.incr("them")
         else:
             self.incr("us")
 
         if self.CM.ShouldBeStatus[node] != "down":
             return self.skipped()
 
         if not self.CM.StartaCM(node):
             return self.failure("Startup %s on node %s failed"
                                 % (self.Env["Name"], node))
         return self.success()
 
 #
 # We don't register StartTest because it's better when called by
 # another test...
 #
 
 
 class FlipTest(CTSTest):
     '''Toggle a node: stop it when it is up, start it when it is down.
        Any other expected state is skipped.
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Flip"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
 
     def __call__(self, node):
         '''Run the 'Flip' test against node. '''
         self.incr("calls")
         expected_state = self.CM.ShouldBeStatus[node]
 
         if expected_state == "up":
             self.incr("stopped")
             outcome = self.stop(node)
             transition = "up->down"
             # Give the cluster time to recognize it's gone...
             time.sleep(self.Env["StableTime"])
         elif expected_state == "down":
             self.incr("started")
             outcome = self.start(node)
             transition = "down->up"
         else:
             return self.skipped()
 
         # Keep a per-direction counter alongside the overall result
         self.incr(transition)
         if not outcome:
             return self.failure("%s failure" % transition)
         return self.success()
 
 #        Register FlipTest as a good test to run
 AllTestClasses.append(FlipTest)
 
 
 class RestartTest(CTSTest):
     '''Stop a node and then start it again'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Restart"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
         self.benchmark = 1
 
     def __call__(self, node):
         '''Run the 'restart' test against node. '''
         self.incr("calls")
 
         # Per-node call counter
         self.incr("node:" + node)
 
         # NOTE(review): the counter is called "WasStopped" yet this branch
         # runs when StataCM(node) is true -- confirm StataCM's semantics.
         if self.CM.StataCM(node):
             self.incr("WasStopped")
             if not self.start(node):
                 return self.failure("start (setup) failure: "+node)
 
         # Time only the stop/start pair, not the setup above
         self.set_timer()
         if self.stop(node):
             if self.start(node):
                 return self.success()
             return self.failure("start failure: "+node)
         return self.failure("stop failure: "+node)
 
 #        Register RestartTest as a good test to run
 AllTestClasses.append(RestartTest)
 
 
 class StonithdTest(CTSTest):
     '''Fence a node with stonith_admin and check that the cluster recovers'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Stonithd"
         self.startall = SimulStartLite(cm)
         self.benchmark = 1
 
     def __call__(self, node):
         '''Reboot node via stonith_admin issued from a randomly chosen node.
 
         Skipped when fewer than two nodes are configured.  Fails when the
         expected log patterns are not all seen or the cluster does not
         become stable afterwards.
         '''
         self.incr("calls")
         if len(self.Env["nodes"]) < 2:
             return self.skipped()
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         # NOTE(review): is_dc is assigned here but never read in this method
         is_dc = self.CM.is_node_dc(node)
 
         watchpats = []
         watchpats.append("Operation .* for host '%s' with device .* returned: 0" % node)
         watchpats.append("tengine_stonith_notify:.*Peer %s was terminated .*: OK" % node)
 
         # With at-boot disabled the fenced node is expected to stay down;
         # otherwise also wait for it to rejoin as a non-DC member.
         if self.Env["at-boot"] == 0:
             self.debug("Expecting %s to stay down" % node)
             self.CM.ShouldBeStatus[node] = "down"
         else:
             self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"]))
             watchpats.append("%s.* S_STARTING -> S_PENDING" % node)
             watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node)
 
         # Watch timeout: 30s of slack on top of dead, stable and start times
         watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
         watch.setwatch()
 
         # The fencing request may originate from any node, including the target
         origin = self.Env.RandomGen.choice(self.Env["nodes"])
 
         # NOTE(review): called without the third argument here and the result
         # is used as a plain return code -- confirm against the remote helper.
         rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node)
 
         if rc == 194:
             # 194 - 256 = -62 = Timer expired
             #
             # Look for the patterns, usually this means the required
             # device was running on the node to be fenced - or that
             # the required devices were in the process of being loaded
             # and/or moved
             #
             # Effectively the node committed suicide so there will be
             # no confirmation, but pacemaker should be watching and
             # fence the node again
 
             self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node))
 
         elif origin != node and rc != 0:
             # The command ran on another node and reported an error: let the
             # cluster settle and the nodes return before logging the failure
             self.debug("Waiting for the cluster to recover")
             self.CM.cluster_stable()
 
             self.debug("Waiting STONITHd node to come back up")
             self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
             self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc))
 
         elif origin == node and rc != 255:
             # 255 == broken pipe, i.e. the node was fenced as expected
             self.logger.log("Logcally originated fencing returned %d" % rc)
 
         # "fence" times the wait for the log patterns; "reform" times the
         # recovery that follows and is only logged on success.
         self.set_timer("fence")
         matched = watch.lookforall()
         self.log_timer("fence")
         self.set_timer("reform")
         if watch.unmatched:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         self.debug("Waiting STONITHd node to come back up")
         self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         is_stable = self.CM.cluster_stable(self.Env["StartTime"])
 
         if not matched:
             return self.failure("Didn't find all expected patterns")
         elif not is_stable:
             return self.failure("Cluster did not become stable")
 
         self.log_timer("reform")
         return self.success()
 
     def errorstoignore(self):
         '''Return the log patterns that are expected while fencing a node'''
         return [
             self.templates["Pat:Fencing_start"] % ".*",
             self.templates["Pat:Fencing_ok"] % ".*",
             "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery",
             "error: remote_op_done: Operation reboot of .*by .* for stonith_admin.*: Timer expired",
             ]
 
     def is_applicable(self):
         '''Apply the common checks, then honour Env["DoFencing"] when it is set'''
         if not self.is_applicable_common():
             return 0
 
         if self.Env.has_key("DoFencing"):
             return self.Env["DoFencing"]
 
         return 1
 
 AllTestClasses.append(StonithdTest)
 
 
 class StartOnebyOne(CTSTest):
     '''Stop the whole cluster, then start the nodes one after another'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "StartOnebyOne"
         self.stopall = SimulStopLite(cm)
         self.start = StartTest(cm)
         self.ns = CTS.NodeStatus(cm.Env)
 
     def __call__(self, dummy):
         '''Run the 'StartOnebyOne' test; the node argument is ignored. '''
         self.incr("calls")
 
         # Begin from a fully stopped cluster
         if not self.stopall(None):
             return self.failure("Test setup failed")
 
         self.set_timer()
         # Try every node, remembering the ones that would not start
         failed = [ node for node in self.Env["nodes"] if not self.start(node) ]
 
         if failed:
             return self.failure("Some node failed to start: " + repr(failed))
 
         return self.success()
 
 #        Register StartOnebyOne as a good test to run
 AllTestClasses.append(StartOnebyOne)
 
 
 class SimulStart(CTSTest):
     '''Stop the whole cluster, then start every node at roughly the same time'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStart"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
 
     def __call__(self, dummy):
         '''Run the 'SimulStart' test; the node argument is ignored. '''
         self.incr("calls")
 
         # Begin from a fully stopped cluster
         stopped = self.stopall(None)
         if not stopped:
             return self.failure("Setup failed")
 
         self.CM.clear_all_caches()
 
         started = self.startall(None)
         if started:
             return self.success()
         return self.failure("Startall failed")
 
 #        Register SimulStart as a good test to run
 AllTestClasses.append(SimulStart)
 
 
 class SimulStop(CTSTest):
     '''Start the whole cluster, then stop every node at roughly the same time'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStop"
         self.startall = SimulStartLite(cm)
         self.stopall = SimulStopLite(cm)
 
     def __call__(self, dummy):
         '''Run the 'SimulStop' test; the node argument is ignored. '''
         self.incr("calls")
 
         # Begin from a fully started cluster
         started = self.startall(None)
         if not started:
             return self.failure("Setup failed")
 
         stopped = self.stopall(None)
         if stopped:
             return self.success()
         return self.failure("Stopall failed")
 
 #     Register SimulStop as a good test to run
 AllTestClasses.append(SimulStop)
 
 
 class StopOnebyOne(CTSTest):
     '''Start the whole cluster, then stop the nodes one after another'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "StopOnebyOne"
         self.startall = SimulStartLite(cm)
         self.stop = StopTest(cm)
 
     def __call__(self, dummy):
         '''Run the 'StopOnebyOne' test; the node argument is ignored. '''
         self.incr("calls")
 
         # Begin from a fully started cluster
         if not self.startall(None):
             return self.failure("Setup failed")
 
         self.set_timer()
         # Try every node, remembering the ones that would not stop
         failed = [ node for node in self.Env["nodes"] if not self.stop(node) ]
 
         if failed:
             return self.failure("Some node failed to stop: " + repr(failed))
 
         self.CM.clear_all_caches()
         return self.success()
 
 #     Register StopOnebyOne as a good test to run
 AllTestClasses.append(StopOnebyOne)
 
 
 class RestartOnebyOne(CTSTest):
     '''Start the whole cluster, then restart the nodes one after another'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RestartOnebyOne"
         self.startall = SimulStartLite(cm)
 
     def __call__(self, dummy):
         '''Run the 'RestartOnebyOne' test; the node argument is ignored. '''
         self.incr("calls")
 
         # Begin from a fully started cluster
         if not self.startall(None):
             return self.failure("Setup failed")
 
         self.set_timer()
         # The per-node restart helper is created on each call
         self.restart = RestartTest(self.CM)
         did_fail = [ node for node in self.Env["nodes"] if not self.restart(node) ]
 
         if not did_fail:
             return self.success()
         return self.failure("Could not restart %d nodes: %s"
                             % (len(did_fail), repr(did_fail)))
 
 #     Register RestartOnebyOne as a good test to run
 AllTestClasses.append(RestartOnebyOne)
 
 
 class PartialStart(CTSTest):
     '''Begin starting a node, then stop it before startup has completed'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "PartialStart"
         self.startall = SimulStartLite(cm)
         self.stopall = SimulStopLite(cm)
         self.stop = StopTest(cm)
         #self.is_unsafe = 1
 
     def __call__(self, node):
         '''Run the 'PartialStart' test against node. '''
         self.incr("calls")
 
         # Begin from a fully stopped cluster
         if not self.stopall(None):
             return self.failure("Setup failed")
 
 #   FIXME!  This should use the CM class to get the pattern
 #       then it would be applicable in general
         early_startup = [ "crmd.*Connecting to cluster infrastructure" ]
         watch = self.create_watch(early_startup, self.Env["DeadTime"]+10)
         watch.setwatch()
 
         # Kick off the start without waiting for it to finish
         self.CM.StartaCMnoBlock(node)
         if not watch.lookforall():
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
             return self.failure("Setup of %s failed" % node)
 
         if not self.stop(node):
             return self.failure("%s did not stop in time" % node)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return the log patterns that are acceptable during this test'''
 
         # We might do some fencing in the 2-node case if we make it up far enough
         tolerated = [ """Executing reboot fencing operation""" ]
         return tolerated
 
 #     Register PartialStart as a good test to run
 AllTestClasses.append(PartialStart)
 
 
 class StandbyTest(CTSTest):
     '''Put a node into standby and back, checking that resources leave it'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Standby"
         self.benchmark = 1
 
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
 
     # Outline of the test:
     #  - make sure the node is active
     #  - set the node to standby mode
     #  - check resources: none should be running on the node
     #  - set the node back to active mode
     #  - confirm the node reports standby "off" again
 
     def __call__(self, node):
         '''Run the standby test against node; returns success() or failure().'''
 
         self.incr("calls")
         ret = self.startall(None)
         if not ret:
             return self.failure("Start all nodes failed")
 
         # Start from a known state: standby must be "off" before we begin
         self.debug("Make sure node %s is active" % node)
         if self.CM.StandbyStatus(node) != "off":
             if not self.CM.SetStandbyMode(node, "off"):
                 return self.failure("can't set node %s to active mode" % node)
 
         self.CM.cluster_stable()
 
         status = self.CM.StandbyStatus(node)
         if status != "off":
             return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
 
         # NOTE(review): rsc_on_node is assigned but not read again in this method
         self.debug("Getting resources running on node %s" % node)
         rsc_on_node = self.CM.active_resources(node)
 
         # The watch must be armed before the standby change is requested
         watchpats = []
         watchpats.append("do_state_transition:.*-> S_POLICY_ENGINE")
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
         watch.setwatch()
 
         self.debug("Setting node %s to standby mode" % node)
         if not self.CM.SetStandbyMode(node, "on"):
             return self.failure("can't set node %s to standby mode" % node)
 
         # "on" times how long the cluster takes to reach standby
         self.set_timer("on")
 
         ret = watch.lookforall()
         if not ret:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
             # Undo the standby request before reporting the failure
             self.CM.SetStandbyMode(node, "off")
             return self.failure("cluster didn't react to standby change on %s" % node)
 
         self.CM.cluster_stable()
 
         status = self.CM.StandbyStatus(node)
         if status != "on":
             return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status))
         self.log_timer("on")
 
         # Anything still active on the standby node is a failure
         self.debug("Checking resources")
         bad_run = self.CM.active_resources(node)
         if len(bad_run) > 0:
             rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run)))
             self.debug("Setting node %s to active mode" % node)
             self.CM.SetStandbyMode(node, "off")
             return rc
 
         self.debug("Setting node %s to active mode" % node)
         if not self.CM.SetStandbyMode(node, "off"):
             return self.failure("can't set node %s to active mode" % node)
 
         # "off" times how long the cluster takes to leave standby
         self.set_timer("off")
         self.CM.cluster_stable()
 
         status = self.CM.StandbyStatus(node)
         if status != "off":
             return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
         self.log_timer("off")
 
         return self.success()
 
 AllTestClasses.append(StandbyTest)
 
 
 class ValgrindTest(CTSTest):
     '''Check for memory leaks'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Valgrind"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
         self.is_valgrind = 1
         self.is_loop = 1
 
     def setup(self, node):
         '''Restart the whole cluster with the valgrind prefix enabled.
 
         Old valgrind logs are removed from node before the restart.
         '''
         self.incr("calls")
 
         ret = self.stopall(None)
         if not ret:
             return self.failure("Stop all nodes failed")
 
         # Enable valgrind
         self.logger.logPat = "/tmp/%s-*.valgrind" % self.name
 
         self.Env["valgrind-prefix"] = self.name
 
         self.rsh(node, "rm -f %s" % self.logger.logPat, None)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Start all nodes failed")
 
         # Record the child processes of aisexec on every node for debugging
         for node in self.Env["nodes"]:
             (rc, output) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None)
             for line in output:
                 self.debug(line)
 
         return self.success()
 
     def teardown(self, node):
         '''Disable the valgrind prefix and stop every node.'''
         # Disable valgrind
         self.Env["valgrind-prefix"] = None
 
         # Return all nodes to normal
         ret = self.stopall(None)
         if not ret:
             return self.failure("Stop all nodes failed")
 
         return self.success()
 
     def find_leaks(self):
         '''Stop every node and grep its valgrind logs for leaks and errors.
 
         Returns the list of nodes on which something was detected.  Each
         node's valgrind logs are removed once they have been inspected.
         '''
         # Check for leaks
         leaked = []
         self.stop = StopTest(self.CM)
 
         for node in self.Env["nodes"]:
             # Snapshot the process list before the node is stopped
             (rc, ps_out) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None)
             rc = self.stop(node)
             if not rc:
                 self.failure("Couldn't shut down %s" % node)
 
             # NOTE(review): the pattern contains unquoted shell
             # metacharacters -- confirm how rsh quotes the command.
             rc = self.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logger.logPat, 0)
             # grep exits 1 only when nothing matched; any other status
             # (a match, or a grep error) is treated as a detection
             if rc != 1:
                 leaked.append(node)
                 self.failure("Valgrind errors detected on %s" % node)
                 for line in ps_out:
                     self.logger.log(line)
                 (rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None)
                 for line in output:
                     self.logger.log(line)
                 (rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None)
                 for line in output:
                     self.debug(line)
 
             # Clean up on every node, not just the last one visited, so stale
             # logs cannot be picked up by a later run
             self.rsh(node, "rm -f %s" % self.logger.logPat, None)
 
         return leaked
 
     def __call__(self, node):
         '''Fail if find_leaks() reports any node.'''
         leaked = self.find_leaks()
         if len(leaked) > 0:
             return self.failure("Nodes %s leaked" % repr(leaked))
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ]
 
 
 class StandbyLoopTest(ValgrindTest):
     '''Look for memory leaks while repeatedly toggling standby on a node
        for Env["loop-minutes"] minutes'''
     def __init__(self, cm):
         ValgrindTest.__init__(self,cm)
         self.name = "StandbyLoop"
 
     def __call__(self, node):
 
         iteration = 0
         pause = 2
         failed_at = 0
         deadline = time.time() + self.Env["loop-minutes"] * 60
 
         # Stop at the deadline or after the first iteration that failed
         while True:
             if time.time() > deadline or failed_at:
                 break
             iteration += 1
 
             time.sleep(pause)
             if not self.CM.SetStandbyMode(node, "on"):
                 self.failure("can't set node %s to standby mode" % node)
                 failed_at = iteration
 
             time.sleep(pause)
             if not self.CM.SetStandbyMode(node, "off"):
                 self.failure("can't set node %s to active mode" % node)
                 failed_at = iteration
 
         # Leaks are always collected, even when the loop itself failed
         leaked = self.find_leaks()
         if failed_at:
             return self.failure("Iteration %d failed" % failed_at)
         if len(leaked) > 0:
             return self.failure("Nodes %s leaked" % repr(leaked))
 
         return self.success()
 
 AllTestClasses.append(StandbyLoopTest)
 
 
 class BandwidthTest(CTSTest):
 #        Tests should not be cluster-manager-specific
 #        If you need to find out cluster manager configuration to do this, then
 #        it should be added to the generic cluster manager API.
     '''Test the bandwidth which heartbeat uses
 
        Captures 102 UDP packets on the cluster port with tcpdump on the
        target node, copies the capture locally and derives bits/sec from
        the first 100 UDP lines.  Running min/max/total are kept in Stats.
     '''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Bandwidth"
         self.start = StartTest(cm)
         self.__setitem__("min",0)
         self.__setitem__("max",0)
         self.__setitem__("totalbandwidth",0)
         # NOTE(review): mktemp() only reserves a name and is race-prone;
         # kept because the file is created later by rsh.cp()
         self.tempfile = tempfile.mktemp(".cts")
         self.startall = SimulStartLite(cm)
 
     def __call__(self, node):
         '''Perform the Bandwidth test
 
            Returns skipped() when no node is up or the communication media
            is not IP based, failure() when setup or tcpdump fails, and
            success() otherwise (including when no bandwidth figure could
            be computed from the capture).
         '''
         self.incr("calls")
 
         if self.CM.upcount() < 1:
             return self.skipped()
 
         Path = self.CM.InternalCommConfig()
         if "ip" not in Path["mediatype"]:
             return self.skipped()
 
         port = int(Path["port"][0])
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Test setup failed")
         time.sleep(5)  # We get extra messages right after startup.
 
         fstmpfile = "/var/run/band_estimate"
         dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \
         %                (port, fstmpfile)
 
         rc = self.rsh(node, dumpcmd)
         if rc != 0:
             return self.failure("no response from tcpdump command [%d]!" % rc)
 
         farfile = "root@%s:%s" % (node, fstmpfile)
         self.rsh.cp(farfile, self.tempfile)
         try:
             Bandwidth = self.countbandwidth(self.tempfile)
         finally:
             # Always drop the capture files, also when parsing gave nothing
             self.rsh(node, "rm -f %s" % fstmpfile)
             if os.path.exists(self.tempfile):
                 os.unlink(self.tempfile)
 
         if not Bandwidth:
             self.logger.log("Could not compute bandwidth.")
             return self.success()
 
         intband = int(Bandwidth + 0.5)
         self.logger.log("...bandwidth: %d bits/sec" % intband)
         self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth
         # A stored minimum of 0 means "no sample recorded yet"
         if self.Stats["min"] == 0:
             self.Stats["min"] = Bandwidth
         if Bandwidth > self.Stats["max"]:
             self.Stats["max"] = Bandwidth
         if Bandwidth < self.Stats["min"]:
             self.Stats["min"] = Bandwidth
         return self.success()
 
     def countbandwidth(self, file):
         '''Return the bandwidth in bits/sec seen in the tcpdump text 'file'
 
            Returns None when the capture holds fewer than 100 UDP lines or
            a line cannot be parsed, and 0 when the first and last packet
            timestamps do not advance.
         '''
         fp = open(file, "r")
         try:
             return self._bandwidth_from_capture(fp)
         finally:
             # The handle was previously leaked on every return path
             fp.close()
 
     def _bandwidth_from_capture(self, fp):
         '''Sum the lengths of the first 100 UDP lines read from 'fp' and
            divide by the time between the first and the last of them'''
         total_bytes = 0
         count = 0
         first_stamp = None
         last_stamp = None
 
         while count < 100:
             line = fp.readline()
             if not line:
                 return None
             if "udp" not in line and "UDP," not in line:
                 continue
 
             fields = line.split(" ")
             try:
                 total_bytes = total_bytes + self._packet_length(fields)
             except ValueError:
                 self.logger.log("Invalid tcpdump line: %s" % line)
                 return None
 
             count = count + 1
             if first_stamp is None:
                 first_stamp = fields[0]
             last_stamp = fields[0]
 
         try:
             elapsed = self._timestamp_seconds(last_stamp) - self._timestamp_seconds(first_stamp)
         except (ValueError, IndexError):
             self.logger.log("Invalid tcpdump timestamps: %s / %s" % (first_stamp, last_stamp))
             return None
 
         if elapsed <= 0:
             return 0
         return (total_bytes * 8) / elapsed
 
     def _packet_length(self, fields):
         '''Return the integer following the first "udp" or "length:" field
 
            When neither marker is present the last field is used.  Raises
            ValueError when the line is too short or the value is not an
            integer.
         '''
         if len(fields) < 2:
             raise ValueError("too few fields")
 
         position = len(fields) - 2
         for j in range(len(fields) - 1):
             if fields[j] == "udp" or fields[j] == "length:":
                 position = j
                 break
         return int(fields[position + 1])
 
     def _timestamp_seconds(self, stamp):
         '''Convert an "HH:MM:SS.usec" string into seconds as a float'''
         clock = stamp.split(":")
         seconds = clock[2].split(".")
         whole = (int(clock[0]) * 60 + int(clock[1])) * 60 + int(seconds[0])
         return whole + int(seconds[1]) * 0.000001
 
     def is_applicable(self):
         '''BandwidthTest never applicable'''
         return 0
 
 AllTestClasses.append(BandwidthTest)
 
 
 ###################################################################
 class MaintenanceMode(CTSTest):
 ###################################################################
     '''Turn maintenance mode on and off around a deliberately failed dummy
        resource and check that resources flip between managed and
        unmanaged accordingly'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "MaintenanceMode"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.rid = "maintenanceDummy"
         self.action = "asyncmon"
         self.interval = 0
         self.max = 30
         self.benchmark = 1
         #self.is_unsafe = 1
 
     def toggleMaintenanceMode(self, node, action):
         '''Switch maintenance mode "On" or "Off" from 'node'
 
            Returns "" when every expected log pattern was seen, otherwise
            the repr() of the patterns that were not matched.
         '''
         enabling = (action == "On")
 
         expected = [ self.templates["Pat:DC_IDLE"] ]
         if enabling:
             # The dummy is failed right after maintenance mode is enabled;
             # only the failcount update is expected, no recovery
             expected.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action))
         else:
             # Recovery (stop then start) is expected once the mode is off
             for op in ("stop_0", "start_0"):
                 expected.append(self.templates["Pat:RscOpOK"] % (self.rid, op))
 
         watcher = self.create_watch(expected, 60)
         watcher.setwatch()
 
         self.debug("Turning maintenance mode %s" % action)
         self.rsh(node, self.templates["MaintenanceMode%s" % (action)])
         if enabling:
             self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
 
         timer = "recover%s" % (action)
         self.set_timer(timer)
         watcher.lookforall()
         self.log_timer(timer)
 
         if not watcher.unmatched:
             return ""
 
         self.debug("Failed to find patterns when turning maintenance mode %s" % action)
         return repr(watcher.unmatched)
 
     def insertMaintenanceDummy(self, node):
         '''Add the dummy resource on 'node' and wait for its start
 
            Returns "" on success or the repr() of the unmatched patterns.
         '''
         started = self.templates["Pat:RscOpOK"] % (self.rid, "start_0")
         watcher = self.create_watch([ ("%s.*" % node) + started ], 60)
         watcher.setwatch()
 
         self.CM.AddDummyRsc(node, self.rid)
 
         self.set_timer("addDummy")
         watcher.lookforall()
         self.log_timer("addDummy")
 
         if not watcher.unmatched:
             return ""
 
         self.debug("Failed to find patterns when adding maintenance dummy resource")
         return repr(watcher.unmatched)
 
     def removeMaintenanceDummy(self, node):
         '''Remove the dummy resource via 'node' and wait for its stop
 
            Returns "" on success or the repr() of the unmatched patterns.
         '''
         stopped = self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")
         watcher = self.create_watch([ stopped ], 60)
         watcher.setwatch()
 
         self.CM.RemoveDummyRsc(node, self.rid)
 
         self.set_timer("removeDummy")
         watcher.lookforall()
         self.log_timer("removeDummy")
 
         if not watcher.unmatched:
             return ""
 
         self.debug("Failed to find patterns when removing maintenance dummy resource")
         return repr(watcher.unmatched)
 
     def managedRscList(self, node):
         '''Return the ids of all resources that 'crm_resource -c' on 'node'
            reports as managed'''
         managed_ids = []
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if not re.search("^Resource", line):
                 continue
             entry = AuditResource(self.CM, line)
             if entry.managed():
                 managed_ids.append(entry.id)
 
         return managed_ids
 
     def verifyResources(self, node, rscList, managed):
         '''Check that every id in rscList is currently in the wanted state
 
            'managed' selects the state to look for (true: managed, false:
            unmanaged).  Returns True when all ids were found in that
            state, False otherwise; rscList itself is not modified.
         '''
         pending = list(rscList)
         state = "managed" if managed else "unmanaged"
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if not re.search("^Resource", line):
                 continue
             entry = AuditResource(self.CM, line)
             # Only resources whose state matches the requested one count
             if bool(managed) != bool(entry.managed()):
                 continue
             if entry.id in pending:
                 pending.remove(entry.id)
 
         if len(pending) != 0:
             self.logger.log("Could not find all %s resources on %s. %s" % (state, node, pending))
             return False
 
         self.debug("Found all %s resources on %s" % (state, node))
         return True
 
     def __call__(self, node):
         '''Perform the 'MaintenanceMode' test. '''
         self.incr("calls")
 
         if not self.startall(None):
             return self.failure("Setup failed")
 
         # Snapshot of the managed resources: after enabling maintenance
         # mode they must all be unmanaged, and after disabling it they
         # must all be managed again.
         managedResources = self.managedRscList(node)
         if len(managedResources) == 0:
             self.logger.log("No managed resources on %s" % node)
             return self.skipped()
 
         unmatched = []
 
         # A throw-away resource that is failed while maintenance mode is
         # on, so we can see that recovery waits until the mode is off.
         unmatched.append(self.insertMaintenanceDummy(node))
 
         # Maintenance mode ON, then fail the dummy resource.
         unmatched.append(self.toggleMaintenanceMode(node, "On"))
         became_unmanaged = self.verifyResources(node, managedResources, False)
 
         # Maintenance mode OFF, the dummy should now be recovered.
         unmatched.append(self.toggleMaintenanceMode(node, "Off"))
         became_managed = self.verifyResources(node, managedResources, True)
 
         # Clean up the dummy resource again.
         unmatched.append(self.removeMaintenanceDummy(node))
 
         self.CM.cluster_stable()
 
         failPat = "".join(unmatched)
         if failPat != "":
             return self.failure("Unmatched patterns: %s" % (failPat))
         if not became_unmanaged:
             return self.failure("Failed to verify resources became unmanaged during maintenance mode")
         if not became_managed:
             return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode")
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         op_key = "%s_%d" % (self.action, self.interval)
 
         ignored = []
         ignored.append("""Updating failcount for %s""" % self.rid)
         ignored.append("""LogActions: Recover %s""" % self.rid)
         ignored.append("""Unknown operation: fail""")
         ignored.append("""(ERROR|error): sending stonithRA op to stonithd failed.""")
         ignored.append(self.templates["Pat:RscOpOK"] % (self.rid, op_key))
         ignored.append("""(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval))
         return ignored
 
 AllTestClasses.append(MaintenanceMode)
 
 
 class ResourceRecover(CTSTest):
     '''Fail a randomly chosen active resource on a node and check that the
        cluster records the failure and, for managed resources, recovers it'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "ResourceRecover"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.max = 30
         self.benchmark = 1
         #self.is_unsafe = 1
 
         # Resource picked by the last run, and the id it had before any
         # clone renaming was taken into account
         self.rid = None
         self.rid_alt = None
 
         # these are the values used for the new LRM API call
         self.action = "asyncmon"
         self.interval = 0
 
     def __call__(self, node):
         '''Perform the 'ResourceRecover' test. '''
         self.incr("calls")
 
         if not self.startall(None):
             return self.failure("Setup failed")
 
         # Nothing to shoot at if the node runs no resources
         candidates = self.CM.active_resources(node)
         if len(candidates) == 0:
             self.logger.log("No active resources on %s" % node)
             return self.skipped()
 
         self.rid = self.Env.RandomGen.choice(candidates)
         self.rid_alt = self.rid
 
         rsc = None
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if not re.search("^Resource", line):
                 continue
             entry = AuditResource(self.CM, line)
             if entry.id != self.rid:
                 continue
             rsc = entry
             # Handle anonymous clones that get renamed
             self.rid = rsc.clone_id
             break
 
         if not rsc:
             return self.failure("Could not find %s in the resource list" % self.rid)
 
         self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id))
 
         expected = [ "Updating failcount for %s on .* after .* %s"
                      % (self.rid, self.action) ]
 
         if rsc.managed():
             expected.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0"))
             if rsc.unique():
                 restarted = self.templates["Pat:RscOpOK"] % (self.rid, "start_0")
             else:
                 # Anonymous clones may get restarted with a different clone number
                 restarted = self.templates["Pat:RscOpOK"] % (".*", "start_0")
             expected.append(restarted)
 
         # The watch must be armed before the failure is injected
         watch = self.create_watch(expected, 60)
         watch.setwatch()
 
         self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
 
         self.set_timer("recover")
         watch.lookforall()
         self.log_timer("recover")
 
         self.CM.cluster_stable()
         recovered = self.CM.ResourceLocation(self.rid)
 
         if watch.unmatched:
             return self.failure("Patterns not found: %s" % repr(watch.unmatched))
 
         if rsc.unique() and len(recovered) > 1:
             return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered)))
 
         if len(recovered) > 0:
             self.debug("%s is running on: %s" % (self.rid, repr(recovered)))
         elif rsc.managed():
             return self.failure("%s was not recovered and is inactive" % self.rid)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         op_key = "%s_%d" % (self.action, self.interval)
 
         ignored = []
         ignored.append("""Updating failcount for %s""" % self.rid)
         ignored.append("""LogActions: Recover %s""" % self.rid)
         ignored.append("""LogActions: Recover %s""" % self.rid_alt)
         ignored.append("""Unknown operation: fail""")
         ignored.append("""(ERROR|error): sending stonithRA op to stonithd failed.""")
         ignored.append(self.templates["Pat:RscOpOK"] % (self.rid, op_key))
         ignored.append("""(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval))
         return ignored
 
 AllTestClasses.append(ResourceRecover)
 
 
 class ComponentFail(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "ComponentFail"
+        # TODO make this work correctly in docker.
+        self.is_docker_unsafe = 1
         self.startall = SimulStartLite(cm)
         self.complist = cm.Components()
         self.patterns = []
         self.okerrpatterns = []
         self.is_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'ComponentFail' test. '''
         self.incr("calls")
         self.patterns = []
         self.okerrpatterns = []
 
         # start all nodes
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         if not self.CM.cluster_stable(self.Env["StableTime"]):
             return self.failure("Setup failed - unstable")
 
         node_is_dc = self.CM.is_node_dc(node, None)
 
         # select a component to kill
         chosen = self.Env.RandomGen.choice(self.complist)
         while chosen.dc_only == 1 and node_is_dc == 0:
             chosen = self.Env.RandomGen.choice(self.complist)
 
         self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot))
         self.incr(chosen.name)
 
         if chosen.name != "aisexec" and chosen.name != "corosync":
             if self.Env["Name"] != "crm-lha" or chosen.name != "pengine":
                 self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
                 self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
 
         self.patterns.extend(chosen.pats)
         if node_is_dc:
           self.patterns.extend(chosen.dc_pats)
 
         # In an ideal world, this next stuff should be in the "chosen" object as a member function
         if self.Env["Name"] == "crm-lha" and chosen.triggersreboot:
             # Make sure the node goes down and then comes back up if it should reboot...
             for other in self.Env["nodes"]:
                 if other != node:
                     self.patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
             self.patterns.append(self.templates["Pat:Slave_started"] % node)
             self.patterns.append(self.templates["Pat:Local_started"] % node)
 
             if chosen.dc_only:
                 # Sometimes these will be in the log, and sometimes they won't...
                 self.okerrpatterns.append("%s .*Process %s:.* exited" % (node, chosen.name))
                 self.okerrpatterns.append("%s .*I_ERROR.*crmdManagedChildDied" % node)
                 self.okerrpatterns.append("%s .*The %s subsystem terminated unexpectedly" % (node, chosen.name))
                 self.okerrpatterns.append("(ERROR|error): Client .* exited with return code")
             else:
                 # Sometimes this won't be in the log...
                 self.okerrpatterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
                 self.okerrpatterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
                 self.okerrpatterns.append(self.templates["Pat:ChildExit"])
 
         if chosen.name == "stonith":
             # Ignore actions for STONITH resources
             (rc, lines) = self.rsh(node, "crm_resource -c", None)
             for line in lines:
                 if re.search("^Resource", line):
                     r = AuditResource(self.CM, line)
                     if r.rclass == "stonith":
                         self.okerrpatterns.append(self.templates["LogActions: Recover.*%s"] % r.id)
 
         # supply a copy so self.patterns doesnt end up empty
         tmpPats = []
         tmpPats.extend(self.patterns)
         self.patterns.extend(chosen.badnews_ignore)
 
         # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status
         stonithPats = []
         stonithPats.append(self.templates["Pat:Fencing_ok"] % node)
         stonith = self.create_watch(stonithPats, 0)
         stonith.setwatch()
 
         # set the watch for stable
         watch = self.create_watch(
             tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
         watch.setwatch()
 
         # kill the component
         chosen.kill(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         self.debug("Waiting for any STONITHd node to come back up")
         self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         self.CM.cluster_stable(self.Env["StartTime"])
 
         self.debug("Checking if %s was shot" % node)
         shot = stonith.look(60)
         if shot:
             self.debug("Found: " + repr(shot))
             self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node)
 
             if self.Env["at-boot"] == 0:
                 self.CM.ShouldBeStatus[node] = "down"
 
             # If fencing occurred, chances are many (if not all) the expected logs
             # will not be sent - or will be lost when the node reboots
             return self.success()
 
         # check for logs indicating a graceful recovery
         matched = watch.lookforall(allow_multiple_matches=1)
         if watch.unmatched:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         is_stable = self.CM.cluster_stable(self.Env["StartTime"])
 
         if not matched:
             return self.failure("Didn't find all expected %s patterns" % chosen.name)
         elif not is_stable:
             return self.failure("Cluster did not become stable after killing %s" % chosen.name)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
     # Note that okerrpatterns refers to the last time we ran this test
     # The good news is that this works fine for us...
         self.okerrpatterns.extend(self.patterns)
         return self.okerrpatterns
 
 AllTestClasses.append(ComponentFail)
 
 
 class SplitBrainTest(CTSTest):
     '''It is used to test split-brain. when the path between the two nodes break
        check the two nodes both take over the resource'''
     def __init__(self,cm):
         CTSTest.__init__(self,cm)
         self.name = "SplitBrain"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.is_experimental = 1
 
     def _outside_partition(self, partition, context):
         '''Return the nodes of Env["nodes"] that are not in 'partition';
            'context' is appended to the log message for unknown nodes'''
         other_nodes = []
         other_nodes.extend(self.Env["nodes"])
 
         for member in partition:
             try:
                 other_nodes.remove(member)
             except ValueError:
                 self.logger.log("Node "+member+" not in " + repr(self.Env["nodes"]) + context)
 
         return other_nodes
 
     def isolate_partition(self, partition):
         '''Cut every node in 'partition' off from all nodes outside it
 
            Returns 1 on success (or when there is nobody to isolate from)
            and 0 as soon as one node could not be isolated.
         '''
         other_nodes = self._outside_partition(partition, " from " +repr(partition))
 
         if len(other_nodes) == 0:
             return 1
 
         self.debug("Creating partition: " + repr(partition))
         self.debug("Everyone else: " + repr(other_nodes))
 
         for member in partition:
             if not self.CM.isolate_node(member, other_nodes):
                 self.logger.log("Could not isolate %s" % member)
                 return 0
 
         return 1
 
     def heal_partition(self, partition):
         '''Reconnect every node in 'partition' to the nodes outside it
 
            Always returns 1, matching isolate_partition's success value.
         '''
         other_nodes = self._outside_partition(partition, "")
 
         if len(other_nodes) == 0:
             return 1
 
         self.debug("Healing partition: " + repr(partition))
         self.debug("Everyone else: " + repr(other_nodes))
 
         for member in partition:
             self.CM.unisolate_node(member, other_nodes)
 
         return 1
 
     def __call__(self, node):
         '''Perform split-brain test
 
            Splits the nodes into random partitions with fencing disabled,
            audits the partitioned cluster, heals it and waits for a single
            full-membership partition.  Problems are recorded with
            failure() as they occur and the final result is derived from
            self.passed (presumably cleared by failure() - confirm in
            CTSTest).
         '''
         self.incr("calls")
         self.passed = 1
         partitions = {}
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         while 1:
             # Retry until we get multiple partitions
             partitions = {}
             p_max = len(self.Env["nodes"])
             # A separate loop name keeps the 'node' argument intact for
             # the crm_attribute calls below
             for member in self.Env["nodes"]:
                 p = self.Env.RandomGen.randint(1, p_max)
                 partitions.setdefault(p, []).append(member)
             p_max = len(partitions.keys())
             if p_max > 1:
                 break
             # else, try again
 
         self.debug("Created %d partitions" % p_max)
         for key in partitions.keys():
             self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key]))
 
         # Disabling STONITH to reduce test complexity for now
         self.rsh(node, "crm_attribute -V -n stonith-enabled -v false")
 
         for key in partitions.keys():
             self.isolate_partition(partitions[key])
 
         # Wait (up to 30 polls, 10s apart) for the partitions to appear
         count = 30
         while count > 0:
             if len(self.CM.find_partitions()) != p_max:
                 time.sleep(10)
                 # Without this the loop could never give up
                 count -= 1
             else:
                 break
         else:
             self.failure("Expected partitions were not created")
 
         # Target number of partitions formed - wait for stability
         if not self.CM.cluster_stable():
             self.failure("Partitioned cluster not stable")
 
         # Now audit the cluster state
         self.CM.partitions_expected = p_max
         if not self.audit():
             self.failure("Audits failed")
         self.CM.partitions_expected = 1
 
         # And heal them again
         for key in partitions.keys():
             self.heal_partition(partitions[key])
 
         # Wait for a single partition to form
         count = 30
         while count > 0:
             if len(self.CM.find_partitions()) != 1:
                 time.sleep(10)
                 count -= 1
             else:
                 break
         else:
             self.failure("Cluster did not reform")
 
         # Wait for it to have the right number of members
         count = 30
         while count > 0:
             members = []
 
             partitions = self.CM.find_partitions()
             if len(partitions) > 0:
                 members = partitions[0].split()
 
             if len(members) != len(self.Env["nodes"]):
                 time.sleep(10)
                 count -= 1
             else:
                 break
         else:
             self.failure("Cluster did not completely reform")
 
         # Wait up to 20 minutes - the delay is more preferable than
         # trying to continue with in a messed up state
         if not self.CM.cluster_stable(1200):
             self.failure("Reformed cluster not stable")
             answer = raw_input('Continue? [nY]')
             if answer and answer == "n":
                 raise ValueError("Reformed cluster not stable")
 
         # Turn fencing back on
         if self.Env["DoFencing"]:
             self.rsh(node, "crm_attribute -V -D -n stonith-enabled")
 
         self.CM.cluster_stable()
 
         if self.passed:
             return self.success()
         return self.failure("See previous errors")
 
     def errorstoignore(self):
         '''Return list of errors which are 'normal' and should be ignored'''
         return [
             "Another DC detected:",
             "(ERROR|error): attrd_cib_callback: .*Application of an update diff failed",
             "crmd_ha_msg_callback:.*not in our membership list",
             "CRIT:.*node.*returning after partition",
             ]
 
     def is_applicable(self):
         '''Applicable only when the common checks pass and there are more
            than two nodes'''
         if not self.is_applicable_common():
             return 0
         return len(self.Env["nodes"]) > 2
 
 AllTestClasses.append(SplitBrainTest)
 
 
 class Reattach(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Reattach"
         self.startall = SimulStartLite(cm)
         self.restart1 = RestartTest(cm)
         self.stopall = SimulStopLite(cm)
         self.is_unsafe = 0 # Handled by canrunnow()
 
     def setup(self, node):
         attempt = 0
         if not self.startall(None):
             return None
 
         # Make sure we are really _really_ stable and that all
         # resources, including those that depend on transient node
         # attributes, are started
         while not self.CM.cluster_stable(double_check=True):
             if attempt < 5:
                 attempt += 1
                 self.debug("Not stable yet, re-testing")
             else:
                 self.logger.log("Cluster is not stable")
                 return None
 
         return 1
 
     def teardown(self, node):
 
         # Make sure 'node' is up
         start = StartTest(self.CM)
         start(node)
 
         is_managed = self.rsh(node, "crm_attribute -Q -G -t crm_config -n is-managed-default -d true", 1)
         is_managed = is_managed[:-1] # Strip off the newline
         if is_managed != "true":
             self.logger.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed))
             managed = self.create_watch(["is-managed-default"], 60)
             managed.setwatch()
 
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
 
             if not managed.lookforall():
                 self.logger.log("Patterns not found: " + repr(managed.unmatched))
                 self.logger.log("Could not re-enable resource management")
                 return 0
 
         return 1
 
     def canrunnow(self, node):
         '''Return TRUE if we can meaningfully run right now'''
         if self.find_ocfs2_resources(node):
             self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present")
             return 0
         return 1
 
     def __call__(self, node):
         self.incr("calls")
 
         pats = []
         managed = self.create_watch(["is-managed-default"], 60)
         managed.setwatch()
 
         self.debug("Disable resource management")
         self.rsh(node, "crm_attribute -V -n is-managed-default -v false")
 
         if not managed.lookforall():
             self.logger.log("Patterns not found: " + repr(managed.unmatched))
             return self.failure("Resource management not disabled")
 
         pats = []
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "start"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "stop"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "promote"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "demote"))
         pats.append(self.templates["Pat:RscOpOK"] % (".*", "migrate"))
 
         watch = self.create_watch(pats, 60, "ShutdownActivity")
         watch.setwatch()
 
         self.debug("Shutting down the cluster")
         ret = self.stopall(None)
         if not ret:
             self.debug("Re-enable resource management")
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
             return self.failure("Couldn't shut down the cluster")
 
         self.debug("Bringing the cluster back up")
         ret = self.startall(None)
         time.sleep(5) # allow ping to update the CIB
         if not ret:
             self.debug("Re-enable resource management")
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
             return self.failure("Couldn't restart the cluster")
 
         if self.local_badnews("ResourceActivity:", watch):
             self.debug("Re-enable resource management")
             self.rsh(node, "crm_attribute -V -D -n is-managed-default")
             return self.failure("Resources stopped or started during cluster restart")
 
         watch = self.create_watch(pats, 60, "StartupActivity")
         watch.setwatch()
 
         managed = self.create_watch(["is-managed-default"], 60)
         managed.setwatch()
 
         self.debug("Re-enable resource management")
         self.rsh(node, "crm_attribute -V -D -n is-managed-default")
 
         if not managed.lookforall():
             self.logger.log("Patterns not found: " + repr(managed.unmatched))
             return self.failure("Resource management not enabled")
 
         self.CM.cluster_stable()
 
         # Ignore actions for STONITH resources
         ignore = []
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rclass == "stonith":
 
                     self.debug("Ignoring start actions for %s" % r.id)
                     ignore.append(self.templates["Pat:RscOpOK"] % (r.id, "start_0"))
 
         if self.local_badnews("ResourceActivity:", watch, ignore):
             return self.failure("Resources stopped or started after resource management was re-enabled")
 
         return ret
 
     def errorstoignore(self):
         '''List the error patterns that are to be ignored for this test'''
         ignorable = ["resources were active at shutdown"]
         # pingd and lrmd error messages
         ignorable.extend([
             "pingd: .*(ERROR|error): send_ipc_message:",
             "pingd: .*(ERROR|error): send_update:",
             "lrmd: .*(ERROR|error): notify_client:",
         ])
         return ignorable
 
     def is_applicable(self):
         '''Return None when Env["Name"] is "crm-lha", and 1 for any other name'''
         stack_name = self.Env["Name"]
         return None if stack_name == "crm-lha" else 1
 
 # Add Reattach to the list of classes that TestList() instantiates
 AllTestClasses.append(Reattach)
 
 
 class SpecialTest1(CTSTest):
     '''Custom test (for Andrew) to provoke quorum failure issues: stop every
     node, delete the CIB files on one node, restart that node first and then
     the remaining ones'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "SpecialTest1"
         # Helper tests driven from __call__
         self.startall = SimulStartLite(cm)
         self.restart1 = RestartTest(cm)
         self.stopall = SimulStopLite(cm)
 
     def __call__(self, node):
         '''Run the 'SpecialTest1' scenario with node as the first node to restart'''
         self.incr("calls")
 
         # Everything goes down first
         if not self.stopall(None):
             return self.failure("Could not stop all nodes")
 
         # Test config recovery when the other nodes come up
         wipe_cib = "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*"
         self.rsh(node, wipe_cib)
 
         # The selected node comes back first ...
         if not self.restart1(node):
             return self.failure("Could not start "+node)
 
         # ... followed by all remaining nodes
         if not self.startall(None):
             return self.failure("Could not start the remaining nodes")
 
         return self.success()
 
     def errorstoignore(self):
         '''List the errors to ignore; they occur as a result of the CIB being wiped'''
         cib_wiped = [
             """warning: retrieveCib: Cluster configuration not found:""",
             """error: cib_perform_op: v1 patchset error, patch failed to apply: Application of an update diff failed""",
             """error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined""",
             """error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option""",
             """error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity""",
         ]
         return cib_wiped
 
 AllTestClasses.append(SpecialTest1)
 
 
 class HAETest(CTSTest):
     '''Common base for the tests that apply only when Env["Schema"] is "hae".
 
     setup() starts all nodes and teardown() stops them again; the other
     methods are helpers that subclasses call from their __call__.
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "HAETest"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
         self.is_loop = 1
 
     def setup(self, node):
         '''Start all nodes (node itself is not used)'''
         ret = self.startall(None)
         if not ret:
             return self.failure("Couldn't start all nodes")
         return self.success()
 
     def teardown(self, node):
         '''Stop all nodes (node itself is not used)'''
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
         return self.success()
 
     def wait_on_state(self, node, resource, expected_clones, attempts=240):
         '''Poll crm_resource on node until resource is active expected_clones times.
 
         One query is made per second, at most attempts times.  Returns 1 as
         soon as the count matches, and 0 when crm_resource exits with anything
         other than 0 or 1, or when the attempts are used up.
         '''
         while attempts > 0:
             active = 0
             (rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None)
 
             # Hack until crm_resource does the right thing
             if rc == 0 and lines:
                 active = len(lines)
 
             # Hard failures are handled first so that whatever a failed call
             # printed can never be mistaken for active instances
             if rc == 234:
                 self.logger.log("Unknown resource %s" % resource)
                 return 0
 
             elif rc == 246:
                 self.logger.log("Cluster is inactive")
                 return 0
 
             elif rc != 0 and rc != 1:
                 self.logger.log("Call to crm_resource failed, rc=%d" % rc)
                 return 0
 
             # Compare the count taken from a successful query rather than the
             # raw line count (lines may hold error text or be None)
             if active == expected_clones:
                 return 1
 
             if rc == 1:
                 self.debug("Resource %s is still inactive" % resource)
             else:
                 self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones))
 
             attempts -= 1
             time.sleep(1)
 
         return 0
 
     def find_dlm(self, node):
         '''Find a "controld" resource that has a parent and store the parent in self.r_dlm.
 
         Returns 1 when one is found; otherwise returns 0 and leaves
         self.r_dlm set to None.
         '''
         self.r_dlm = None
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rtype == "controld" and r.parent != "NA":
                     # Store the parent before logging so the message names it
                     self.r_dlm = r.parent
                     self.debug("Found dlm: %s" % self.r_dlm)
                     return 1
         return 0
 
     def find_hae_resources(self, node):
         '''Reset r_dlm, r_o2cb and r_ocfs2, then look the resources up again'''
         self.r_dlm = None
         self.r_o2cb = None
         self.r_ocfs2 = []
 
         if self.find_dlm(node):
             # NOTE(review): find_ocfs2_resources is not defined in this class;
             # confirm that a base class or subclass provides it
             self.find_ocfs2_resources(node)
 
     def is_applicable(self):
         '''Return 1 only when the common checks pass and Env["Schema"] is "hae"'''
         if not self.is_applicable_common():
             return 0
         if self.Env["Schema"] == "hae":
             return 1
         return None
 
 
 class HAERoleTest(HAETest):
     '''Loop for Env["loop-minutes"] minutes, stopping and starting the dlm
     resource through its target-role and checking the active counts'''
 
     def __init__(self, cm):
         '''Lars' mount/unmount test for the HA extension. '''
         HAETest.__init__(self, cm)
         self.name = "HAERoleTest"
 
     def change_state(self, node, resource, target):
         '''Set the target-role meta attribute of resource to target; returns what rsh returns'''
         command = "crm_resource -V -r %s -p target-role -v %s  --meta" % (resource, target)
         return self.rsh(node, command)
 
     def __call__(self, node):
         '''Run the stop/start loop; the failure message names the iteration that failed'''
         self.incr("calls")
         deadline = time.time() + self.Env["loop-minutes"]*60
         self.find_hae_resources(node)
 
         clone_max = len(self.Env["nodes"])
         iteration = 0
         failed = 0
         while time.time() <= deadline and not failed:
             iteration += 1
 
             # Take the dlm down and expect no active instance
             self.change_state(node, self.r_dlm, "Stopped")
             if not self.wait_on_state(node, self.r_dlm, 0):
                 self.failure("%s did not go down correctly" % self.r_dlm)
                 failed = iteration
 
             # Bring it back and expect one instance per node
             self.change_state(node, self.r_dlm, "Started")
             if not self.wait_on_state(node, self.r_dlm, clone_max):
                 self.failure("%s did not come up correctly" % self.r_dlm)
                 failed = iteration
 
             if not self.wait_on_state(node, self.r_o2cb, clone_max):
                 self.failure("%s did not come up correctly" % self.r_o2cb)
                 failed = iteration
 
             for fs in self.r_ocfs2:
                 if not self.wait_on_state(node, fs, clone_max):
                     self.failure("%s did not come up correctly" % fs)
                     failed = iteration
 
         if not failed:
             return self.success()
         return self.failure("iteration %d failed" % failed)
 
 AllTestClasses.append(HAERoleTest)
 
 
 class HAEStandbyTest(HAETest):
     '''Loop for Env["loop-minutes"] minutes, switching standby on and off
     with crm_standby and checking the active resource counts'''
 
     def __init__(self, cm):
         HAETest.__init__(self, cm)
         self.name = "HAEStandbyTest"
 
     def change_state(self, node, resource, target):
         '''Run crm_standby with value target on node; resource is not used'''
         command = "crm_standby -V -l reboot -v %s" % (target)
         return self.rsh(node, command)
 
     def __call__(self, node):
         '''Run the standby loop; the failure message names the iteration that failed'''
         self.incr("calls")
 
         deadline = time.time() + self.Env["loop-minutes"]*60
         self.find_hae_resources(node)
 
         clone_max = len(self.Env["nodes"])
         iteration = 0
         failed = 0
         while time.time() <= deadline and not failed:
             iteration += 1
 
             # Standby on: expect one instance fewer
             self.change_state(node, self.r_dlm, "true")
             if not self.wait_on_state(node, self.r_dlm, clone_max-1):
                 self.failure("%s did not go down correctly" % self.r_dlm)
                 failed = iteration
 
             # Standby off: expect one instance per node again
             self.change_state(node, self.r_dlm, "false")
             if not self.wait_on_state(node, self.r_dlm, clone_max):
                 self.failure("%s did not come up correctly" % self.r_dlm)
                 failed = iteration
 
             if not self.wait_on_state(node, self.r_o2cb, clone_max):
                 self.failure("%s did not come up correctly" % self.r_o2cb)
                 failed = iteration
 
             for fs in self.r_ocfs2:
                 if not self.wait_on_state(node, fs, clone_max):
                     self.failure("%s did not come up correctly" % fs)
                     failed = iteration
 
         if not failed:
             return self.success()
         return self.failure("iteration %d failed" % failed)
 
 AllTestClasses.append(HAEStandbyTest)
 
 
 class NearQuorumPointTest(CTSTest):
     '''
     Bring larger clusters near the quorum point (50%) while doing starts
     and stops at the same time.
 
     Outline:
     - for every node, pick "start" or "stop" at random
     - work out which nodes have to change state to get there
     - issue the stops and the starts without blocking, then wait for the
       expected log patterns
     '''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "NearQuorumPoint"
 
     def __call__(self, dummy):
         '''Perform the 'NearQuorumPoint' test. '''
         self.incr("calls")
         startset = []
         stopset = []
 
         stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint")
 
         # Pick the target state of each node at random
         for node in self.Env["nodes"]:
             action = self.Env.RandomGen.choice(["start","stop"])
             if action == "stop":
                 stopset.append(node)
             elif action == "start":
                 startset.append(node)
 
         self.debug("start nodes:" + repr(startset))
         self.debug("stop nodes:" + repr(stopset))
 
         # Patterns for the nodes that really have to stop
         watchpats = [
             self.templates["Pat:We_stopped"] % node
             for node in stopset
             if self.CM.ShouldBeStatus[node] == "up"
         ]
 
         for node in startset:
             if self.CM.ShouldBeStatus[node] == "down":
                 # This node really has to start
                 watchpats.append(self.templates["Pat:Local_started"] % node)
                 continue
 
             # Not marked down: expect it to see each stopping peer leave
             for stopping in stopset:
                 if self.CM.ShouldBeStatus[stopping] != "up":
                     continue
                 watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping)))
 
         if not watchpats:
             return self.skipped()
 
         if startset:
             watchpats.append(self.templates["Pat:DC_IDLE"])
 
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
         watch.setwatch()
 
         # Issue the stops first, then the starts
         for node in stopset:
             if self.CM.ShouldBeStatus[node] == "up":
                 self.CM.StopaCMnoBlock(node)
 
         for node in startset:
             if self.CM.ShouldBeStatus[node] == "down":
                 self.CM.StartaCMnoBlock(node)
 
         # Everything was seen in the logs
         if watch.lookforall():
             self.CM.cluster_stable()
             self.CM.fencing_cleanup("NearQuorumPoint", stonith)
             return self.success()
 
         self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched))
 
         # Collect the nodes that are not in the requested state
         upnodes = [node for node in stopset if self.CM.StataCM(node) == 1]
         downnodes = [node for node in startset if self.CM.StataCM(node) == 0]
 
         self.CM.fencing_cleanup("NearQuorumPoint", stonith)
         if not upnodes and not downnodes:
             self.CM.cluster_stable()
 
             # Make sure they're completely down with no residue
             for node in stopset:
                 self.rsh(node, self.templates["StopCmd"])
 
             return self.success()
 
         if upnodes:
             self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes))
 
         if downnodes:
             self.logger.log("Warn: Unstartable nodes: " + repr(downnodes))
 
         return self.failure()
 
     def is_applicable(self):
         '''Return None when Env["Name"] is "crm-cman", and 1 otherwise'''
         return None if self.Env["Name"] == "crm-cman" else 1
 
 AllTestClasses.append(NearQuorumPointTest)
 
 
 class RollingUpgradeTest(CTSTest):
     '''Perform a rolling upgrade of the cluster.
 
     setup() installs Env["previous-version"] on every node while the
     cluster is stopped, __call__ upgrades the nodes one at a time to
     Env["current-version"], and teardown() installs the current version on
     every node again with the cluster stopped.
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RollingUpgrade"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
 
     def setup(self, node):
         '''Stop the cluster, downgrade every node, then start all nodes'''
         # Stop everything before touching the packages
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
 
         # start=None: install only, the nodes are started together below
         for node in self.Env["nodes"]:
             if not self.downgrade(node, None):
                 return self.failure("Couldn't downgrade %s" % node)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Couldn't start all nodes")
         return self.success()
 
     def teardown(self, node):
         '''Stop the cluster and install the current version on every node'''
         # Stop everything
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
 
         # start=None: the nodes are left stopped after the install
         for node in self.Env["nodes"]:
             if not self.upgrade(node, None):
                 return self.failure("Couldn't upgrade %s" % node)
 
         return self.success()
 
     def install(self, node, version, start=1, flags="--force"):
         '''Stop node, copy the RPMs for version to it and install them.
 
         The package list comes from "ls" of Env["rpm-dir"]/<version> run
         through rsh on node; each file is copied with rsh.cp into
         /tmp/rpm-<version> on node and installed with "rpm -Uvh <flags>".
         node is started again afterwards unless start is false.
 
         Returns the result of self.success(), or of self.failure() when the
         stop or the start fails.  The results of the mkdir, rm, cp and rpm
         commands are not checked.
         '''
         target_dir = "/tmp/rpm-%s" % version
         src_dir = "%s/%s" % (self.Env["rpm-dir"], version)
 
         self.logger.log("Installing %s on %s with %s" % (version, node, flags))
         if not self.stop(node):
             return self.failure("stop failure: "+node)
 
         self.rsh(node, "mkdir -p %s" % target_dir)
         self.rsh(node, "rm -f %s/*.rpm" % target_dir)
         (rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None)
         for line in lines:
             # Drop only the line terminator; slicing off the last character
             # would damage a final line that has no newline
             rpm = line.rstrip("\n")
             if not rpm:
                 continue
             self.rsh.cp("%s" % (rpm), "%s:%s/" % (node, target_dir))
         self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir))
 
         if start and not self.start(node):
             return self.failure("start failure: "+node)
 
         return self.success()
 
     def upgrade(self, node, start=1):
         '''Install Env["current-version"] on node; see install() for the result'''
         return self.install(node, self.Env["current-version"], start)
 
     def downgrade(self, node, start=1):
         '''Install Env["previous-version"] on node with --force --nodeps; see install()'''
         return self.install(node, self.Env["previous-version"], start, "--force --nodeps")
 
     def __call__(self, node):
         '''Perform the 'Rolling Upgrade' test. '''
         self.incr("calls")
 
         for node in self.Env["nodes"]:
             # install() returns the result of self.success() when it works,
             # so a false result is the failure case (as in setup/teardown)
             if not self.upgrade(node):
                 return self.failure("Couldn't upgrade %s" % node)
 
             self.CM.cluster_stable()
 
         return self.success()
 
     def is_applicable(self):
         '''Return 1 only when the common checks pass and Env has
         "rpm-dir", "current-version" and "previous-version"; else None'''
         if not self.is_applicable_common():
             return None
 
         if not self.Env.has_key("rpm-dir"):
             return None
         if not self.Env.has_key("current-version"):
             return None
         if not self.Env.has_key("previous-version"):
             return None
 
         return 1
 
 #        Register RollingUpgradeTest as a good test to run
 AllTestClasses.append(RollingUpgradeTest)
 
 
 class BSC_AddResource(CTSTest):
     '''Add a resource to the cluster'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "AddResource"
         # Counter used to give every created resource a unique id
         self.resource_offset = 0
         self.cib_cmd = """cibadmin -C -o %s -X '%s' """
 
     def __call__(self, node):
         '''Create an IPaddr resource preferring node and wait for its start to be logged'''
         self.incr("calls")
         self.resource_offset = self.resource_offset + 1
 
         r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset)
         start_pat = "crmd.*%s_start_0.*confirmed.*ok"
 
         patterns = []
         patterns.append(start_pat % r_id)
 
         # The watch is set before the resource is created
         watch = self.create_watch(patterns, self.Env["DeadTime"])
         watch.setwatch()
 
         ip = self.NextIP()
         if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip):
             return self.failure("Make resource %s failed" % r_id)
 
         watch.lookforall()
         if watch.unmatched:
             for regex in watch.unmatched:
                 self.logger.log ("Warn: Pattern not found: %s" % (regex))
             return self.failure("Resource pattern(s) not found")
 
         if not self.CM.cluster_stable(self.Env["DeadTime"]):
             return self.failure("Unstable cluster")
 
         return self.success()
 
     def NextIP(self):
         '''Advance Env["IPBase"] to the next address and return it.
 
         Only the last group of the address is incremented: it is read as
         hexadecimal when the address contains ":", as decimal otherwise.
         No range check is made on the result.
         '''
         ip = self.Env["IPBase"]
         if ":" in ip:
             (prefix, sep, last) = ip.rpartition(":")
             # "%x" yields bare hex digits; hex() would add a "0x" prefix
             last = "%x" % (int(last, 16) + 1)
         else:
             (prefix, sep, last) = ip.rpartition('.')
             last = str(int(last) + 1)
 
         ip = prefix + sep + last
         self.Env["IPBase"] = ip
         return ip.strip()
 
     def make_ip_resource(self, node, id, rclass, type, ip):
         '''Create a location constraint for node and then the primitive itself.
 
         Both are added with cibadmin through rsh on node.  Returns 1 when
         both commands exit with 0, otherwise None.
         '''
         self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node))
         rsc_xml="""
 <primitive id="%s" class="%s" type="%s"  provider="heartbeat">
     <instance_attributes id="%s"><attributes>
         <nvpair id="%s" name="ip" value="%s"/>
     </attributes></instance_attributes>
 </primitive>""" % (id, rclass, type, id, id, ip)
 
         node_constraint = """
       <rsc_location id="run_%s" rsc="%s">
         <rule id="pref_run_%s" score="100">
           <expression id="%s_loc_expr" attribute="#uname" operation="eq" value="%s"/>
         </rule>
       </rsc_location>""" % (id, id, id, id, node)
 
         # The constraint goes in before the resource
         (rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None)
         if rc != 0:
             self.logger.log("Constraint creation failed: %d" % rc)
             return None
 
         (rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None)
         if rc != 0:
             self.logger.log("Resource creation failed: %d" % rc)
             return None
 
         return 1
 
     def is_applicable(self):
         '''Return 1 when Env["DoBSC"] is set, otherwise None'''
         if self.Env["DoBSC"]:
             return 1
         return None
 
 AllTestClasses.append(BSC_AddResource)
 
 
 class SimulStopLite(CTSTest):
     '''Stop any active nodes ~ simultaneously'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "SimulStopLite"
 
     def __call__(self, dummy):
         '''Perform the 'SimulStopLite' setup work; the argument is ignored'''
         self.incr("calls")
 
         self.debug("Setup: " + self.name)
 
         # One stop pattern for every node that is currently marked up
         watchpats = []
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] != "up":
                 continue
             self.incr("WasStarted")
             watchpats.append(self.templates["Pat:We_stopped"] % node)
 
         # Nothing is up, so there is nothing to stop
         if not watchpats:
             self.CM.clear_all_caches()
             return self.success()
 
         # Stop all the nodes - at about the same time...
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
 
         watch.setwatch()
         self.set_timer()
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == "up":
                 self.CM.StopaCMnoBlock(node)
 
         if watch.lookforall():
             self.CM.clear_all_caches()
 
             # Make sure they're completely down with no residue
             for node in self.Env["nodes"]:
                 self.rsh(node, self.templates["StopCmd"])
 
             return self.success()
 
         # Some pattern is missing: check which nodes are still running
         up_nodes = [node for node in self.Env["nodes"] if self.CM.StataCM(node) == 1]
         if up_nodes:
             return self.failure("Active nodes exist: " + repr(up_nodes))
 
         self.logger.log("Warn: All nodes stopped but CTS didnt detect: "
                     + repr(watch.unmatched))
 
         self.CM.clear_all_caches()
         return self.failure("Missing log message: "+repr(watch.unmatched))
 
     def is_applicable(self):
         '''SimulStopLite is a setup test and never applicable'''
         return 0
 
 
 class SimulStartLite(CTSTest):
     '''Start any stopped nodes ~ simultaneously'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStartLite"
 
     def __call__(self, dummy):
         '''Perform the 'SimulStartLite' setup work.
 
         Starts every node marked "down", repeating for the nodes returned
         by fencing_cleanup(), then checks that all nodes are up and stable.
         Returns self.success() or self.failure(); the argument is ignored.
         '''
         self.incr("calls")
         self.debug("Setup: " + self.name)
 
         #        We ignore the "node" parameter...
         node_list = []
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == "down":
                 self.incr("WasStopped")
                 node_list.append(node)
 
         self.set_timer()
         while len(node_list) > 0:
             # Repeat until all nodes come up
             watchpats = [ ]
 
             # Use the "local started" pattern only when no node is up yet
             uppat = self.templates["Pat:Slave_started"]
             if self.CM.upcount() == 0:
                 uppat = self.templates["Pat:Local_started"]
 
             watchpats.append(self.templates["Pat:DC_IDLE"])
             for node in node_list:
                 watchpats.append(uppat % node)
                 watchpats.append(self.templates["Pat:InfraUp"] % node)
                 watchpats.append(self.templates["Pat:PacemakerUp"] % node)
 
             #   Start all the nodes - at about the same time...
             watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
             watch.setwatch()
 
             stonith = self.CM.prepare_fencing_watcher(self.name)
 
             for node in node_list:
                 self.CM.StartaCMnoBlock(node)
 
             # The result is not used here; watch.unmatched is inspected below
             watch.lookforall()
 
             # node_list now holds what fencing_cleanup() returned; the loop
             # runs again for those nodes (presumably the ones that were
             # fenced - TODO confirm against fencing_cleanup)
             node_list = self.CM.fencing_cleanup(self.name, stonith)
 
             # Remove node_list messages from watch.unmatched
             for node in node_list:
                 self.logger.debug("Dealing with stonith operations for %s" % repr(node_list))
                 if watch.unmatched:
                     # remove() raises when the pattern is not in the list;
                     # that case is only logged as already matched
                     try:
                         watch.unmatched.remove(uppat % node)
                     except:
                         self.debug("Already matched: %s" % (uppat % node))
                     try:                        
                         watch.unmatched.remove(self.templates["Pat:InfraUp"] % node)
                     except:
                         self.debug("Already matched: %s" % (self.templates["Pat:InfraUp"] % node))
                     try:
                         watch.unmatched.remove(self.templates["Pat:PacemakerUp"] % node)
                     except:
                         self.debug("Already matched: %s" % (self.templates["Pat:PacemakerUp"] % node))
 
             # Patterns still missing are only warned about, not failed on
             if watch.unmatched:
                 for regex in watch.unmatched:
                     self.logger.log ("Warn: Startup pattern not found: %s" %(regex))
 
             if not self.CM.cluster_stable():
                 return self.failure("Cluster did not stabilize")
 
         # First check: every node must report as running
         did_fail = 0
         unstable = []
         for node in self.Env["nodes"]:
             if self.CM.StataCM(node) == 0:
                 did_fail = 1
                 unstable.append(node)
 
         if did_fail:
             return self.failure("Unstarted nodes exist: " + repr(unstable))
 
         # Second check: every node must be stable (did_fail is still 0 here)
         unstable = []
         for node in self.Env["nodes"]:
             if not self.CM.node_stable(node):
                 did_fail = 1
                 unstable.append(node)
 
         if did_fail:
             return self.failure("Unstable cluster nodes exist: " + repr(unstable))
 
         return self.success()
 
     def is_applicable(self):
         '''SimulStartLite is a setup test and never applicable'''
         return 0
 
 
 def TestList(cm, audits):
     '''Instantiate every class in AllTestClasses with cm and return the
     instances whose is_applicable() is true, each with Audits set to audits'''
     applicable = []
     for test_class in AllTestClasses:
         candidate = test_class(cm)
         if not candidate.is_applicable():
             continue
         candidate.Audits = audits
         applicable.append(candidate)
     return applicable
 
 
 class RemoteLXC(CTSTest):
     # Drives lxc_autogen.sh on a cluster node to create containers and
     # resources, waits for the expected start/promote operations, then
     # removes everything again.
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteLXC"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.num_containers = 2
         self.is_container = 1
+        self.is_docker_unsafe = 1
         # NOTE(review): failed/fail_string are set on failure and never reset
         # in the code visible here - confirm a failure does not carry over
         # into later runs of the same instance
         self.failed = 0
         self.fail_string = ""
 
     def start_lxc_simple(self, node):
         '''Generate the containers on node and wait for the resources to start.
 
         Sets self.failed and self.fail_string when a pattern is not seen.
         '''
 
         # restore any artifacts laying around from a previous test.
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
 
         # generate the containers, put them in the config, add some resources to them
         pats = [ ]
         # The patterns are appended after the watch is created; presumably
         # the watch keeps a reference to this same list - TODO confirm
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc1", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc2", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "promote_0"))
 
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers)
         self.set_timer("remoteSimpleInit")
         watch.lookforall()
         self.log_timer("remoteSimpleInit")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
     def cleanup_lxc_simple(self, node):
         '''Remove the containers again, or do a best-effort cleanup after a failure.
 
         Sets self.failed and self.fail_string when a stop pattern is not seen.
         '''
 
         pats = [ ]
         # if the test failed, attempt to clean up the cib and libvirt environment
         # as best as possible
         if self.failed == 1:
             # restore libvirt and cib
             self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
             self.rsh(node, "crm_resource -C -r container1 &>/dev/null")
             self.rsh(node, "crm_resource -C -r container2 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc1 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc2 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc-ms &>/dev/null")
             time.sleep(20)
             return
 
         # As in start_lxc_simple, the patterns are appended after the watch
         # has been created
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         pats.append(self.templates["Pat:RscOpOK"] % ("container1", "stop_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("container2", "stop_0"))
 
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null")
         self.set_timer("remoteSimpleCleanup")
         watch.lookforall()
         self.log_timer("remoteSimpleCleanup")
 
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
         # cleanup libvirt
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
 
     def __call__(self, node):
         '''Perform the 'RemoteLXC' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         # An exit status of 1 from "lxc_autogen.sh -v" makes the test skip
         rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null")
         if rc == 1:
             self.log("Environment test for lxc support failed.")
             return self.skipped()
 
         # The cleanup runs even when the start failed; it checks self.failed
         self.start_lxc_simple(node)
         self.cleanup_lxc_simple(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if self.failed == 1:
             return self.failure(self.fail_string)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [ """Updating failcount for ping""",
                  """LogActions: Recover ping""",
                  """LogActions: Recover lxc-ms""",
                  """LogActions: Recover container""",
                  # The orphaned lxc-ms resource causes an expected transition error
                  # that is a result of the pengine not having knowledge that the
                  # ms resource used to be a clone.  As a result it looks like that
                  # resource is running in multiple locations when it shouldn't... But in
                  # this instance we know why this error is occurring and that it is expected.
                  """Calculated Transition .* /var/lib/pacemaker/pengine/pe-error""",
                  """Resource lxc-ms .* is active on 2 nodes attempting recovery""",
                  """Unknown operation: fail""",
                  """notice: operation_finished: ping-""",
                  """notice: operation_finished: container""",
                  """notice: operation_finished: .*_monitor_0:.*:stderr""",
                  """(ERROR|error): sending stonithRA op to stonithd failed.""",
                 ]
 
 AllTestClasses.append(RemoteLXC)
 
 
 ###################################################################
 class RemoteBaremetal(CTSTest):
 ###################################################################
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteBaremetal"
+        self.is_docker_unsafe = 1
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.stop = StopTest(cm)
         self.pcmk_started = 0
         self.failed = 0
         self.fail_string = ""
         self.remote_node_added = 0
         self.remote_node = "remote1"
         self.remote_rsc_added = 0
         self.remote_rsc = "remote1-rsc"
         self.cib_cmd = """cibadmin -C -o %s -X '%s' """
 
     def del_rsc(self, node, rsc):
 
         for othernode in self.Env["nodes"]:
             if othernode == node:
                 # we don't want to try and use the cib that we just shutdown.
                 # find a cluster node that is not our soon to be remote-node.
                 continue
             rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc))
             if rc != 0:
                 self.fail_string = ("Removal of resource '%s' failed" % (rsc))
                 self.failed = 1
             return
 
     def add_rsc(self, node, rsc_xml):
         for othernode in self.CM.Env["nodes"]:
             if othernode == node:
                 # we don't want to try and use the cib that we just shutdown.
                 # find a cluster node that is not our soon to be remote-node.
                 continue
             rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml))
             if rc != 0:
                 self.fail_string = "resource creation failed"
                 self.failed = 1
             return
 
     def add_primitive_rsc(self, node):
         rsc_xml = """
 <primitive class="ocf" id="%s" provider="pacemaker" type="Dummy">
     <operations>
       <op id="remote1-rsc-monitor-interval-10s" interval="10s" name="monitor"/>
     </operations>
 </primitive>""" % (self.remote_rsc)
         self.add_rsc(node, rsc_xml)
         if self.failed == 0:
             self.remote_rsc_added = 1
 
     def add_connection_rsc(self, node):
         rsc_xml = """
 <primitive class="ocf" id="%s" provider="pacemaker" type="remote">
     <instance_attributes id="remote1-instance_attributes"/>
         <instance_attributes id="remote1-instance_attributes">
           <nvpair id="remote1-instance_attributes-server" name="server" value="%s"/>
         </instance_attributes>
     <operations>
       <op id="remote1-monitor-interval-60s" interval="60s" name="monitor"/>
           <op id="remote1-name-start-interval-0-timeout-60" interval="0" name="start" timeout="60"/>
     </operations>
     <meta_attributes id="remote1-meta_attributes"/>
 </primitive>""" % (self.remote_node, node)
         self.add_rsc(node, rsc_xml)
         if self.failed == 0:
             self.remote_node_added = 1
 
     def step1_start_metal(self, node):
         """Turn cluster node `node` into a baremetal remote node.

         Steps: delete any leftover test resources, stop the cluster stack
         on `node`, start pacemaker_remote there (up to 10 attempts, 6s
         apart), add the connection resource and wait for its start to be
         logged.  Any failure sets self.failed and self.fail_string.
         """
         # Reset the instance flag (the original assigned an unused local),
         # so a value left over from an earlier run cannot be mistaken for a
         # successful start in this one.
         self.pcmk_started = 0
 
         # make sure the resource doesn't already exist for some reason
         self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc))
         self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node))
 
         if not self.stop(node):
             self.failed = 1
             self.fail_string = "Failed to shutdown cluster node %s" % (node)
             return
 
         for i in range(10):
             rc = self.rsh(node, "service pacemaker_remote start")
             if rc != 0:
                 time.sleep(6)
             else:
                 self.pcmk_started = 1
                 break
 
         if self.pcmk_started == 0:
             self.failed = 1
             self.fail_string = "Failed to start pacemaker_remote on node %s" % (node)
             return
 
         # convert node to baremetal node now that it has shut down the cluster stack
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start"))
 
         self.add_connection_rsc(node)
 
         self.set_timer("remoteMetalInit")
         watch.lookforall()
         self.log_timer("remoteMetalInit")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
     def step2_add_rsc(self, node):
         """Verify that a resource can be placed on the remote node.

         Does nothing when an earlier step has failed.  Otherwise adds the
         primitive self.remote_rsc, asks where it runs and, if it is not on
         self.remote_node, moves it there; then waits for the start to be
         logged.  Failures set self.failed and self.fail_string.

         The resource name comes from self.remote_rsc throughout, so the
         commands always agree with the watch pattern.
         """
         if self.failed == 1:
             return
 
         # verify we can put a resource on the remote node
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append("process_lrm_event:.*Operation %s_start_0.*node=%s, .*confirmed.*true" % (self.remote_rsc, self.remote_node))
 
         # Add a resource that must live on remote-node
         self.add_primitive_rsc(node)
         # this crm_resource command actually occurs on the remote node
         # which verifies that the ipc proxy works
         time.sleep(1)
 
         (rc, lines) = self.rsh(node, "crm_resource -W -r %s --quiet" % (self.remote_rsc), None)
         if rc != 0:
             self.fail_string = "Failed to get location of resource %s" % (self.remote_rsc)
             self.failed = 1
             return
 
         find = 0
         for line in lines:
             if self.remote_node in line.split():
                 find = 1
                 break
 
         if find == 0:
             rc = self.rsh(node, "crm_resource -M -r %s -N %s" % (self.remote_rsc, self.remote_node))
             if rc != 0:
                 self.fail_string = "Failed to place primitive on remote-node"
                 self.failed = 1
                 return
 
         self.set_timer("remoteMetalRsc")
         watch.lookforall()
         self.log_timer("remoteMetalRsc")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
     def step3_test_attributes(self, node):
         """Set, query and delete a permanent attribute on the remote node.

         Does nothing when an earlier step has failed.  The three
         crm_attribute commands are run on `node` via self.CM.rsh; the first
         non-zero exit status sets self.fail_string and self.failed and
         stops the sequence.
         """
         if self.failed == 1:
             return
 
         # This verifies permanent attributes can be set on a remote-node. It also
         # verifies the remote-node can edit its own cib node section remotely.
         # A message of None means "build it from the exit status and output".
         steps = (
             ("-v testval", None),
             ("-Q", "Failed to get remote-node attribute"),
             ("-D", "Failed to delete remote-node attribute"),
         )
         for flags, message in steps:
             cmd = "crm_attribute -l forever -n testattr %s -N %s" % (flags, self.remote_node)
             (status, output) = self.CM.rsh(node, cmd, None)
             if status == 0:
                 continue
 
             if message is None:
                 message = "Failed to set remote-node attribute. rc:%s output:%s" % (status, output)
             self.fail_string = message
             self.failed = 1
             return
 
     def cleanup_metal(self, node):
         """Remove the test resources and stop pacemaker_remote on `node`.

         Does nothing when self.pcmk_started is 0.  Resources are deleted
         only if the matching *_added flag is set, and a "stop" pattern is
         watched for each of them.  Unmatched patterns set self.failed and
         self.fail_string.  pacemaker_remote is then stopped, retrying up to
         10 times with a 6s pause between attempts.

         The un-move command uses self.remote_rsc, the same name that is
         passed to del_rsc().
         """
         if self.pcmk_started == 0:
             return
 
         pats = [ ]
 
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         if self.remote_rsc_added == 1:
             pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "stop"))
         if self.remote_node_added == 1:
             pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "stop"))
 
         self.set_timer("remoteMetalCleanup")
         if self.remote_rsc_added == 1:
             self.rsh(node, "crm_resource -U -r %s -N %s" % (self.remote_rsc, self.remote_node))
             self.del_rsc(node, self.remote_rsc)
         if self.remote_node_added == 1:
             self.del_rsc(node, self.remote_node)
         watch.lookforall()
         self.log_timer("remoteMetalCleanup")
 
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
         # disable pcmk remote
         for i in range(10):
             rc = self.rsh(node, "service pacemaker_remote stop")
             if rc != 0:
                 time.sleep(6)
             else:
                 break
 
     def setup_env(self):
         """Make sure every node has /etc/pacemaker/authkey.

         If any node lacks the file, a key is generated locally with
         lxc_autogen.sh and copied to every node.
         """
         sync_key = 0
 
         # we are assuming if all nodes have a key, that it is
         # the right key... If any node doesn't have a remote
         # key, we regenerate it everywhere.
         for node in self.Env["nodes"]:
             rc = self.rsh(node, "ls /etc/pacemaker/authkey")
             if rc != 0:
                 sync_key = 1
                 break
 
         if sync_key == 0:
             return
 
         # create key locally
         # os.system() runs /bin/sh, where "&>" is not a redirection: a POSIX
         # shell would background the script and the copy below could race
         # it.  Use the portable form instead.
         os.system("/usr/share/pacemaker/tests/cts/lxc_autogen.sh -k >/dev/null 2>&1")
 
         # sync key throughout the cluster
         for node in self.Env["nodes"]:
             # -p: the directory may already exist on some nodes
             self.rsh(node, "mkdir -p /etc/pacemaker")
             self.rsh.cp("/etc/pacemaker/authkey", "%s:/etc/pacemaker/authkey" % (node))
 
     def is_applicable(self):
         """Return True when the common checks pass and every node has pacemaker_remoted.

         Nodes are probed in order with `type pacemaker_remoted`; probing
         stops at the first node where the command exits non-zero.
         """
         if not self.is_applicable_common():
             return False
 
         probe = "type pacemaker_remoted >/dev/null 2>&1"
         return all(self.rsh(name, probe) == 0 for name in self.Env["nodes"])
 
     def __call__(self, node):
         '''Perform the 'RemoteBaremetal' test. '''
         self.incr("calls")
 
         if not self.startall(None):
             return self.failure("Setup failed, start all nodes failed.")
 
         self.setup_env()
 
         # Each phase checks self.failed (or self.pcmk_started) itself, so
         # all of them are invoked unconditionally and in this order.
         phases = (
             self.step1_start_metal,
             self.step2_add_rsc,
             self.step3_test_attributes,
             self.cleanup_metal,
         )
         for phase in phases:
             phase(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if self.failed != 1:
             return self.success()
         return self.failure(self.fail_string)
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         ignorable = []
         ignorable.append("is running on remote1 which isn't allowed")
         ignorable.append("Connection terminated")
         ignorable.append("Failed to send remote")
         return ignorable
 
 AllTestClasses.append(RemoteBaremetal)
 
 # vim:ts=4:sw=4:et:
diff --git a/cts/environment.py b/cts/environment.py
index de1d099365..d741452ab2 100644
--- a/cts/environment.py
+++ b/cts/environment.py
@@ -1,663 +1,677 @@
 '''
 Classes related to producing and searching logs
 '''
 
 __copyright__='''
 Copyright (C) 2014 Andrew Beekhof <andrew@beekhof.net>
 Licensed under the GNU GPL.
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 import types, string, select, sys, time, re, os, struct, signal, socket
 import time, syslog, random, traceback, base64, pickle, binascii, fcntl
 
 from cts.remote import *
 
 class Environment:
 
     def __init__(self, args):
         """Populate the default settings, apply `args`, then probe the cluster.

         `args` is passed to parse_args(), which falls back to sys.argv[1:]
         when it is empty.  Afterwards discover() and validate() are run.
         """
         print repr(self)
         self.data = {}
         self.Nodes = []
 
         # Defaults.  Every assignment below goes through __setitem__.
         self["DeadTime"] = 300
         self["StartTime"] = 300
         self["StableTime"] = 30
         self["tests"] = []
         self["IPagent"] = "IPaddr2"
         self["DoStandby"] = 1
         self["DoFencing"] = 1
         self["XmitLoss"] = "0.0"
         self["RecvLoss"] = "0.0"
         self["ClobberCIB"] = 0
         self["CIBfilename"] = None
         self["CIBResource"] = 0
         self["DoBSC"]    = 0
         self["use_logd"] = 0
         self["oprofile"] = []
         self["warn-inactive"] = 0
         self["ListTests"] = 0
         self["benchmark"] = 0
         self["LogWatcher"] = "any"
         self["SyslogFacility"] = "daemon"
         self["LogFileName"] = "/var/log/messages"
         self["Schema"] = "pacemaker-2.0"
         # "Stack" is normalized by set_stack(): "corosync" is stored as "corosync 2.x".
         self["Stack"] = "corosync"
         self["stonith-type"] = "external/ssh"
         self["stonith-params"] = "hostlist=all,livedangerously=yes"
         self["loop-minutes"] = 60
         self["valgrind-prefix"] = None
         self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng"
         self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
 
         self["experimental-tests"] = 0
         self["container-tests"] = 0
         self["valgrind-tests"] = 0
         self["unsafe-tests"] = 1
         self["loop-tests"] = 1
         self["scenario"] = "random"
         self["stats"] = 0
         # Stored before parse_args() runs: assigning "nodes" reads self.data["docker"].
+        self["docker"] = 0
 
         self.RandomGen = random.Random()
         self.logger = LogFactory()
 
         self.SeedRandom()
         self.rsh = RemoteFactory().getInstance()
 
         # Replaced by discover() with a randomly chosen node.
         self.target = "localhost"
 
         self.parse_args(args)
         self.discover()
         self.validate()
 
     def SeedRandom(self, seed=None):
         """Seed self.RandomGen and record the seed under "RandSeed".

         A false `seed` (None, 0, "") is replaced by the current time in
         whole seconds.  The log line says "New random seed" when a seed
         had already been recorded.
         """
         if not seed:
             seed = int(time.time())
 
         prefix = "Random seed is: "
         if self.has_key("RandSeed"):
             prefix = "New random seed is: "
         self.logger.log(prefix + str(seed))
 
         self["RandSeed"] = seed
         self.RandomGen.seed(str(seed))
 
     def dump(self):
         """Write every entry of self.data to the debug log, sorted by key.

         Values are read through self[...] rather than from self.data.
         """
         for name in sorted(self.data.keys()):
             self.logger.debug("Environment["+name+"]:\t"+str(self[name]))
 
     def keys(self):
         """Return the keys held in self.data ("nodes" and "Name" are not stored there)."""
         stored = self.data
         return stored.keys()
 
     def has_key(self, key):
         """Return True for "nodes" (always available) or for any key stored in self.data."""
         return key == "nodes" or self.data.has_key(key)
 
     def __getitem__(self, key):
         if key == "nodes":
             return self.Nodes
 
         elif key == "Name":
             return self.get_stack_short()
 
         elif self.data.has_key(key):
             return self.data[key]
 
         else:
             return None
 
     def __setitem__(self, key, value):
         """Store a setting; a few keys get special treatment.

         "Stack"      - handed to set_stack(), which stores a normalized name.
         "node-limit" - stored, then filter_nodes() trims the node list.
         "nodes"      - `value` is iterated; each stripped name is appended
                        to a fresh self.Nodes, then filter_nodes() is run.
                        Any exception for a name is logged as a DNS failure
                        and re-raised.
         other keys   - stored in self.data unchanged.
         """
         if key == "Stack":
             self.set_stack(value)
 
         elif key == "node-limit":
             self.data[key] = value
             self.filter_nodes()
 
         elif key == "nodes":
             self.Nodes = []
             for node in value:
                 # I don't think I need the IP address, etc. but this validates
                 # the node name against /etc/hosts and/or DNS, so it's a
                 # GoodThing(tm).
                 # NOTE(review): in the patched version self.data["docker"]
                 # raises KeyError when "docker" has not been stored yet, and
                 # the bare except below would report that as a DNS failure -
                 # confirm "docker" is always set first.
                 try:
                     n = node.strip()
-                    gethostbyname_ex(n)
+                    if self.data["docker"] == 0:
+                        gethostbyname_ex(n)
+
                     self.Nodes.append(n) 
                 except:
                     self.logger.log(node+" not found in DNS... aborting")
                     raise
 
             self.filter_nodes()
 
         else:
             self.data[key] = value
 
     def RandomNode(self):
         '''Choose a random node from the cluster'''
         candidates = self["nodes"]
         return self.RandomGen.choice(candidates)
 
     def set_stack(self, name):
         # Normalize stack names
         if name == "heartbeat" or name == "lha":
             self.data["Stack"] = "heartbeat"
 
         elif name == "openais" or name == "ais"  or name == "whitetank":
             self.data["Stack"] = "openais (whitetank)"
 
         elif name == "corosync" or name == "cs" or name == "mcp":
             self.data["Stack"] = "corosync 2.x"
 
         elif name == "cman":
             self.data["Stack"] = "corosync (cman)"
 
         elif name == "v1":
             self.data["Stack"] = "corosync (plugin v1)"
 
         elif name == "v0":
             self.data["Stack"] = "corosync (plugin v0)"
 
         else:
             print "Unknown stack: "+name
             sys.exit(1)
 
     def get_stack_short(self):
         # Create the Cluster Manager object
         if not self.data.has_key("Stack"):
             return "unknown"
 
         elif self.data["Stack"] == "heartbeat":
             return "crm-lha"
 
         elif self.data["Stack"] == "corosync 2.x":
-            return "crm-mcp"
+            if self["docker"]:
+                return "crm-mcp-docker"
+            else:
+                return "crm-mcp"
 
         elif self.data["Stack"] == "corosync (cman)":
             return "crm-cman"
         
         elif self.data["Stack"] == "corosync (plugin v1)":
             return "crm-plugin-v1"
         
         elif self.data["Stack"] == "corosync (plugin v0)":
             return "crm-plugin-v0"
 
         else:
             LogFactory().log("Unknown stack: "+self.data["stack"])
             sys.exit(1)
 
     def detect_syslog(self):
         """Fill in "syslogd" unless it is already set.

         The name is taken from the stripped output of a command run on
         self.target (systemctl when "have_systemd" is true, chkconfig
         otherwise); an empty result falls back to "rsyslog".
         """
         if self.has_key("syslogd"):
             return
 
         if self["have_systemd"]:
             # Systemd
             query = "systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'"
         else:
             # SYS-V
             query = "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1"
         self["syslogd"] = self.rsh(self.target, query, stdout=1).strip()
 
         if not self.has_key("syslogd") or not self["syslogd"]:
             # default
             self["syslogd"] = "rsyslog"
 
     def detect_at_boot(self):
         """Fill in "at-boot" unless it is already set.

         With systemd, the heartbeat, corosync and pacemaker units are
         checked in that order on self.target and checking stops at the
         first one whose `is-enabled` command exits 0.  Otherwise a single
         chkconfig pipeline is used.  The stored value is a boolean.
         """
         if self.has_key("at-boot"):
             return
 
         if self["have_systemd"]:
             # Systemd
             enabled = False
             for unit in ("heartbeat", "corosync", "pacemaker"):
                 if not self.rsh(self.target, "systemctl is-enabled %s.service" % unit):
                     enabled = True
                     break
         else:
             # SYS-V
             enabled = not self.rsh(self.target, "chkconfig --list | grep -e corosync.*on -e heartbeat.*on -e pacemaker.*on")
 
         self["at-boot"] = enabled
 
     def detect_ip_offset(self):
         """Try to determine "IPBase", the offset for IPaddr resources.

         Only runs when "CIBResource" is true and "IPBase" is not yet set.
         The value is the last address reported by an nmap scan of the
         target's network.  An empty result, or a final octet of 240 or
         more, is replaced by a fixed fallback address and logged.
         """
         if not self["CIBResource"] or self.has_key("IPBase"):
             return
 
         fallback = " fe80::1234:56:7890:1000"
 
         network=self.rsh(self.target, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip()
         scan = "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network
         self["IPBase"] = self.rsh(self.target, scan, stdout=1).strip()
 
         if not self["IPBase"]:
             self["IPBase"] = fallback
             self.logger.log("Could not determine an offset for IPaddr resources.  Perhaps nmap is not installed on the nodes.")
             self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
 
         elif int(self["IPBase"].split('.')[3]) >= 240:
             last_octet = self["IPBase"].split('.')[3]
             self.logger.log("Could not determine an offset for IPaddr resources. Upper bound is too high: %s %s"
                             % (self["IPBase"], last_octet))
             self["IPBase"] = fallback
             self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
 
     def filter_nodes(self):
         """Truncate the node list in place to at most "node-limit" entries.

         Nothing happens unless the limit is greater than 0 and the list is
         longer than the limit; the truncation is logged.
         """
         limit = self["node-limit"]
         if limit > 0 and len(self["nodes"]) > limit:
             self.logger.log("Limiting the number of nodes configured=%d (max=%d)"
                             %(len(self["nodes"]), limit))
             # drop everything past the first `limit` entries
             nodes = self["nodes"]
             del nodes[limit:]
 
     def validate(self):
         if len(self["nodes"]) < 1:
             print "No nodes specified!"
             sys.exit(1)
 
     def discover(self):
         """Pick a target node and probe it for environment details.

         Sets self.target to a random node and "cts-master" to the first
         non-loopback address of this host (or its hostname).  Unless it was
         preset, "have_systemd" is set from the exit status of
         `systemctl list-units` on the target.  Then detect_syslog(),
         detect_at_boot() and detect_ip_offset() are run.

         Exits via validate() when no nodes are configured.
         """
         # Check first: choice() on an empty list raises IndexError before
         # the "No nodes specified!" message could be shown.
         self.validate()
 
         self.target = random.Random().choice(self["nodes"])
 
         master = socket.gethostname()
 
         # Use the IP where possible to avoid name lookup failures
         for ip in socket.gethostbyname_ex(master)[2]:
             if ip != "127.0.0.1":
                 master = ip
                 break
         self["cts-master"] = master
 
         # Probe only when the caller has not set the value.  The original
         # test was inverted and called undefined names, so the probe could
         # never run successfully.
         if not self.has_key("have_systemd"):
             self["have_systemd"] = not self.rsh(self.target, "systemctl list-units")
 
         self.detect_syslog()
         self.detect_at_boot()
         self.detect_ip_offset()
 
     def parse_args(self, args):
         skipthis=None
 
         if not args:
             args=sys.argv[1:]
 
         for i in range(0, len(args)):
             if skipthis:
                 skipthis=None
                 continue
 
             elif args[i] == "-l" or args[i] == "--limit-nodes":
                 skipthis=1
                 self["node-limit"] = int(args[i+1])
 
             elif args[i] == "-r" or args[i] == "--populate-resources":
                 self["CIBResource"] = 1
                 self["ClobberCIB"] = 1
 
             elif args[i] == "--outputfile":
                 skipthis=1
                 self["OutputFile"] = args[i+1]
                 LogFactory().add_file(self["OutputFile"])
 
             elif args[i] == "-L" or args[i] == "--logfile":
                 skipthis=1
                 self["LogWatcher"] = "remote"
                 self["LogAuditDisabled"] = 1
                 self["LogFileName"] = args[i+1]
 
             elif args[i] == "--ip" or args[i] == "--test-ip-base":
                 skipthis=1
                 self["IPBase"] = args[i+1]
                 self["CIBResource"] = 1
                 self["ClobberCIB"] = 1
 
             elif args[i] == "--oprofile":
                 skipthis=1
                 self["oprofile"] = args[i+1].split(' ')
 
             elif args[i] == "--trunc":
                 self["TruncateLog"]=1
 
             elif args[i] == "--list-tests" or args[i] == "--list" :
                 self["ListTests"]=1
 
             elif args[i] == "--benchmark":
                 self["benchmark"]=1
 
             elif args[i] == "--bsc":
                 self["DoBSC"] = 1
                 self["scenario"] = "basic-sanity"
 
             elif args[i] == "--qarsh":
                 RemoteFactory().enable_qarsh()
 
+            elif args[i] == "--docker":
+                self["docker"] = 1
+                RemoteFactory().enable_docker()
+
             elif args[i] == "--stonith" or args[i] == "--fencing":
                 skipthis=1
                 if args[i+1] == "1" or args[i+1] == "yes":
                     self["DoFencing"]=1
                 elif args[i+1] == "0" or args[i+1] == "no":
                     self["DoFencing"]=0
                 elif args[i+1] == "rhcs" or args[i+1] == "xvm" or args[i+1] == "virt":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_xvm"
                     self["stonith-params"] = "pcmk_arg_map=domain:uname,delay=0"
+                elif args[i+1] == "docker":
+                    self["DoStonith"]=1
+                    self["stonith-type"] = "fence_docker_cts"
                 elif args[i+1] == "scsi":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_scsi"
                     self["stonith-params"] = "delay=0"
                 elif args[i+1] == "ssh" or args[i+1] == "lha":
                     self["DoStonith"]=1
                     self["stonith-type"] = "external/ssh"
                     self["stonith-params"] = "hostlist=all,livedangerously=yes"
                 elif args[i+1] == "north":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=north-apc,login=apc,passwd=apc,pcmk_host_map=north-01:2;north-02:3;north-03:4;north-04:5;north-05:6;north-06:7;north-07:9;north-08:10;north-09:11;north-10:12;north-11:13;north-12:14;north-13:15;north-14:18;north-15:17;north-16:19;"
                 elif args[i+1] == "south":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=south-apc,login=apc,passwd=apc,pcmk_host_map=south-01:2;south-02:3;south-03:4;south-04:5;south-05:6;south-06:7;south-07:9;south-08:10;south-09:11;south-10:12;south-11:13;south-12:14;south-13:15;south-14:18;south-15:17;south-16:19;"
                 elif args[i+1] == "east":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
                 elif args[i+1] == "west":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=west-apc,login=apc,passwd=apc,pcmk_host_map=west-01:2;west-02:3;west-03:4;west-04:5;west-05:6;west-06:7;west-07:9;west-08:10;west-09:11;west-10:12;west-11:13;west-12:14;west-13:15;west-14:18;west-15:17;west-16:19;"
                 elif args[i+1] == "openstack":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_openstack"
                     
                     print "Obtaining OpenStack credentials from the current environment"
                     self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % (
                         os.environ['OS_REGION_NAME'],
                         os.environ['OS_TENANT_NAME'],
                         os.environ['OS_AUTH_URL'],
                         os.environ['OS_USERNAME'],
                         os.environ['OS_PASSWORD']
                     )
                     
                 elif args[i+1] == "rhevm":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_rhevm"
                     
                     print "Obtaining RHEV-M credentials from the current environment"
                     self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
                         os.environ['RHEVM_USERNAME'],
                         os.environ['RHEVM_PASSWORD'],
                         os.environ['RHEVM_SERVER'],
                         os.environ['RHEVM_PORT'],
                     )
                     
                 else:
                     self.usage(args[i+1])
 
             elif args[i] == "--stonith-type":
                 self["stonith-type"] = args[i+1]
                 skipthis=1
 
             elif args[i] == "--stonith-args":
                 self["stonith-params"] = args[i+1]
                 skipthis=1
 
             elif args[i] == "--standby":
                 skipthis=1
                 if args[i+1] == "1" or args[i+1] == "yes":
                     self["DoStandby"] = 1
                 elif args[i+1] == "0" or args[i+1] == "no":
                     self["DoStandby"] = 0
                 else:
                     self.usage(args[i+1])
 
             elif args[i] == "--clobber-cib" or args[i] == "-c":
                 self["ClobberCIB"] = 1
                 
             elif args[i] == "--cib-filename":
                 skipthis=1
                 self["CIBfilename"] = args[i+1]
 
             elif args[i] == "--xmit-loss":
                 try:
                     float(args[i+1])
                 except ValueError:
                     print ("--xmit-loss parameter should be float")
                     self.usage(args[i+1])
                 skipthis=1
                 self["XmitLoss"] = args[i+1]
 
             elif args[i] == "--recv-loss":
                 try:
                     float(args[i+1])
                 except ValueError:
                     print ("--recv-loss parameter should be float")
                     self.usage(args[i+1])
                 skipthis=1
                 self["RecvLoss"] = args[i+1]
 
             elif args[i] == "--choose":
                 skipthis=1
                 self["tests"].append(args[i+1])
                 self["scenario"] = "sequence"
 
             elif args[i] == "--nodes":
                 skipthis=1
                 self["nodes"] = args[i+1].split(' ')
 
             elif args[i] == "-g" or args[i] == "--group" or args[i] == "--dsh-group":
                 skipthis=1
                 self["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args[i+1])
                 LogFactory().add_file(self["OutputFile"], "CTS")
 
                 dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args[i+1])
 
                 # Hacks to make my life easier
                 if args[i+1] == "r6":
                     self["Stack"] = "cman"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_xvm"
                     self["stonith-params"] = "delay=0"
                     self["IPBase"] = " fe80::1234:56:7890:4000"
 
                 elif args[i+1] == "virt1":
                     self["Stack"] = "corosync"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_xvm"
                     self["stonith-params"] = "delay=0"
                     self["IPBase"] = " fe80::1234:56:7890:1000"
 
                 elif args[i+1] == "east16" or args[i+1] == "nsew":
                     self["Stack"] = "corosync"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_apc"
                     self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
                     self["IPBase"] = " fe80::1234:56:7890:2000"
 
                     if args[i+1] == "east16":
                         # Requires newer python than available via nsew
                         self["IPagent"] = "Dummy"
 
                 elif args[i+1] == "corosync8":
                     self["Stack"] = "corosync"
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_rhevm"
 
                     print "Obtaining RHEV-M credentials from the current environment"
                     self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
                         os.environ['RHEVM_USERNAME'],
                         os.environ['RHEVM_PASSWORD'],
                         os.environ['RHEVM_SERVER'],
                         os.environ['RHEVM_PORT'],
                    )
                     self["IPBase"] = " fe80::1234:56:7890:3000"
 
                 if os.path.isfile(dsh_file):
                     self["nodes"] = []
                     f = open(dsh_file, 'r')
                     for line in f:
                         l = line.strip().rstrip()
                         if not l.startswith('#'):
                             self["nodes"].append(l)
                     f.close()
 
                 else:
                     print("Unknown DSH group: %s" % args[i+1])
 
             elif args[i] == "--syslog-facility" or args[i] == "--facility":
                 skipthis=1
                 self["SyslogFacility"] = args[i+1]
                 
             elif args[i] == "--seed":
                 skipthis=1
                 self.SeedRandom(args[i+1])
 
             elif args[i] == "--warn-inactive":
                 self["warn-inactive"] = 1
 
             elif args[i] == "--schema":
                 skipthis=1
                 self["Schema"] = args[i+1]
 
             elif args[i] == "--ais":
                 self["Stack"] = "openais"
 
             elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot":
                 skipthis=1
                 if args[i+1] == "1" or args[i+1] == "yes":
                     self["at-boot"] = 1
                 elif args[i+1] == "0" or args[i+1] == "no":
                     self["at-boot"] = 0
                 else:
                     self.usage(args[i+1])
 
             elif args[i] == "--heartbeat" or args[i] == "--lha":
                 self["Stack"] = "heartbeat"
 
             elif args[i] == "--hae":
                 self["Stack"] = "openais"
                 self["Schema"] = "hae"
 
             elif args[i] == "--stack":
                 if args[i+1] == "fedora" or args[i+1] == "fedora-17" or args[i+1] == "fedora-18":
                     self["Stack"] = "corosync"
                 elif args[i+1] == "rhel-6":
                     self["Stack"] = "cman"
                 elif args[i+1] == "rhel-7":
                     self["Stack"] = "corosync"
                 else:
                     self["Stack"] = args[i+1]
                 skipthis=1
 
             elif args[i] == "--once":
                 self["scenario"] = "all-once"
 
             elif args[i] == "--boot":
                 self["scenario"] = "boot"
 
             elif args[i] == "--valgrind-tests":
                 self["valgrind-tests"] = 1
 
             elif args[i] == "--no-loop-tests":
                 self["loop-tests"] = 0
 
             elif args[i] == "--loop-minutes":
                 skipthis=1
                 try:
                     self["loop-minutes"]=int(args[i+1])
                 except ValueError:
                     self.usage(args[i])
 
             elif args[i] == "--no-unsafe-tests":
                 self["unsafe-tests"] = 0
 
             elif args[i] == "--experimental-tests":
                 self["experimental-tests"] = 1
 
             elif args[i] == "--container-tests":
                 self["container-tests"] = 1
 
             elif args[i] == "--set":
                 skipthis=1
                 (name, value) = args[i+1].split('=')
                 self[name] = value
                 print "Setting %s = %s" % (name, value)
                 
             elif args[i] == "--":
                 break
 
             else:
                 try:
                     NumIter=int(args[i])
                     self["iterations"] = NumIter
                 except ValueError:
                     self.usage(args[i])
 
     def usage(arg, status=1):
         print "Illegal argument %s" % (arg)
         print "usage: " + sys.argv[0] +" [options] number-of-iterations"
         print "\nCommon options: "
         print "\t [--nodes 'node list']        list of cluster nodes separated by whitespace"
         print "\t [--group | -g 'name']        use the nodes listed in the named DSH group (~/.dsh/groups/$name)"
         print "\t [--limit-nodes max]          only use the first 'max' cluster nodes supplied with --nodes"
         print "\t [--stack (v0|v1|cman|corosync|heartbeat|openais)]    which cluster stack is installed"
         print "\t [--list-tests]               list the valid tests"
         print "\t [--benchmark]                add the timing information"
         print "\t "
         print "Options that CTS will usually auto-detect correctly: "
         print "\t [--logfile path]             where should the test software look for logs from cluster nodes"
         print "\t [--syslog-facility name]     which syslog facility should the test software log to"
         print "\t [--at-boot (1|0)]            does the cluster software start at boot time"
         print "\t [--test-ip-base ip]          offset for generated IP address resources"
         print "\t "
         print "Options for release testing: "
         print "\t [--populate-resources | -r]  generate a sample configuration"
         print "\t [--choose name]              run only the named test"
         print "\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]"
         print "\t [--once]                     run all valid tests once"
         print "\t "
         print "Additional (less common) options: "
         print "\t [--clobber-cib | -c ]        erase any existing configuration"
         print "\t [--outputfile path]          optional location for the test software to write logs to"
         print "\t [--trunc]                    truncate logfile before starting"
         print "\t [--xmit-loss lost-rate(0.0-1.0)]"
         print "\t [--recv-loss lost-rate(0.0-1.0)]"
         print "\t [--standby (1 | 0 | yes | no)]"
         print "\t [--fencing (1 | 0 | yes | no | rhcs | lha | openstack )]"
         print "\t [--stonith-type type]"
         print "\t [--stonith-args name=value]"
         print "\t [--bsc]"
         print "\t [--no-loop-tests]            dont run looping/time-based tests"
         print "\t [--no-unsafe-tests]          dont run tests that are unsafe for use with ocfs2/drbd"
         print "\t [--valgrind-tests]           include tests using valgrind"
         print "\t [--experimental-tests]       include experimental tests"
         print "\t [--container-tests]          include pacemaker_remote tests that run in lxc container resources"
         print "\t [--oprofile 'node list']     list of cluster nodes to run oprofile on]"
         print "\t [--qarsh]                    use the QARSH backdoor to access nodes instead of SSH"
+        print "\t [--docker]                   Indicates nodes are docker nodes."
         print "\t [--seed random_seed]"
         print "\t [--set option=value]"
         print "\t "
         print "\t Example: "
         print "\t    python sys.argv[0] -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500"
 
         sys.exit(status)
 
 class EnvFactory:
     """Hands out one shared Environment, created on first request."""
 
     # The shared Environment; stays None until getInstance() creates it.
     instance = None
 
     def __init__(self):
         pass
 
     def getInstance(self, args=None):
         """Return the shared Environment, building it from `args` if needed.

         `args` is only used by the call that creates the instance.
         """
         if EnvFactory.instance:
             return EnvFactory.instance
 
         EnvFactory.instance = Environment(args)
         return EnvFactory.instance
diff --git a/cts/patterns.py b/cts/patterns.py
index 50887257bd..8d34e1c051 100644
--- a/cts/patterns.py
+++ b/cts/patterns.py
@@ -1,510 +1,526 @@
 from UserDict import UserDict
 import sys, time, types, syslog, os, struct, string, signal, traceback, warnings, socket
 
 from cts.CTSvars import *
 
 patternvariants = {}
 class BasePatterns:
     def __init__(self, name):
         self.name = name
         patternvariants[name] = self
         self.ignore = []
         self.BadNews = []
         self.components = {}
         self.commands = {
             "StatusCmd"      : "crmadmin -t 60000 -S %s 2>/dev/null",
             "CibQuery"       : "cibadmin -Ql",
             "CibAddXml"      : "cibadmin --modify -c --xml-text %s",
             "CibDelXpath"    : "cibadmin --delete --xpath %s",
             # 300,000 == 5 minutes
             "RscRunning"     : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s",
             "CIBfile"        : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
             "TmpDir"         : "/tmp",
 
             "BreakCommCmd"   : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
             "FixCommCmd"     : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
 
 # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
 # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
 # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
             "ReduceCommCmd"  : "",
             "RestoreCommCmd" : "tc qdisc del dev lo root",
 
             "UUIDQueryCmd"    : "crmadmin -N",
 
             "MaintenanceModeOn"    : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'",
             "MaintenanceModeOff"    : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",
 
             "StandbyCmd"      : "crm_attribute -VQ  -U %s -n standby -l forever -v %s 2>/dev/null",
             "StandbyQueryCmd" : "crm_attribute -QG -U %s -n standby -l forever -d off 2>/dev/null",
         }
         self.search = {
             "Pat:DC_IDLE"      : "crmd.*State transition.*-> S_IDLE",
             
             # This wont work if we have multiple partitions
             "Pat:Local_started" : "%s\W.*The local CRM is operational",
             "Pat:Slave_started" : "%s\W.*State transition.*-> S_NOT_DC",
             "Pat:Master_started": "%s\W.*State transition.*-> S_IDLE",
             "Pat:We_stopped"    : "heartbeat.*%s.*Heartbeat shutdown complete",
             "Pat:Logd_stopped"  : "%s\W.*logd:.*Exiting write process",
             "Pat:They_stopped"  : "%s\W.*LOST:.* %s ",
             "Pat:They_dead"     : "node %s.*: is dead",
             "Pat:TransitionComplete" : "Transition status: Complete: complete",
 
             "Pat:Fencing_start" : "Initiating remote operation .* for %s",
             "Pat:Fencing_ok"    : "stonith.*remote_op_done:.*Operation .* of %s by .*: OK",
 
             "Pat:RscOpOK"       : "process_lrm_event:.*Operation %s_%s.*ok.*confirmed",
         }
 
     def get_component(self, key):
         if self.components.has_key(key):
             return self.components[key]
         print "Unknown component '%s' for %s" % (key, self.name)
         return []
 
     def get_patterns(self, key):
         if key == "BadNews":
             return self.BadNews
         elif key == "BadNewsIgnore":
             return self.ignore
         elif key == "Commands":
             return self.commands
         elif key == "Search":
             return self.search
         elif key == "Components":
             return self.components
 
     def __getitem__(self, key):
         if key == "Name":
             return self.name
         elif self.commands.has_key(key):
             return self.commands[key]
         elif self.search.has_key(key):
             return self.search[key]
         else:
             print "Unknown template '%s' for %s" % (key, self.name)
             return None
 
 class crm_lha(BasePatterns):
     def __init__(self, name):
         BasePatterns.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service heartbeat start > /dev/null 2>&1",
             "StopCmd"        : "service heartbeat stop  > /dev/null 2>&1",
             "EpocheCmd"      : "crm_node -H -e",
             "QuorumCmd"      : "crm_node -H -q",
             "ParitionCmd"    : "crm_node -H -p",
         })
 
         self.search.update({
             # Patterns to look for in the log files for various occasions...
             "Pat:ChildKilled"  : "%s\W.*heartbeat.*%s.*killed by signal 9",
             "Pat:ChildRespawn" : "%s\W.*heartbeat.*Respawning client.*%s",
             "Pat:ChildExit"    : "(ERROR|error): Client .* exited with return code",            
         })
         self.BadNews = [
                 r"error:",
                 r"crit:",
                 r"ERROR:",
                 r"CRIT:",
                 r"Shutting down...NOW",
                 r"Timer I_TERMINATE just popped",
                 r"input=I_ERROR",
                 r"input=I_FAIL",
                 r"input=I_INTEGRATED cause=C_TIMER_POPPED",
                 r"input=I_FINALIZED cause=C_TIMER_POPPED",
                 r"input=I_ERROR",
                 r", exiting\.",
                 r"WARN.*Ignoring HA message.*vote.*not in our membership list",
                 r"pengine.*Attempting recovery of resource",
                 r"is taking more than 2x its timeout",
                 r"Confirm not received from",
                 r"Welcome reply not received from",
                 r"Attempting to schedule .* after a stop",
                 r"Resource .* was active at shutdown",
                 r"duplicate entries for call_id",
                 r"Search terminated:",
                 r"No need to invoke the TE",
                 r"global_timer_callback:",
                 r"Faking parameter digest creation",
                 r"Parameters to .* action changed:",
                 r"Parameters to .* changed",
             ]
 
         self.ignore = [
                 "(ERROR|error): crm_abort:.*crm_glib_handler: ",
                 "(ERROR|error): Message hist queue is filling up",
                 "stonithd.*CRIT: external_hostlist:.*'vmware gethosts' returned an empty hostlist",
                 "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.",
                 "pengine.*Preventing .* from re-starting",
                 ]
 
 class crm_cs_v0(BasePatterns):
     def __init__(self, name):
         BasePatterns.__init__(self, name)
 
         self.commands.update({
             "EpocheCmd"      : "crm_node -e --openais",
             "QuorumCmd"      : "crm_node -q --openais",
             "ParitionCmd"    : "crm_node -p --openais",
             "StartCmd"       : "service corosync start",
             "StopCmd"        : "service corosync stop",
         })
 
         self.search.update({
 # The next pattern is too early
 #            "Pat:We_stopped"   : "%s.*Service engine unloaded: Pacemaker Cluster Manager",
 # The next pattern would be preferred, but it doesn't always come out
 #            "Pat:We_stopped"   : "%s.*Corosync Cluster Engine exiting with status",
             "Pat:We_stopped"   : "%s\W.*Service engine unloaded: corosync cluster quorum service",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "corosync:.*Node %s is now: lost",
 
             "Pat:ChildExit"    : "Child process .* exited",
             "Pat:ChildKilled"  : "%s\W.*corosync.*Child process %s terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*corosync.*Respawning failed child process: %s",
         })
 
         self.ignore = [
             r"crm_mon:",
             r"crmadmin:",
             r"update_trace_data",
             r"async_notify:.*strange, client not found",
             r"Parse error: Ignoring unknown option .*nodename",
             r"error: log_operation:.*Operation 'reboot' .* with device 'FencingFail' returned:",
             r"Child process .* terminated with signal 9",
             r"getinfo response error: 1$",
         ]
 
         self.BadNews = [
             r"error:",
             r"crit:",
             r"ERROR:",
             r"CRIT:",
             r"Shutting down...NOW",
             r"Timer I_TERMINATE just popped",
             r"input=I_ERROR",
             r"input=I_FAIL",
             r"input=I_INTEGRATED cause=C_TIMER_POPPED",
             r"input=I_FINALIZED cause=C_TIMER_POPPED",
             r"input=I_ERROR",
             r", exiting\.",
             r"(WARN|warn).*Ignoring HA message.*vote.*not in our membership list",
             r"pengine.*Attempting recovery of resource",
             r"is taking more than 2x its timeout",
             r"Confirm not received from",
             r"Welcome reply not received from",
             r"Attempting to schedule .* after a stop",
             r"Resource .* was active at shutdown",
             r"duplicate entries for call_id",
             r"Search terminated:",
             r":global_timer_callback",
             r"Faking parameter digest creation",
             r"Parameters to .* action changed:",
             r"Parameters to .* changed",
             r"The .* process .* terminated with signal",
             r"Child process .* terminated with signal",
             r"LogActions:.*Recover",
             r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
             r"Peer is not part of our cluster",
             r"We appear to be in an election loop",
             r"Unknown node -> we will not deliver message",
             r"crm_write_blackbox",
             r"pacemakerd.*Could not connect to Cluster Configuration Database API",
             r"Receiving messages from a node we think is dead",
             r"share the same cluster nodeid",
             r"share the same name",
 
             #r"crm_ipc_send:.*Request .* failed",
             #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received",
 
                 # Not inherently bad, but worth tracking
             #r"No need to invoke the TE",
             #r"ping.*: DEBUG: Updated connected = 0",
             #r"Digest mis-match:",
             r"te_graph_trigger:.*Transition failed: terminated",
             r"process_ping_reply",
             r"warn.*:retrieveCib",
             #r"Executing .* fencing operation",
             #r"fence_pcmk.* Call to fence",
             #r"fence_pcmk",
             r"cman killed by node",
             r"Election storm",
             r"stalled the FSA with pending inputs",
         ]
 
 
         self.components["common-ignore"] = [
                     "Pending action:",
                     "error: crm_log_message_adv:",
                     "resources were active at shutdown",
                     "pending LRM operations at shutdown",
                     "Lost connection to the CIB service",
                     "Connection to the CIB terminated...",
                     "Sending message to CIB service FAILED",
                     "apply_xml_diff:.*Diff application failed!",
                     "crmd.*Action A_RECOVER .* not supported",
                     "unconfirmed_actions:.*Waiting on .* unconfirmed actions",
                     "cib_native_msgready:.*Message pending on command channel",
                     "crmd.*do_exit:.*Performing A_EXIT_1 - forcefully exiting the CRMd",
                     "verify_stopped:.*Resource .* was active at shutdown.  You may ignore this error if it is unmanaged.",
                     "error: attrd_connection_destroy:.*Lost connection to attrd",
                     "info: te_fence_node:.*Executing .* fencing operation",
                     "crm_write_blackbox:",
 #                    "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery",
 #                    "error: process_pe_message: Transition .* ERRORs found during PE processing",
             ]
         
         self.components["corosync-ignore"] = [
             r"error: pcmk_cpg_dispatch:.*Connection to the CPG API failed: Library error",
             r"The .* process .* exited",
             r"pacemakerd.*error: pcmk_child_exit:.*Child process .* exited",
             r"cib.*error: cib_cs_destroy:.*Corosync connection lost",
             r"stonith-ng.*error: stonith_peer_cs_destroy:.*Corosync connection terminated",
             r"The cib process .* exited: Invalid argument",
             r"The attrd process .* exited: Transport endpoint is not connected",
             r"The crmd process .* exited: Link has been severed",
             r"error: pcmk_child_exit:.*Child process cib .* exited: Invalid argument",
             r"error: pcmk_child_exit:.*Child process attrd .* exited: Transport endpoint is not connected",
             r"error: pcmk_child_exit:.*Child process crmd .* exited: Link has been severed",
             r"lrmd.*error: crm_ipc_read:.*Connection to stonith-ng failed",
             r"lrmd.*error: mainloop_gio_callback:.*Connection to stonith-ng.* closed",
             r"lrmd.*error: stonith_connection_destroy_cb:.*LRMD lost STONITH connection",
             r"crmd.*do_state_transition:.*State transition .* S_RECOVERY",
             r"crmd.*error: do_log:.*FSA: Input I_ERROR",
             r"crmd.*error: do_log:.*FSA: Input I_TERMINATE",
             r"crmd.*error: pcmk_cman_dispatch:.*Connection to cman failed",
             r"crmd.*error: crmd_fast_exit:.*Could not recover from internal error",
             r"error: crm_ipc_read:.*Connection to cib_shm failed",
             r"error: mainloop_gio_callback:.*Connection to cib_shm.* closed",
             r"error: stonith_connection_failed:.*STONITH connection failed",
             ]
 
         self.components["corosync"] = [
             r"pacemakerd.*error: cfg_connection_destroy:.*Connection destroyed",
             r"pacemakerd.*error: mcp_cpg_destroy:.*Connection destroyed",
             r"crit: attrd_(cs|cpg)_destroy:.*Lost connection to Corosync service",
             r"stonith_peer_cs_destroy:.*Corosync connection terminated",
             r"cib_cs_destroy:.*Corosync connection lost!  Exiting.",
             r"crmd_(cs|quorum)_destroy:.*connection terminated",
             r"pengine.*Scheduling Node .* for STONITH",
             r"tengine_stonith_notify:.*Peer .* was terminated .*: OK",
         ]
 
         self.components["cib-ignore"] = [
             "lrmd.*Connection to stonith-ng failed",
             "lrmd.*Connection to stonith-ng.* closed",
             "lrmd.*LRMD lost STONITH connection",
             "lrmd.*STONITH connection failed, finalizing .* pending operations",
             ]
 
         self.components["cib"] = [
                     "State transition .* S_RECOVERY",
                     "Respawning .* crmd",
                     "Respawning .* attrd",
                     "Connection to cib_.* failed",
                     "Connection to cib_.* closed",
                     "Connection to the CIB terminated...",
                     "(Child process|The) crmd .* exited: Generic Pacemaker error",
                     "(Child process|The) attrd .* exited: (Connection reset by peer|Transport endpoint is not connected)",
                     "Lost connection to CIB service",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*I_ERROR.*crmd_cib_connection_destroy",
                     "crmd.*Could not recover from internal error",
                     ]
 
         self.components["lrmd"] = [
                     "State transition .* S_RECOVERY",
                     "LRM Connection failed",
                     "Respawning .* crmd",
                     "Connection to lrmd failed",
                     "Connection to lrmd.* closed",
                     "crmd.*I_ERROR.*lrm_connection_destroy",
                     "(Child process|The) crmd .* exited: Generic Pacemaker error",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*Could not recover from internal error",
                     ]
         self.components["lrmd-ignore"] = []
 
         self.components["crmd"] = [
 #                    "WARN: determine_online_status: Node .* is unclean",
 #                    "Scheduling Node .* for STONITH",
 #                    "Executing .* fencing operation",
 # Only if the node wasn't the DC:  "State transition S_IDLE",
                     "State transition .* -> S_IDLE",
                     ]
         self.components["crmd-ignore"] = []
 
         self.components["attrd"] = []
         self.components["attrd-ignore"] = []
 
         self.components["pengine"] = [
                     "State transition .* S_RECOVERY",
                     "Respawning .* crmd",
                     "(The|Child process) crmd .* exited: Generic Pacemaker error",
                     "Connection to pengine failed",
                     "Connection to pengine.* closed",
                     "Connection to the Policy Engine failed",
                     "crmd.*I_ERROR.*save_cib_contents",
                     "crmd.*Input I_TERMINATE from do_recover",
                     "crmd.*Could not recover from internal error",
                     ]
         self.components["pengine-ignore"] = []
 
         self.components["stonith"] = [
             "Connection to stonith-ng failed",
             "LRMD lost STONITH connection",
             "Connection to stonith-ng.* closed",
             "Fencing daemon connection failed",
             "crmd.*stonith_api_add_notification:.*Callback already present",
         ]
         self.components["stonith-ignore"] = [
             "LogActions: Recover Fencing",
             "Updating failcount for Fencing",
             "error: crm_ipc_read: Connection to stonith-ng failed",
             "error: mainloop_gio_callback: Connection to stonith-ng.*closed (I/O condition=17)",
             "crit: tengine_stonith_connection_destroy: Fencing daemon connection failed",
             "error: te_connect_stonith:.*Sign-in failed: triggered a retry",
             "STONITH connection failed, finalizing .* pending operations.",
             "process_lrm_event:.*Operation Fencing.* Error",
         ]
         self.components["stonith-ignore"].extend(self.components["common-ignore"])
 
 class crm_mcp(crm_cs_v0):
     '''
     The crm version 4 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of native corosync (no plugins)
     '''
     def __init__(self, name):
         crm_cs_v0.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service corosync start && service pacemaker start",
             "StopCmd"        : "service pacemaker stop; service pacemaker_remote stop; service corosync stop",
 
             "EpocheCmd"      : "crm_node -e",
             "QuorumCmd"      : "crm_node -q",
             "ParitionCmd"    : "crm_node -p",
         })
 
         self.search.update({
             # Close enough... "Corosync Cluster Engine exiting normally" isn't printed
             #   reliably and there's little interest in doing anything it
             "Pat:We_stopped"   : "%s\W.*Unloading all Corosync service engines",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "crmd.*Node %s\[.*state is now lost",
 
             "Pat:ChildExit"    : "The .* process exited",
             "Pat:ChildKilled"  : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
 
             "Pat:InfraUp"      : "%s\W.*corosync.*Initializing transport",
             "Pat:PacemakerUp"  : "%s\W.*pacemakerd.*Starting Pacemaker",
         })
 
 #        if self.Env["have_systemd"]:
 #            self.update({
 #                # When systemd is in use, we can look for this instead
 #                "Pat:We_stopped"   : "%s.*Stopped Corosync Cluster Engine",
 #            })
 
+class crm_mcp_docker(crm_mcp):
+    '''
+    Pattern variant for crm_mcp clusters whose nodes are docker nodes.
+    Inherits every command and search pattern from crm_mcp and only
+    replaces StartCmd/StopCmd with the pcmk_start/pcmk_stop commands.
+    '''
+    def __init__(self, name):
+        crm_mcp.__init__(self, name)
+
+        self.commands.update({
+            "StartCmd"       : "pcmk_start",
+            "StopCmd"        : "pcmk_stop",
+        })
+
 class crm_cman(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, name):
         crm_cs_v0.__init__(self, name)
 
         self.commands.update({
             "StartCmd"       : "service pacemaker start",
             "StopCmd"        : "service pacemaker stop; service pacemaker_remote stop",
 
             "EpocheCmd"      : "crm_node -e --cman",
             "QuorumCmd"      : "crm_node -q --cman",
             "ParitionCmd"    : "crm_node -p --cman",
 
             "Pat:We_stopped"   : "%s.*Unloading all Corosync service engines",
             "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost",
             "Pat:They_dead"    : "crmd.*Node %s\[.*state is now lost",
 
             "Pat:ChildKilled"  : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9",
             "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
         })
 
 
 class PatternSelector:
 
     def __init__(self, name=None):
         self.name = name
         self.base = BasePatterns("crm-base")
 
         if not name:
             crm_cs_v0("crm-plugin-v0")
             crm_cman("crm-cman")
             crm_mcp("crm-mcp")
             crm_lha("crm-lha")
         elif name == "crm-lha":
             crm_lha(name)
         elif name == "crm-plugin-v0":
             crm_cs_v0(name)
         elif name == "crm-cman":
             crm_cman(name)
         elif name == "crm-mcp":
             crm_mcp(name)
+        elif name == "crm-mcp-docker":
+            crm_mcp_docker(name)
 
     def get_variant(self, variant):
         if patternvariants.has_key(variant):
             return patternvariants[variant]
         print "defaulting to crm-base for %s" % variant
         return self.base
 
     def get_patterns(self, variant, kind):
         return self.get_variant(variant).get_patterns(kind)
 
     def get_template(self, variant, key):
         v = self.get_variant(variant)
         return v[key]
 
     def get_component(self, variant, kind):
         return self.get_variant(variant).get_component(kind)
 
     def __getitem__(self, key):
         return self.get_template(self.name, key)
 
 # python cts/CTSpatt.py -k crm-mcp -t StartCmd
 if __name__ == '__main__':
 
     pdir=os.path.dirname(sys.path[0])
     sys.path.insert(0, pdir) # So that things work from the source directory
 
     from cts.CTSvars   import *
 
     kind=None
     template=None
 
     skipthis=None
     args=sys.argv[1:]
     for i in range(0, len(args)):
        if skipthis:
            skipthis=None
            continue
 
        elif args[i] == "-k" or args[i] == "--kind":
            skipthis=1
            kind = args[i+1]
 
        elif args[i] == "-t" or args[i] == "--template":
            skipthis=1
            template = args[i+1]
 
        else:
            print "Illegal argument " + args[i]
 
 
     print PatternSelector(kind)[template]
diff --git a/cts/remote.py b/cts/remote.py
index c8253c3504..7920fc9756 100644
--- a/cts/remote.py
+++ b/cts/remote.py
@@ -1,270 +1,276 @@
 '''
 Classes related to running command remotely
 '''
 
 __copyright__='''
 Copyright (C) 2014 Andrew Beekhof <andrew@beekhof.net>
 Licensed under the GNU GPL.
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 import types, string, select, sys, time, re, os, struct, signal
 import time, syslog, random, traceback, base64, pickle, binascii, fcntl
 
 from cts.logging import LogFactory 
 
 from socket import gethostbyname_ex
 from UserDict import UserDict
 from subprocess import Popen,PIPE
 
 pdir=os.path.dirname(sys.path[0])
 sys.path.insert(0, pdir) # So that things work from the source directory
 
 from cts.CTSvars import *
 from cts.logging import *
 from threading import Thread
 
 trace_rsh=None
 trace_lw=None
 
 class AsyncWaitProc(Thread):
     def __init__(self, proc, node, command, completionDelegate=None):
         self.proc = proc
         self.node = node
         self.command = command
         self.logger = LogFactory()
         self.delegate = completionDelegate;
         Thread.__init__(self)
 
     def run(self):
         outLines = None
         errLines = None
         self.logger.debug("cmd: async: target=%s, pid=%d: %s" % (self.node, self.proc.pid, self.command))
 
         self.proc.wait()
         self.logger.debug("cmd: pid %d returned %d" % (self.proc.pid, self.proc.returncode))
 
         if self.proc.stderr:
             errLines = self.proc.stderr.readlines()
             self.proc.stderr.close()
             for line in errLines:
                 self.logger.debug("cmd: stderr[%d]: %s" % (self.proc.pid, line))
 
         if self.proc.stdout:
             outLines = self.proc.stdout.readlines()
             self.proc.stdout.close()
 #            for line in outLines:
 #                self.logger.debug("cmd: stdout[%d]: %s" % (self.proc.pid, line))
 
         if self.delegate:
             self.delegate.async_complete(self.proc.pid, self.proc.returncode, outLines, errLines)
 
 class AsyncRemoteCmd(Thread):
     def __init__(self, node, command, completionDelegate=None):
         self.proc = None
         self.node = node
         self.command = command
         self.logger = LogFactory()
         self.delegate = completionDelegate;
         Thread.__init__(self)
 
     def run(self):
         outLines = None
         errLines = None
 
         self.proc = Popen(self.command, stdout = PIPE, stderr = PIPE, close_fds = True, shell = True)
 
         self.logger.debug("cmd: async: target=%s, pid=%d: %s" % (self.node, self.proc.pid, self.command))
         self.proc.wait()
         self.logger.debug("cmd: pid %d returned %d to %s" % (self.proc.pid, self.proc.returncode, repr(self.delegate)))
 
         if self.proc.stderr:
             errLines = self.proc.stderr.readlines()
             self.proc.stderr.close()
             for line in errLines:
                 self.logger.debug("cmd: stderr[%d]: %s" % (self.proc.pid, line))
 
         if self.proc.stdout:
             outLines = self.proc.stdout.readlines()
             self.proc.stdout.close()
 #            for line in outLines:
 #                self.logger.log("cmd: stdout[%d]: %s" % (self.proc.pid, line))
 
         if self.delegate:
             self.delegate.async_complete(self.proc.pid, self.proc.returncode, outLines, errLines)
 
 class RemotePrimitives:
     def __init__(self, Command=None, CpCommand=None):
         if CpCommand:
             self.CpCommand = CpCommand
         else:
             #        -B: batch mode, -q: no stats (quiet)
             self.CpCommand = "scp -B -q"
 
         if Command:
             self.Command = Command
         else:
             #   -n: no stdin, -x: no X11,
             #   -o ServerAliveInterval=5 disconnect after 3*5s if the server stops responding
             self.Command = "ssh -l root -n -x -o ServerAliveInterval=5 -o ConnectTimeout=10 -o TCPKeepAlive=yes -o ServerAliveCountMax=3 "
 
 class RemoteExec:
     '''This is an abstract remote execution class.  It runs a command on another
        machine - somehow.  The somehow is up to us.  This particular
        class uses ssh.
        Most of the work is done by fork/exec of ssh or scp.
     '''
 
     def __init__(self, rsh, silent=False):
         print repr(self)
         self.async = []
         self.rsh = rsh
         self.silent = silent
         self.logger = LogFactory()
 
         if trace_rsh:
             self.silent = False
 
         self.OurNode=string.lower(os.uname()[1])
 
     def _fixcmd(self, cmd):
         return re.sub("\'", "'\\''", cmd)
 
     def _cmd(self, *args):
 
         '''Compute the string that will run the given command on the
         given remote system'''
 
         args= args[0]
         sysname = args[0]
         command = args[1]
 
         #print "sysname: %s, us: %s" % (sysname, self.OurNode)
         if sysname == None or string.lower(sysname) == self.OurNode or sysname == "localhost":
             ret = command
         else:
             ret = self.rsh.Command + " " + sysname + " '" + self._fixcmd(command) + "'"
         #print ("About to run %s\n" % ret)
         return ret
 
     def log(self, args):
         if not self.silent:
             self.logger.log(args)
 
     def debug(self, args):
         if not self.silent:
             self.logger.debug(args)
 
     def call_async(self, node, command, completionDelegate=None):
         #if completionDelegate: print "Waiting for %d on %s: %s" % (proc.pid, node, command)
         aproc = AsyncRemoteCmd(node, self._cmd([node, command]), completionDelegate=completionDelegate)
         aproc.start()
         return aproc
 
 
     def __call__(self, node, command, stdout=0, synchronous=1, silent=False, blocking=True, completionDelegate=None):
         '''Run the given command on the given remote system
         If you call this class like a function, this is the function that gets
         called.  It just runs it roughly as though it were a system() call
         on the remote machine.  The first argument is name of the machine to
         run it on.
         '''
 
         if trace_rsh:
             silent = False
 
         rc = 0
         result = None
         proc = Popen(self._cmd([node, command]),
                      stdout = PIPE, stderr = PIPE, close_fds = True, shell = True)
 
         #if completionDelegate: print "Waiting for %d on %s: %s" % (proc.pid, node, command)
         if not synchronous and proc.pid > 0 and not self.silent:
             aproc = AsyncWaitProc(proc, node, command, completionDelegate=completionDelegate)
             aproc.start()
             return 0
 
         #if not blocking:
         #    fcntl.fcntl(proc.stdout.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)
 
         if proc.stdout:
             if stdout == 1:
                 result = proc.stdout.readline()
             else:
                 result = proc.stdout.readlines()
             proc.stdout.close()
         else:
             self.log("No stdout stream")
 
         rc = proc.wait()
 
         if not silent: self.debug("cmd: target=%s, rc=%d: %s" % (node, rc, command))
         if stdout == 1:
             return result
 
         if proc.stderr:
             errors = proc.stderr.readlines()
             proc.stderr.close()
 
         if completionDelegate:
             completionDelegate.async_complete(proc.pid, proc.returncode, result, errors)
 
         if not silent:
             for err in errors:
                 if stdout == 3:
                     result.append("error: "+err)
                 else:
                     self.debug("cmd: stderr: %s" % err)
 
         if stdout == 0:
             if not silent and result:
                 for line in result:
                     self.debug("cmd: stdout: %s" % line)
             return rc
 
         return (rc, result)
 
     def cp(self, source, target, silent=False):
         '''Perform a remote copy'''
         cpstring = self.rsh.CpCommand  + " \'" + source + "\'"  + " \'" + target + "\'"
         rc = os.system(cpstring)
         if trace_rsh:
             silent = False
         if not silent: self.debug("cmd: rc=%d: %s" % (rc, cpstring))
 
         return rc
 
 class RemoteFactory:
     # Class variables
     rsh = RemotePrimitives()
     instance = None
 
     def getInstance(self):
         if not RemoteFactory.instance:
             RemoteFactory.instance = RemoteExec(RemoteFactory.rsh, False)
         return RemoteFactory.instance
 
     def new(self, silent=False):
         return RemoteExec(RemoteFactory.rsh, silent)
 
+    def enable_docker(self):
+        print "Using DOCKER backend for connections to cluster nodes"
+        # rsh is a class variable shared by every RemoteExec built from this factory, so this switches them all
+        RemoteFactory.rsh.Command = "/usr/libexec/phd/docker/phd_docker_remote_cmd "
+        RemoteFactory.rsh.CpCommand = "/usr/libexec/phd/docker/phd_docker_cp"
+
     def enable_qarsh(self):
         # http://nstraz.wordpress.com/2008/12/03/introducing-qarsh/
         print "Using QARSH for connections to cluster nodes"
 
         RemoteFactory.rsh.Command = "qarsh -t 300 -l root"
         RemoteFactory.rsh.CpCommand = "qacp -q"
 
diff --git a/cts/watcher.py b/cts/watcher.py
index a21633898e..5e6ee43d72 100644
--- a/cts/watcher.py
+++ b/cts/watcher.py
@@ -1,542 +1,546 @@
 '''
 Classes related to searching logs
 '''
 
 __copyright__='''
 Copyright (C) 2014 Andrew Beekhof <andrew@beekhof.net>
 Licensed under the GNU GPL.
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 import types, string, select, sys, time, re, os, struct, signal
 import time, syslog, random, traceback, base64, pickle, binascii, fcntl
 import threading
 
 
 from cts.remote import *
 from cts.logging import *
 
 has_log_watcher = {}
 log_watcher_bin = CTSvars.CRM_DAEMON_DIR + "/cts_log_watcher.py"
 log_watcher = """
 import sys, os, fcntl
 
 '''
 Remote logfile reader for CTS
 Reads a specified number of lines from the supplied offset
 Returns the current offset
 
 Contains logic for handling truncation
 '''
 
 limit    = 0
 offset   = 0
 prefix   = ''
 filename = '/var/log/messages'
 
 skipthis=None
 args=sys.argv[1:]
 for i in range(0, len(args)):
     if skipthis:
         skipthis=None
         continue
 
     elif args[i] == '-l' or args[i] == '--limit':
         skipthis=1
         limit = int(args[i+1])
 
     elif args[i] == '-f' or args[i] == '--filename':
         skipthis=1
         filename = args[i+1]
 
     elif args[i] == '-o' or args[i] == '--offset':
         skipthis=1
         offset = args[i+1]
 
     elif args[i] == '-p' or args[i] == '--prefix':
         skipthis=1
         prefix = args[i+1]
 
     elif args[i] == '-t' or args[i] == '--tag':
         skipthis=1
 
 if not os.access(filename, os.R_OK):
     print prefix + 'Last read: %d, limit=%d, count=%d - unreadable' % (0, limit, 0)
     sys.exit(1)
 
 logfile=open(filename, 'r')
 logfile.seek(0, os.SEEK_END)
 newsize=logfile.tell()
 
 if offset != 'EOF':
     offset = int(offset)
     if newsize >= offset:
         logfile.seek(offset)
     else:
         print prefix + ('File truncated from %d to %d' % (offset, newsize))
         if (newsize*1.05) < offset:
             logfile.seek(0)
         # else: we probably just lost a few logs after a fencing op
         #       continue from the new end
         # TODO: accept a timestamp and discard all messages older than it
 
 # Don't block when we reach EOF
 fcntl.fcntl(logfile.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)
 
 count = 0
 while True:
     if logfile.tell() >= newsize:   break
     elif limit and count >= limit: break
 
     line = logfile.readline()
     if not line: break
 
     print line.strip()
     count += 1
 
 print prefix + 'Last read: %d, limit=%d, count=%d' % (logfile.tell(), limit, count)
 logfile.close()
 """
 
 class SearchObj:
     def __init__(self, filename, host=None, name=None):
 
         self.limit = None
         self.cache = []
         self.logger = LogFactory()
         self.host = host
         self.name = name
         self.filename = filename
         self.rsh = RemoteFactory().getInstance()
 
         self.offset = "EOF"
 
         if host == None:
             host = "localhost"
 
     def __str__(self):
         if self.host:
             return "%s:%s" % (self.host, self.filename)
         return self.filename
 
     def log(self, args):
         message = "lw: %s: %s" % (self, args)
         self.logger.log(message)
 
     def debug(self, args):
         message = "lw: %s: %s" % (self, args)
         self.logger.debug(message)
 
     def harvest(self, delegate=None):
         async = self.harvest_async(delegate)
         async.join()
 
     def harvest_async(self, delegate=None):
         self.log("Not implemented")
         raise
 
     def end(self):
         self.debug("Unsetting the limit")
         # Unset the limit
         self.limit = None
 
 class FileObj(SearchObj):
     def __init__(self, filename, host=None, name=None):
         global has_log_watcher
         SearchObj.__init__(self, filename, host, name)
 
         if not has_log_watcher.has_key(host):
 
             global log_watcher
             global log_watcher_bin
 
             self.debug("Installing %s on %s" % (log_watcher_bin, host))
-            self.rsh(host, '''echo "%s" > %s''' % (log_watcher, log_watcher_bin), silent=True)
+            # Write the script locally with '>' (not '>>') so repeated installs cannot stack copies of it; quoting 'END' stops the shell from expanding the body
+            os.system("cat << 'END' > %s\n%s\nEND" %(log_watcher_bin, log_watcher))
+            os.system("chmod 755 %s" %(log_watcher_bin))
+            if host:  # no host given: the script was just written on this machine and "root@None:" is not a usable copy target
+                self.rsh.cp(log_watcher_bin, "root@%s:%s" % (host, log_watcher_bin))
             has_log_watcher[host] = 1
 
         self.harvest()
 
     def async_complete(self, pid, returncode, outLines, errLines):
         for line in outLines:
             match = re.search("^CTSwatcher:Last read: (\d+)", line)
             if match:
                 last_offset = self.offset
                 self.offset = match.group(1)
                 #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(outLines), self.offset))
                 self.debug("Got %d lines, new offset: %s  %s" % (len(outLines), self.offset, repr(self.delegate)))
 
             elif re.search("^CTSwatcher:.*truncated", line):
                 self.log(line)
             elif re.search("^CTSwatcher:", line):
                 self.debug("Got control line: "+ line)
             else:
                 self.cache.append(line)
 
         if self.delegate:
             self.delegate.async_complete(pid, returncode, self.cache, errLines)
 
     def harvest_async(self, delegate=None):
         self.delegate = delegate
         self.cache = []
 
-        if self.limit != None and self.offset > self.limit:
+        if self.limit != None and (self.offset == "EOF" or int(self.offset) > self.limit):  # offset is "EOF" or a digit string, limit is an int: compare numerically
             if self.delegate:
                 self.delegate.async_complete(-1, -1, [], [])
             return None
 
         global log_watcher_bin
         return self.rsh.call_async(self.host,
                                    "python %s -t %s -p CTSwatcher: -l 200 -f %s -o %s -t %s" % (log_watcher_bin, self.name, self.filename, self.offset, self.name),
                 completionDelegate=self)
 
     def setend(self):
         if self.limit: 
             return
 
         global log_watcher_bin
         (rc, lines) = self.rsh(self.host,
                                "python %s -t %s -p CTSwatcher: -l 2 -f %s -o %s -t %s" % (log_watcher_bin, self.name, self.filename, "EOF", self.name),
                  None, silent=True)
 
         for line in lines:
             match = re.search("^CTSwatcher:Last read: (\d+)", line)
             if match:
                 last_offset = self.offset
                 self.limit = int(match.group(1))
                 #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(lines), self.offset))
                 self.debug("Set limit to: %d" % self.limit)
 
         return
 
 class JournalObj(SearchObj):
 
     def __init__(self, host=None, name=None):
         SearchObj.__init__(self, name, host, name)
         self.harvest()
 
     def async_complete(self, pid, returncode, outLines, errLines):
         #self.log( "%d returned on %s" % (pid, self.host))
         foundCursor = False
         for line in outLines:
             match = re.search("^-- cursor: ([^.]+)", line)
             if match:
                 foundCursor = True
                 last_offset = self.offset
                 self.offset = match.group(1).strip()
                 self.debug("Got %d lines, new cursor: %s" % (len(outLines), self.offset))
             else:
                 self.cache.append(line)
 
         if self.limit and not foundCursor:
             self.hitLimit = True
             self.debug("Got %d lines but no cursor: %s" % (len(outLines), self.offset))
             
             # Get the current cursor
             (rc, outLines) = self.rsh(self.host, "journalctl -q -n 0 --show-cursor", stdout=None, silent=True, synchronous=True)
             for line in outLines:
                 match = re.search("^-- cursor: ([^.]+)", line)
                 if match:
                     last_offset = self.offset
                     self.offset = match.group(1).strip()
                     self.debug("Got %d lines, new cursor: %s" % (len(outLines), self.offset))
                 else:
                     self.log("Not a new cursor: %s" % line)
                     self.cache.append(line)
 
         if self.delegate:
             self.delegate.async_complete(pid, returncode, self.cache, errLines)
 
     def harvest_async(self, delegate=None):
         self.delegate = delegate
         self.cache = []
 
         # Use --lines to prevent journalctl from overflowing the Popen input buffer
         if self.limit and self.hitLimit:
             return None
 
         elif self.limit:
             command = "journalctl -q --after-cursor='%s' --until '%s' --lines=200 --show-cursor" % (self.offset, self.limit)
         else:
             command = "journalctl -q --after-cursor='%s' --lines=200 --show-cursor" % (self.offset)
 
         if self.offset == "EOF":
             command = "journalctl -q -n 0 --show-cursor"
 
         return self.rsh.call_async(self.host, command, completionDelegate=self)
 
     def setend(self):
         if self.limit: 
             return
 
         self.hitLimit = False
         (rc, lines) = self.rsh(self.host, "date +'%Y-%m-%d %H:%M:%S'", stdout=None, silent=True)
 
         for line in lines:
             self.limit = line.strip()
             self.debug("Set limit to: %s" % self.limit)
 
 
         return
 
 class LogWatcher(RemoteExec):
 
     '''This class watches logs for messages that fit certain regular
        expressions.  Watching logs for events isn't the ideal way
        to do business, but it's better than nothing :-)
 
        On the other hand, this class is really pretty cool ;-)
 
        The way you use this class is as follows:
           Construct a LogWatcher object
           Call setwatch() when you want to start watching the log
           Call look() to scan the log looking for the patterns
     '''
 
     def __init__(self, log, regexes, name="Anon", timeout=10, debug_level=None, silent=False, hosts=None, kind=None):
         '''This is the constructor for the LogWatcher class.  It takes a
         log name to watch, and a list of regular expressions to watch for."
         '''
         self.logger = LogFactory()
 
         self.name        = name
         self.regexes     = regexes
         self.debug_level = debug_level
         self.whichmatch  = -1
         self.unmatched   = None
         self.cache_lock = threading.Lock()
 
         self.file_list = []
         self.line_cache = []
 
         #  Validate our arguments.  Better sooner than later ;-)
         for regex in regexes:
             assert re.compile(regex)
 
         if kind:
             self.kind    = kind
         else:
             raise
             self.kind    = self.Env["LogWatcher"]
 
         if log:
             self.filename    = log
         else:
             raise
             self.filename    = self.Env["LogFileName"]
 
         if hosts:
             self.hosts = hosts
         else:
             raise
             self.hosts = self.Env["nodes"]
 
         if trace_lw:
             self.debug_level = 3
             silent = False
 
         if not silent:
             for regex in self.regexes:
                 self.debug("Looking for regex: "+regex)
 
         self.Timeout = int(timeout)
         self.returnonlymatch = None
 
     def debug(self, args):
         message = "lw: %s: %s" % (self.name, args)
         self.logger.debug(message)
 
     def setwatch(self):
         '''Mark the place to start watching the log from.
         '''
 
         if self.kind == "remote":
             for node in self.hosts:
                 self.file_list.append(FileObj(self.filename, node, self.name))
 
         elif self.kind == "journal":
             for node in self.hosts:
                 self.file_list.append(JournalObj(node, self.name))
 
         else:
             self.file_list.append(FileObj(self.filename))
 
         # print "%s now has %d files" % (self.name, len(self.file_list))
 
     def __del__(self):
         if self.debug_level > 1: self.debug("Destroy")
 
     def ReturnOnlyMatch(self, onlymatch=1):
         '''Specify one or more subgroups of the match to return rather than the whole string
            http://www.python.org/doc/2.5.2/lib/match-objects.html
         '''
         self.returnonlymatch = onlymatch
 
     def async_complete(self, pid, returncode, outLines, errLines):
         # TODO: Probably need a lock for updating self.line_cache
         self.logger.debug("%s: Got %d lines from %d (total %d)" % (self.name, len(outLines), pid, len(self.line_cache)))
         if len(outLines):
             self.cache_lock.acquire()
             self.line_cache.extend(outLines)
             self.cache_lock.release()
 
     def __get_lines(self, timeout):
         count=0
         if not len(self.file_list):
             raise ValueError("No sources to read from")
 
         pending = []
         #print "%s waiting for %d operations" % (self.name, self.pending)
         for f in self.file_list:
             t = f.harvest_async(self)
             if t:
                 pending.append(t)
 
         for t in pending:
             t.join(60.0)
             if t.isAlive():
-                self.logger.log("%s: Aborting after 20s waiting for %d logging commands" % (self.name, repr(t)))
+                self.logger.log("%s: Aborting after 60s waiting for %s logging commands" % (self.name, repr(t)))  # 60s matches the join(60.0) timeout above
                 return
 
         #print "Got %d lines" % len(self.line_cache)
 
     def end(self):
         for f in self.file_list:
             f.end()
 
     def look(self, timeout=None, silent=False):
         '''Examine the log looking for the given patterns.
         It starts looking from the place marked by setwatch().
         This function looks in the file in the fashion of tail -f.
         It properly recovers from log file truncation, but not from
         removing and recreating the log.  It would be nice if it
         recovered from this as well :-)
 
         We return the first line which matches any of our patterns.
         '''
         if timeout == None: timeout = self.Timeout
 
         if trace_lw:
             silent = False
 
         lines=0
         needlines=True
         begin=time.time()
         end=begin+timeout+1
         if self.debug_level > 2: self.debug("starting single search: timeout=%d, begin=%d, end=%d" % (timeout, begin, end))
 
         if not self.regexes:
             self.debug("Nothing to look for")
             return None
 
         if timeout == 0:
             for f in self.file_list:
                 f.setend()
 
         while True:
             if len(self.line_cache):
                 lines += 1
 
                 self.cache_lock.acquire()
                 line = self.line_cache[0]
                 self.line_cache.remove(line)
                 self.cache_lock.release()
 
                 which=-1
                 if re.search("CTS:", line):
                     continue
                 if self.debug_level > 2: self.debug("Processing: "+ line)
                 for regex in self.regexes:
                     which=which+1
                     if self.debug_level > 3: self.debug("Comparing line to: "+ regex)
                     #matchobj = re.search(string.lower(regex), string.lower(line))
                     matchobj = re.search(regex, line)
                     if matchobj:
                         self.whichmatch=which
                         if self.returnonlymatch:
                             return matchobj.group(self.returnonlymatch)
                         else:
                             self.debug("Matched: "+line)
                             if self.debug_level > 1: self.debug("With: "+ regex)
                             return line
 
             elif timeout > 0 and end < time.time():
                 if self.debug_level > 1: self.debug("hit timeout: %d" % timeout)
 
                 timeout = 0
                 for f in self.file_list:
                     f.setend()
 
             else:
                 self.__get_lines(timeout)
                 if len(self.line_cache) == 0 and end < time.time():
                     self.debug("Single search terminated: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines))
                     return None
                 else:
                     self.debug("Waiting: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), len(self.line_cache)))
                     time.sleep(1)
 
         self.debug("How did we get here")
         return None
 
     def lookforall(self, timeout=None, allow_multiple_matches=None, silent=False):
         '''Examine the log looking for ALL of the given patterns.
         It starts looking from the place marked by setwatch().
 
         We return when the timeout is reached, or when we have found
         ALL of the regexes that were part of the watch
         '''
 
         if timeout == None: timeout = self.Timeout
         save_regexes = self.regexes
         returnresult = []
 
         if trace_lw:
             silent = False
 
         if not silent:
             self.debug("starting search: timeout=%d" % timeout)
             for regex in self.regexes:
                 if self.debug_level > 2: self.debug("Looking for regex: "+regex)
 
         while (len(self.regexes) > 0):
             oneresult = self.look(timeout)
             if not oneresult:
                 self.unmatched = self.regexes
                 self.matched = returnresult
                 self.regexes = save_regexes
                 self.end()
                 return None
 
             returnresult.append(oneresult)
             if not allow_multiple_matches:
                 del self.regexes[self.whichmatch]
 
             else:
                 # Allow multiple regexes to match a single line
                 tmp_regexes = self.regexes
                 self.regexes = []
                 which = 0
                 for regex in tmp_regexes:
                     matchobj = re.search(regex, oneresult)
                     if not matchobj:
                         self.regexes.append(regex)
 
         self.unmatched = None
         self.matched = returnresult
         self.regexes = save_regexes
         return returnresult
 
diff --git a/extra/resources/remote b/extra/resources/remote
index 9e0482bf74..9f141a2289 100644
--- a/extra/resources/remote
+++ b/extra/resources/remote
@@ -1,110 +1,110 @@
 #!/bin/sh
 #
 #
 #	remote OCF RA. This script provides metadata for the internal
 #	pacemaker remote lrmd connection agent.  Outside of acting
 #	as a place holder so the remote ra script can be indexed and
 #	providing metadata, this script should never be invoked.  The
 #	actual functionality behind the remote lrmd connection lives
 #	within pacemaker's crmd component.
 #
 # Copyright (c) 2013 David Vossel
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
 . ${OCF_FUNCTIONS}
 : ${__OCF_ACTION=$1}
 
 #######################################################################
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="remote" version="0.1">
   <version>0.1</version>
   <parameters>
     <parameter name="server" unique="1">
     <longdesc lang="en">
        Server location to connect to.  This can be an ip address or hostname.
     </longdesc>
     <shortdesc lang="en">Server location</shortdesc>
     <content type="string"/>
     </parameter>
     <parameter name="port" unique="1">
     <longdesc lang="en">
        tcp port to connect to.
     </longdesc>
     <shortdesc lang="en">tcp port</shortdesc>
     <content type="string" default="1984"/>
     </parameter>
   </parameters>
   <actions>
-    <action name="start"   timeout="15" />
-    <action name="stop"    timeout="15" />
-    <action name="monitor"    timeout="15" />
-    <action name="migrate_to"   timeout="15" />
-    <action name="migrate_from" timeout="15" />
+    <action name="start"   timeout="40" />
+    <action name="stop"    timeout="40" />
+    <action name="monitor"    timeout="30" />
+    <action name="migrate_to"   timeout="60" />
+    <action name="migrate_from" timeout="60" />
     <action name="meta-data"  timeout="5" />
   </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 
 remote_usage() {
 	cat <<END
 usage: $0 {meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 remote_unsupported() {
 	ocf_log info "This pacemaker version does not support the ocf:pacemaker:remote agent"
 	return $OCF_ERR_GENERIC
 }
 
 case $__OCF_ACTION in
 meta-data)	meta_data
 		exit $OCF_SUCCESS
 		;;
 start)		remote_unsupported;;
 stop)		remote_unsupported;;
 monitor)	remote_unsupported;;
 migrate_to)	remote_unsupported;;
 migrate_from) remote_unsupported;;
 validate-all) remote_unsupported;;
 usage|help)	remote_usage
 		exit $OCF_SUCCESS
 		;;
 *)		dummy_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
index c1801a4feb..f26225f94f 100644
--- a/lib/common/ipc.c
+++ b/lib/common/ipc.c
@@ -1,1266 +1,1266 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <grp.h>
 
 #include <errno.h>
 #include <fcntl.h>
 #include <bzlib.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/ipc.h>
 #include <crm/common/ipcs.h>
 
 #define PCMK_IPC_VERSION 1
 
 struct crm_ipc_response_header {
     struct qb_ipc_response_header qb;
     uint32_t size_uncompressed;
     uint32_t size_compressed;
     uint32_t flags;
     uint8_t  version; /* Protect against version changes for anyone that might bother to statically link us */
 };
 
 static int hdr_offset = 0;
 static int ipc_buffer_max = 0;
 static unsigned int pick_ipc_buffer(int max);
 
 static inline void
 crm_ipc_init(void)
 {
     if (hdr_offset == 0) {
         hdr_offset = sizeof(struct crm_ipc_response_header);
     }
     if (ipc_buffer_max == 0) {
         ipc_buffer_max = pick_ipc_buffer(0);
     }
 }
 
 int
 crm_ipc_default_buffer_size(void)
 {
     return pick_ipc_buffer(0);
 }
 
 static char *
 generateReference(const char *custom1, const char *custom2)
 {
     static uint ref_counter = 0;
     const char *local_cust1 = custom1;
     const char *local_cust2 = custom2;
     int reference_len = 4;
     char *since_epoch = NULL;
 
     reference_len += 20;        /* too big */
     reference_len += 40;        /* too big */
 
     if (local_cust1 == NULL) {
         local_cust1 = "_empty_";
     }
     reference_len += strlen(local_cust1);
 
     if (local_cust2 == NULL) {
         local_cust2 = "_empty_";
     }
     reference_len += strlen(local_cust2);
 
     since_epoch = calloc(1, reference_len);
 
     if (since_epoch != NULL) {
         sprintf(since_epoch, "%s-%s-%ld-%u",
                 local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++);
     }
 
     return since_epoch;
 }
 
 xmlNode *
 create_request_adv(const char *task, xmlNode * msg_data,
                    const char *host_to, const char *sys_to,
                    const char *sys_from, const char *uuid_from, const char *origin)
 {
     char *true_from = NULL;
     xmlNode *request = NULL;
     char *reference = generateReference(task, sys_from);
 
     if (uuid_from != NULL) {
         true_from = generate_hash_key(sys_from, uuid_from);
     } else if (sys_from != NULL) {
         true_from = strdup(sys_from);
     } else {
         crm_err("No sys from specified");
     }
 
     /* host_from will get set for us if necessary by CRMd when routed */
     request = create_xml_node(NULL, __FUNCTION__);
     crm_xml_add(request, F_CRM_ORIGIN, origin);
     crm_xml_add(request, F_TYPE, T_CRM);
     crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET);
     crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST);
     crm_xml_add(request, F_CRM_REFERENCE, reference);
     crm_xml_add(request, F_CRM_TASK, task);
     crm_xml_add(request, F_CRM_SYS_TO, sys_to);
     crm_xml_add(request, F_CRM_SYS_FROM, true_from);
 
     /* HOSTTO will be ignored if it is to the DC anyway. */
     if (host_to != NULL && strlen(host_to) > 0) {
         crm_xml_add(request, F_CRM_HOST_TO, host_to);
     }
 
     if (msg_data != NULL) {
         add_message_xml(request, F_CRM_DATA, msg_data);
     }
     free(reference);
     free(true_from);
 
     return request;
 }
 
 /*
  * This method adds a copy of xml_response_data
  */
 xmlNode *
 create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin)
 {
     xmlNode *reply = NULL;
 
     const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM);
     const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM);
     const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO);
     const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE);
     const char *operation = crm_element_value(original_request, F_CRM_TASK);
     const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE);
 
     if (type == NULL) {
         crm_err("Cannot create new_message, no message type in original message");
         CRM_ASSERT(type != NULL);
         return NULL;
 #if 0
     } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) {
         crm_err("Cannot create new_message, original message was not a request");
         return NULL;
 #endif
     }
     reply = create_xml_node(NULL, __FUNCTION__);
     if (reply == NULL) {
         crm_err("Cannot create new_message, malloc failed");
         return NULL;
     }
 
     crm_xml_add(reply, F_CRM_ORIGIN, origin);
     crm_xml_add(reply, F_TYPE, T_CRM);
     crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET);
     crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE);
     crm_xml_add(reply, F_CRM_REFERENCE, crm_msg_reference);
     crm_xml_add(reply, F_CRM_TASK, operation);
 
     /* since this is a reply, we reverse the from and to */
     crm_xml_add(reply, F_CRM_SYS_TO, sys_from);
     crm_xml_add(reply, F_CRM_SYS_FROM, sys_to);
 
     /* HOSTTO will be ignored if it is to the DC anyway. */
     if (host_from != NULL && strlen(host_from) > 0) {
         crm_xml_add(reply, F_CRM_HOST_TO, host_from);
     }
 
     if (xml_response_data != NULL) {
         add_message_xml(reply, F_CRM_DATA, xml_response_data);
     }
 
     return reply;
 }
 
 /* Libqb based IPC */
 
 /* Server... */
 
 GHashTable *client_connections = NULL;
 
 crm_client_t *
 crm_client_get(qb_ipcs_connection_t * c)
 {
     if (client_connections) {
         return g_hash_table_lookup(client_connections, c);
     }
 
     crm_trace("No client found for %p", c);
     return NULL;
 }
 
 crm_client_t *
 crm_client_get_by_id(const char *id)
 {
     gpointer key;
     crm_client_t *client;
     GHashTableIter iter;
 
     if (client_connections && id) {
         g_hash_table_iter_init(&iter, client_connections);
         while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) {
             if (strcmp(client->id, id) == 0) {
                 return client;
             }
         }
     }
 
     crm_trace("No client found with id=%s", id);
     return NULL;
 }
 
 const char *
 crm_client_name(crm_client_t * c)
 {
     if (c == NULL) {
         return "null";
     } else if (c->name == NULL && c->id == NULL) {
         return "unknown";
     } else if (c->name == NULL) {
         return c->id;
     } else {
         return c->name;
     }
 }
 
 void
 crm_client_init(void)
 {
     if (client_connections == NULL) {
         crm_trace("Creating client hash table");
         client_connections = g_hash_table_new(g_direct_hash, g_direct_equal);
     }
 }
 
 void
 crm_client_cleanup(void)
 {
     if (client_connections != NULL) {
         int active = g_hash_table_size(client_connections);
 
         if (active) {
             crm_err("Exiting with %d active connections", active);
         }
         g_hash_table_destroy(client_connections); client_connections = NULL;
     }
 }
 
 void
 crm_client_disconnect_all(qb_ipcs_service_t *service)
 {
     qb_ipcs_connection_t *c = qb_ipcs_connection_first_get(service);
 
     while (c != NULL) {
         qb_ipcs_connection_t *last = c;
 
         c = qb_ipcs_connection_next_get(service, last);
 
         /* There really shouldn't be anyone connected at this point */
         crm_notice("Disconnecting client %p, pid=%d...", last, crm_ipcs_client_pid(last));
         qb_ipcs_disconnect(last);
         qb_ipcs_connection_unref(last);
     }
 }
 
 /*
  * Create the bookkeeping record for a newly accepted IPC connection and
  * register it in client_connections, keyed by the connection pointer.
  *
  * c           libqb connection being accepted
  * uid_client  uid of the connecting process
  * gid_client  gid of the connecting process
  *
  * Returns the new client, or NULL if 'c' is NULL or memory could not be
  * allocated.
  */
 crm_client_t *
 crm_client_new(qb_ipcs_connection_t * c, uid_t uid_client, gid_t gid_client)
 {
     /* Looked up on first use and then cached for later calls */
     static uid_t uid_server = 0;
     static gid_t gid_cluster = 0;
 
     crm_client_t *client = NULL;
 
     CRM_LOG_ASSERT(c);
     if (c == NULL) {
         return NULL;
     }
 
     if (gid_cluster == 0) {
         uid_server = getuid();
         if(crm_user_lookup(CRM_DAEMON_USER, NULL, &gid_cluster) < 0) {
             /* The lookup is retried on every call while it fails; only warn once */
             static bool have_error = FALSE;
             if(have_error == FALSE) {
                 crm_warn("Could not find group for user %s", CRM_DAEMON_USER);
                 have_error = TRUE;
             }
         }
     }
 
     if(gid_cluster != 0 && gid_client != 0) {
         uid_t best_uid = -1; /* Passing -1 to chown(2) means don't change */
 
         if(uid_client == 0 || uid_server == 0) { /* Someone is privileged, but the other may not be */
             best_uid = QB_MAX(uid_client, uid_server);
             crm_trace("Allowing user %u to clean up after disconnect", best_uid);
         }
 
         crm_trace("Giving access to group %u", gid_cluster);
         qb_ipcs_connection_auth_set(c, best_uid, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
     }
 
     crm_client_init();
 
     /* TODO: Do our own auth checking, return NULL if unauthorized */
     client = calloc(1, sizeof(crm_client_t));
     if (client == NULL) {
         /* Refuse the client rather than dereferencing a NULL pointer below */
         crm_perror(LOG_ERR, "Allocating client");
         return NULL;
     }
 
     client->ipcs = c;
     client->kind = CRM_CLIENT_IPC;
     client->pid = crm_ipcs_client_pid(c);
 
     client->id = crm_generate_uuid();
 
     crm_debug("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id);
 
 #if ENABLE_ACL
     client->user = uid2username(uid_client);
 #endif
 
     g_hash_table_insert(client_connections, c, client);
     return client;
 }
 
 /*
  * Unregister a client and release everything it owns: its pending event
  * timer, queued events, identity strings and any remote-connection state.
  * A NULL client is ignored.
  */
 void
 crm_client_destroy(crm_client_t * c)
 {
     if (c == NULL) {
         return;
     }
 
     if (client_connections != NULL) {
         if (c->ipcs == NULL) {
             /* No IPC connection: the entry is removed by id instead */
             crm_trace("Destroying remote connection %p (%d remaining)",
                       c, crm_hash_table_size(client_connections) - 1);
             g_hash_table_remove(client_connections, c->id);
 
         } else {
             crm_trace("Destroying %p/%p (%d remaining)",
                       c, c->ipcs, crm_hash_table_size(client_connections) - 1);
             g_hash_table_remove(client_connections, c->ipcs);
         }
     }
 
     if (c->event_timer != 0) {
         g_source_remove(c->event_timer);
     }
 
     crm_debug("Destroying %d events", g_list_length(c->event_queue));
     while (c->event_queue != NULL) {
         struct iovec *pending = c->event_queue->data;
 
         c->event_queue = g_list_remove(c->event_queue, pending);
         free(pending[0].iov_base);
         free(pending[1].iov_base);
         free(pending);
     }
 
     free(c->id);
     free(c->name);
     free(c->user);
 
     if (c->remote != NULL) {
         if (c->remote->auth_timeout != 0) {
             g_source_remove(c->remote->auth_timeout);
         }
         free(c->remote->buffer);
         free(c->remote);
     }
 
     free(c);
 }
 
 /* Report the client pid that libqb's connection statistics hold for 'c'. */
 int
 crm_ipcs_client_pid(qb_ipcs_connection_t * c)
 {
     struct qb_ipcs_connection_stats conn_stats;
 
     /* Start from 0 so that is what gets returned if the lookup fills nothing in */
     conn_stats.client_pid = 0;
     qb_ipcs_connection_stats_get(c, &conn_stats, 0);
 
     return conn_stats.client_pid;
 }
 
 /*
  * Extract the XML payload from a raw IPC request received by a server.
  *
  * c      Client the request came from; flagged as a proxy endpoint when the
  *        header carries crm_ipc_proxied
  * data   Raw message: a struct crm_ipc_response_header followed by the text
  * size   Not consulted by this function
  * id     If non-NULL, receives the libqb request id
  * flags  If non-NULL, receives the flags stored in the header
  *
  * Returns the result of string2xml() on the (decompressed) text, or NULL
  * when the message version is newer than PCMK_IPC_VERSION or decompression
  * fails.
  */
 xmlNode *
 crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t * flags)
 {
     xmlNode *xml = NULL;
     char *uncompressed = NULL;
     char *text = ((char *)data) + sizeof(struct crm_ipc_response_header);
     struct crm_ipc_response_header *header = data;
 
     if (id) {
         *id = ((struct qb_ipc_response_header *)data)->id;
     }
     if (flags) {
         *flags = header->flags;
     }
 
     if (is_set(header->flags, crm_ipc_proxied)) {
         /* mark this client as being the endpoint of a proxy connection.
          * Proxy connections responses are sent on the event channel to avoid
          * blocking the proxy daemon (crmd) */
         c->flags |= crm_client_flag_ipc_proxied;
     }
 
     if(header->version > PCMK_IPC_VERSION) {
         crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
                 header->version, PCMK_IPC_VERSION);
         return NULL;
     }
 
     if (header->size_compressed) {
         int rc = 0;
         /* One byte more than the sender reported for the uncompressed text */
         unsigned int size_u = 1 + header->size_uncompressed;
         uncompressed = calloc(1, size_u);
 
         crm_trace("Decompressing message data %d bytes into %d bytes",
                   header->size_compressed, size_u);
 
         rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0);
         /* From here on 'text' refers to the decompressed copy */
         text = uncompressed;
 
         if (rc != BZ_OK) {
             crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc);
             free(uncompressed);
             return NULL;
         }
     }
 
     /* The last byte covered by size_uncompressed has to be the terminating NUL */
     CRM_ASSERT(text[header->size_uncompressed - 1] == 0);
 
     crm_trace("Received %.200s", text);
     xml = string2xml(text);
 
     /* NULL unless the message was compressed */
     free(uncompressed);
     return xml;
 }
 
 ssize_t crm_ipcs_flush_events(crm_client_t * c);
 
 /* Timer callback: retry delivery of the events still queued for a client. */
 static gboolean
 crm_ipcs_flush_events_cb(gpointer data)
 {
     crm_client_t *client = data;
 
     /* Clear the timer id first; the flush returns early while one is recorded */
     client->event_timer = 0;
     crm_ipcs_flush_events(client);
 
     return FALSE;
 }
 
 /*
  * Send queued events to a client, at most 100 per call.
  *
  * Sending stops at the first failure.  If events remain queued afterwards,
  * a timer is armed to try again later, with a delay that grows with the
  * queue length.
  *
  * Returns pcmk_ok when 'c' is NULL or a retry timer is already pending,
  * otherwise the result of the last qb_ipcs_event_sendv() call (0 if nothing
  * was queued).
  *
  * NOTE(review): because the "grown to" warning is tested first, a queue
  * length that is an exact multiple of 100 never reaches the eviction
  * branch - confirm this is intended.
  */
 ssize_t
 crm_ipcs_flush_events(crm_client_t * c)
 {
     int sent = 0;
     ssize_t rc = 0;
     int queue_len = 0;
 
     if (c == NULL) {
         return pcmk_ok;
 
     } else if (c->event_timer) {
         /* There is already a timer, wait until it goes off */
         crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer);
         return pcmk_ok;
     }
 
     queue_len = g_list_length(c->event_queue);
     while (c->event_queue && sent < 100) {
         struct crm_ipc_response_header *header = NULL;
         struct iovec *event = c->event_queue->data;
 
         rc = qb_ipcs_event_sendv(c->ipcs, event, 2);
         if (rc < 0) {
             /* Leave the event queued so it can be retried */
             break;
         }
 
         sent++;
         header = event[0].iov_base;
         /* rc is an ssize_t: widen it explicitly so it matches the conversion */
         if (header->size_compressed) {
             crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent",
                       header->qb.id, c->ipcs, c->pid, (long long) rc);
         } else {
             crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s",
                       header->qb.id, c->ipcs, c->pid, (long long) rc,
                       (char *) event[1].iov_base);
         }
 
         c->event_queue = g_list_remove(c->event_queue, event);
         free(event[0].iov_base);
         free(event[1].iov_base);
         free(event);
     }
 
     queue_len -= sent;
     if (sent > 0 || c->event_queue) {
         crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)",
                   sent, queue_len, c->ipcs, c->pid, pcmk_strerror(rc < 0 ? rc : 0),
                   (long long) rc);
     }
 
     if (c->event_queue) {
         if (queue_len % 100 == 0 && queue_len > 99) {
             crm_warn("Event queue for %p[%d] has grown to %d", c->ipcs, c->pid, queue_len);
 
         } else if (queue_len > 500) {
             crm_err("Evicting slow client %p[%d]: event queue reached %d entries",
                     c->ipcs, c->pid, queue_len);
             qb_ipcs_disconnect(c->ipcs);
             return rc;
         }
 
         c->event_timer = g_timeout_add(1000 + 100 * queue_len, crm_ipcs_flush_events_cb, c);
     }
 
     return rc;
 }
 
 /*
  * Serialize an XML message into a two-element iovec ready for sending:
  * element 0 is the crm_ipc_response_header, element 1 the message text
  * (compressed when header plus text do not fit below max_send_size).
  *
  * request        Id stored in the header's qb.id
  * message        XML to send
  * result         Receives the allocated iovec array; set to NULL on failure
  * max_send_size  Size limit; 0 selects ipc_buffer_max
  *
  * Returns the total size (header plus payload) on success, or -EMSGSIZE
  * when crm_compress_string() reports that the message could not be
  * compressed.  On success the array and both iov_base buffers are
  * heap-allocated.
  */
 ssize_t
 crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, int32_t max_send_size)
 {
     /* Largest size seen so far, used for the suggestion in the error message */
     static int biggest = 0;
     struct iovec *iov;
     unsigned int total = 0;
     char *compressed = NULL;
     char *buffer = dump_xml_unformatted(message);
     struct crm_ipc_response_header *header = calloc(1, sizeof(struct crm_ipc_response_header));
 
     CRM_ASSERT(result != NULL);
 
     crm_ipc_init();
 
     if (max_send_size == 0) {
         max_send_size = ipc_buffer_max;
     }
 
     CRM_LOG_ASSERT(max_send_size != 0);
 
     *result = NULL;
     iov = calloc(2, sizeof(struct iovec));
 
 
     iov[0].iov_len = hdr_offset;
     iov[0].iov_base = header;
 
     header->version = PCMK_IPC_VERSION;
     header->size_uncompressed = 1 + strlen(buffer);     /* includes the NUL */
     total = iov[0].iov_len + header->size_uncompressed;
 
     if (total < max_send_size) {
         /* Fits as-is: the iovec takes over the text buffer */
         iov[1].iov_base = buffer;
         iov[1].iov_len = header->size_uncompressed;
 
     } else {
         unsigned int new_size = 0;
 
         if (crm_compress_string
             (buffer, header->size_uncompressed, max_send_size, &compressed, &new_size)) {
 
             header->flags |= crm_ipc_compressed;
             header->size_compressed = new_size;
 
             iov[1].iov_len = header->size_compressed;
             iov[1].iov_base = compressed;
 
             /* Only the compressed copy is sent */
             free(buffer);
 
             if (header->size_compressed > biggest) {
                 biggest = 2 * QB_MAX(header->size_compressed, biggest);
             }
 
         } else {
             ssize_t rc = -EMSGSIZE;
 
             crm_log_xml_trace(message, "EMSGSIZE");
             biggest = 2 * QB_MAX(header->size_uncompressed, biggest);
 
             /* The two literals are concatenated, so the separating space is needed */
             crm_err
                 ("Could not compress the message into less than the configured ipc limit (%d bytes). "
                  "Set PCMK_ipc_buffer to a higher value (%d bytes suggested)", max_send_size,
                  biggest);
 
             free(compressed);
             free(buffer);
             free(header);
             free(iov);
 
             return rc;
         }
     }
 
     header->qb.size = iov[0].iov_len + iov[1].iov_len;
     header->qb.id = (int32_t)request;    /* Replying to a specific request */
 
     *result = iov;
     CRM_ASSERT(header->qb.size > 0);
     return header->qb.size;
 }
 
 /*
  * Deliver a prepared message (header in iov[0], payload in iov[1]) to a
  * client, either as a queued event or as a direct response.
  *
  * c      Destination client
  * iov    Two-element iovec, e.g. as built by crm_ipc_prepare()
  * flags  crm_ipc_server_event queues the message on the event channel;
  *        crm_ipc_server_free hands ownership of 'iov' to this function
  *
  * Returns the result of crm_ipcs_flush_events() for events, or of
  * qb_ipcs_response_sendv() for responses.
  */
 ssize_t
 crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_flags flags)
 {
     ssize_t rc;
     static uint32_t id = 1;
     struct crm_ipc_response_header *header = iov[0].iov_base;
 
     if (c->flags & crm_client_flag_ipc_proxied) {
         /* _ALL_ replies to proxied connections need to be sent as events */
         if (is_not_set(flags, crm_ipc_server_event)) {
             flags |= crm_ipc_server_event;
             /* this flag lets us know this was originally meant to be a response.
              * even though we're sending it over the event channel. */
             flags |= crm_ipc_proxied_relay_response;
         }
     }
 
     header->flags |= flags;
     if (flags & crm_ipc_server_event) {
         header->qb.id = id++;   /* We don't really use it, but doesn't hurt to set one */
 
         if (flags & crm_ipc_server_free) {
             /* We own 'iov', so it can be queued directly */
             crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid);
             c->event_queue = g_list_append(c->event_queue, iov);
 
         } else {
             /* The caller keeps 'iov'; queue a deep copy instead */
             struct iovec *iov_copy = calloc(2, sizeof(struct iovec));
 
             crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid);
             iov_copy[0].iov_len = iov[0].iov_len;
             iov_copy[0].iov_base = malloc(iov[0].iov_len);
             memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len);
 
             iov_copy[1].iov_len = iov[1].iov_len;
             iov_copy[1].iov_base = malloc(iov[1].iov_len);
             memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len);
 
             c->event_queue = g_list_append(c->event_queue, iov_copy);
         }
 
     } else {
         CRM_LOG_ASSERT(header->qb.id != 0);     /* Replying to a specific request */
 
         rc = qb_ipcs_response_sendv(c->ipcs, iov, 2);
         if (rc < header->qb.size) {
             crm_notice("Response %d to %p[%d] (%d bytes) failed: %s (%d)",
                        header->qb.id, c->ipcs, c->pid, header->qb.size, pcmk_strerror(rc), rc);
 
         } else {
             crm_trace("Response %d sent, %d bytes to %p[%d]", header->qb.id, rc, c->ipcs, c->pid);
         }
 
         if (flags & crm_ipc_server_free) {
             free(iov[0].iov_base);
             free(iov[1].iov_base);
             free(iov);
         }
     }
 
     /* Flush in both cases, but only an event send reports the flush result */
     if (flags & crm_ipc_server_event) {
         rc = crm_ipcs_flush_events(c);
     } else {
         crm_ipcs_flush_events(c);
     }
 
     if (rc == -EPIPE || rc == -ENOTCONN) {
         crm_trace("Client %p disconnected", c->ipcs);
     }
 
     return rc;
 }
 
 /*
  * Serialize 'message' and send it to client 'c'.
  *
  * Returns -EDESTADDRREQ when no client is given, the (non-positive) result
  * of crm_ipc_prepare() if the message could not be prepared, and otherwise
  * whatever crm_ipcs_sendv() returns.
  */
 ssize_t
 crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message,
               enum crm_ipc_flags flags)
 {
     ssize_t rc = 0;
     struct iovec *iov = NULL;
 
     if (c == NULL) {
         return -EDESTADDRREQ;
     }
     crm_ipc_init();
 
     rc = crm_ipc_prepare(request, message, &iov, ipc_buffer_max);
     if (rc <= 0) {
         free(iov);
         crm_notice("Message to %p[%d] failed: %s (%d)",
                    c->ipcs, c->pid, pcmk_strerror(rc), rc);
         return rc;
     }
 
     /* The send routine takes over the freshly prepared iovec */
     return crm_ipcs_sendv(c, iov, flags | crm_ipc_server_free);
 }
 
 /*
  * Acknowledge a request, but only when the sender set
  * crm_ipc_client_response.  The ack is an element named 'tag' that records
  * the calling function and line.
  */
 void
 crm_ipcs_send_ack(crm_client_t * c, uint32_t request, uint32_t flags, const char *tag, const char *function,
                   int line)
 {
     xmlNode *ack = NULL;
 
     if ((flags & crm_ipc_client_response) == 0) {
         return;     /* The sender is not waiting for anything */
     }
 
     ack = create_xml_node(NULL, tag);
 
     crm_trace("Ack'ing msg from %s (%p)", crm_client_name(c), c);
     c->request_id = 0;
     crm_xml_add(ack, "function", function);
     crm_xml_add_int(ack, "line", line);
     crm_ipcs_send(c, request, ack, flags);
     free_xml(ack);
 }
 
 /* Client... */
 
 #define MIN_MSG_SIZE    12336   /* sizeof(struct qb_ipc_connection_response) */
 /* Parenthesized so the product stays one operand wherever the macro is expanded */
 #define MAX_MSG_SIZE    (128*1024) /* 128k default */
 
 /* State kept for one client-side IPC connection */
 struct crm_ipc_s {
     /* Descriptor and event mask handed to poll() by crm_ipc_ready() */
     struct pollfd pfd;
 
     /* the max size we can send/receive over ipc */
     unsigned int max_buf_size;
     /* Size of the allocated 'buffer' */
     unsigned int buf_size;
     /* Result of the most recent qb_ipcc_event_recv() call made by crm_ipc_read() */
     int msg_size;
     /* Set when a reply never arrived; crm_ipc_send() refuses to send until it does */
     int need_reply;
     /* Receive buffer: a response header followed by the message text */
     char *buffer;
     /* Copy of the name given to crm_ipc_new(), passed on to qb_ipcc_connect() */
     char *name;
     /* NOTE(review): not referenced by the code visible here - confirm it is still used */
     uint32_t buffer_flags;
 
     /* libqb connection; NULL until crm_ipc_connect() succeeds and again after crm_ipc_close() */
     qb_ipcc_connection_t *ipc;
 
 };
 
 static unsigned int
 pick_ipc_buffer(int max)
 {
     static int global_max = 0;
 
     if(global_max == 0) {
         const char *env = getenv("PCMK_ipc_buffer");
 
         if (env) {
             global_max = crm_parse_int(env, "0");
         } else {
             global_max = MAX_MSG_SIZE;
         }
     }
 
     return QB_MAX(max, global_max);
 }
 
 /*
  * Allocate a client-side IPC object for the service called 'name'.
  *
  * name      Service name; copied
  * max_size  Requested buffer size, see pick_ipc_buffer() for the size
  *           actually used
  *
  * Returns the new, not yet connected, object, or NULL when 'name' is NULL
  * or memory could not be allocated.  Release it with crm_ipc_destroy().
  */
 crm_ipc_t *
 crm_ipc_new(const char *name, size_t max_size)
 {
     crm_ipc_t *client = NULL;
 
     if (name == NULL) {
         crm_err("Cannot create an IPC connection without a name");
         return NULL;
     }
 
     client = calloc(1, sizeof(crm_ipc_t));
     if (client == NULL) {
         crm_err("Could not allocate memory for IPC connection to %s", name);
         return NULL;
     }
 
     client->name = strdup(name);
     client->buf_size = pick_ipc_buffer(max_size);
     client->buffer = malloc(client->buf_size);
 
     if (client->name == NULL || client->buffer == NULL) {
         /* free(NULL) is a no-op, so whichever allocation failed is harmless here */
         crm_err("Could not allocate memory for IPC connection to %s", name);
         free(client->buffer);
         free(client->name);
         free(client);
         return NULL;
     }
 
     /* Clients initiating connection pick the max buf size */
     client->max_buf_size = client->buf_size;
 
     client->pfd.fd = -1;
     client->pfd.events = POLLIN;
     client->pfd.revents = 0;
 
     return client;
 }
 
 /*
  * Connect 'client' to the service named client->name.
  *
  * Returns TRUE on success.  Returns FALSE when qb_ipcc_connect() fails or
  * no file descriptor can be obtained for the new connection.
  *
  * NOTE(review): on the file-descriptor failure path client->ipc is left
  * set and is not disconnected here - confirm callers clean it up.
  */
 bool
 crm_ipc_connect(crm_ipc_t * client)
 {
     client->need_reply = FALSE;
     client->ipc = qb_ipcc_connect(client->name, client->buf_size);
 
     if (client->ipc == NULL) {
         crm_perror(LOG_INFO, "Could not establish %s connection", client->name);
         return FALSE;
     }
 
     client->pfd.fd = crm_ipc_get_fd(client);
     if (client->pfd.fd < 0) {
         crm_perror(LOG_INFO, "Could not obtain file descriptor for %s connection", client->name);
         return FALSE;
     }
 
     qb_ipcc_context_set(client->ipc, client);
 
 #ifdef HAVE_IPCS_GET_BUFFER_SIZE
     /* Adopt the buffer size libqb reports for this connection; the local
      * buffer is replaced when the comparison below says the sizes disagree */
     client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc);
-    if (client->max_buf_size < client->buf_size) {
+    if (client->max_buf_size > client->buf_size) {
         free(client->buffer);
         client->buffer = calloc(1, client->max_buf_size);
         client->buf_size = client->max_buf_size;
     }
 #endif
 
     return TRUE;
 }
 
 /*
  * Disconnect 'client' from its service, if it is connected.
  *
  * The object itself is not freed and a NULL client is ignored.
  */
 void
 crm_ipc_close(crm_ipc_t * client)
 {
     if (client) {
         /* One conversion per argument: the old "(%p.%p)" consumed a fourth,
          * non-existent argument */
         crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc);
 
         if (client->ipc) {
             qb_ipcc_connection_t *ipc = client->ipc;
 
             /* Clear the pointer before disconnecting */
             client->ipc = NULL;
             qb_ipcc_disconnect(ipc);
         }
     }
 }
 
 /*
  * Free a client-side IPC object along with its buffer and name.
  *
  * A connection that is still active is reported but not closed here.
  */
 void
 crm_ipc_destroy(crm_ipc_t * client)
 {
     if (client == NULL) {
         return;
     }
 
     if (client->ipc && qb_ipcc_is_connected(client->ipc)) {
         crm_notice("Destroying an active IPC connection to %s", client->name);
         /* Calling crm_ipc_close(client) at this point is basically unsafe.
          *
          * If this connection was attached to mainloop and mainloop is active,
          *   the 'disconnected' callback will end up back here and we'll end
          *   up free'ing the memory twice - something that can still happen
          *   even without this if we destroy a connection and it closes before
          *   we call exit
          */
     }
 
     crm_trace("Destroying IPC connection to %s: %p", client->name, client);
     free(client->buffer);
     free(client->name);
     free(client);
 }
 
 /*
  * Look up the file descriptor behind a client connection.
  *
  * Returns the descriptor, or -EINVAL when the client is not connected or
  * libqb cannot supply one.
  */
 int
 crm_ipc_get_fd(crm_ipc_t * client)
 {
     int fd = 0;
 
     CRM_ASSERT(client != NULL);
 
     if (client->ipc == NULL || qb_ipcc_fd_get(client->ipc, &fd) != 0) {
         crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s", client->name);
         return -EINVAL;
     }
 
     return fd;
 }
 
 /*
  * Check whether 'client' currently has a usable connection.
  *
  * When libqb reports the connection as gone, the cached descriptor is
  * invalidated so later calls return early.
  */
 bool
 crm_ipc_connected(crm_ipc_t * client)
 {
     bool connected = FALSE;
 
     if (client == NULL) {
         crm_trace("No client");
         return FALSE;
     }
 
     if (client->ipc == NULL) {
         crm_trace("No connection");
         return FALSE;
     }
 
     if (client->pfd.fd < 0) {
         crm_trace("Bad descriptor");
         return FALSE;
     }
 
     connected = qb_ipcc_is_connected(client->ipc);
     if (!connected) {
         client->pfd.fd = -EINVAL;
     }
 
     return connected;
 }
 
 /*
  * Poll the connection's descriptor without waiting.
  *
  * Returns -ENOTCONN when the client is not connected, otherwise the result
  * of poll().
  */
 int
 crm_ipc_ready(crm_ipc_t * client)
 {
     CRM_ASSERT(client != NULL);
 
     if (!crm_ipc_connected(client)) {
         return -ENOTCONN;
     }
 
     /* Start from a clean slate so stale results cannot be misread */
     client->pfd.revents = 0;
 
     return poll(&(client->pfd), 1, 0);
 }
 
 /*
  * Decompress the message held in client->buffer, if its header says the
  * payload is compressed.
  *
  * On success the client's buffer is replaced by a newly allocated one that
  * holds the original header followed by the uncompressed text, and
  * client->buf_size is updated to match.
  *
  * Returns pcmk_ok, or -EILSEQ if bzip2 decompression fails (in which case
  * the client's buffer is left untouched).
  */
 static int
 crm_ipc_decompress(crm_ipc_t * client)
 {
     struct crm_ipc_response_header *header = (struct crm_ipc_response_header *)(void*)client->buffer;
 
     if (header->size_compressed) {
         int rc = 0;
         unsigned int size_u = 1 + header->size_uncompressed;
         /* never let buf size fall below our max size required for ipc reads. */
         unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size);
         char *uncompressed = calloc(1, new_buf_size);
 
         crm_trace("Decompressing message data %d bytes into %d bytes",
                  header->size_compressed, size_u);
 
         /* Decompress straight into place behind where the header will be copied */
         rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u,
                                         client->buffer + hdr_offset, header->size_compressed, 1, 0);
 
         if (rc != BZ_OK) {
             crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc);
             free(uncompressed);
             return -EILSEQ;
         }
 
         /*
          * This assert no longer holds true.  For an identical msg, some clients may
          * require compression, and others may not. If that same msg (event) is sent
          * to multiple clients, it could result in some clients receiving a compressed
          * msg even though compression was not explicitly required for them.
          *
          * CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max);
          */
         /* size_u was passed by address above and is compared with the size the header announced */
         CRM_ASSERT(size_u == header->size_uncompressed);
 
         memcpy(uncompressed, client->buffer, hdr_offset);       /* Preserve the header */
         header = (struct crm_ipc_response_header *)(void*)uncompressed;
 
         free(client->buffer);
         client->buf_size = new_buf_size;
         client->buffer = uncompressed;
     }
 
     /* The last byte covered by size_uncompressed has to be the terminating NUL */
     CRM_ASSERT(client->buffer[hdr_offset + header->size_uncompressed - 1] == 0);
     return pcmk_ok;
 }
 
 /*
  * Receive one event from the server into client->buffer.
  *
  * Returns the uncompressed size of the message text (excluding the header)
  * on success, the error from crm_ipc_decompress() if decompression fails,
  * -EBADMSG if the message version is newer than PCMK_IPC_VERSION, and
  * -ENOMSG if nothing was received.
  */
 long
 crm_ipc_read(crm_ipc_t * client)
 {
     struct crm_ipc_response_header *header = NULL;
 
     CRM_ASSERT(client != NULL);
     CRM_ASSERT(client->ipc != NULL);
     CRM_ASSERT(client->buffer != NULL);
 
     crm_ipc_init();
 
     /* Blank the buffer so leftovers from an earlier message are not mistaken for new data */
     client->buffer[0] = 0;
     client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size - 1, 0);
     if (client->msg_size >= 0) {
         int rc = crm_ipc_decompress(client);
 
         if (rc != pcmk_ok) {
             return rc;
         }
 
         /* Re-read the pointer: decompression may have replaced the buffer */
         header = (struct crm_ipc_response_header *)(void*)client->buffer;
         if(header->version > PCMK_IPC_VERSION) {
             crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
                     header->version, PCMK_IPC_VERSION);
             return -EBADMSG;
         }
 
         crm_trace("Received %s event %d, size=%d, rc=%d, text: %.100s",
                   client->name, header->qb.id, header->qb.size, client->msg_size,
                   client->buffer + hdr_offset);
 
     } else {
         crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size));
     }
 
     if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) {
         crm_err("Connection to %s failed", client->name);
     }
 
     if (header) {
         /* Data excluding the header */
         return header->size_uncompressed;
     }
     return -ENOMSG;
 }
 
 /* Return the message text that follows the header in the client's buffer. */
 const char *
 crm_ipc_buffer(crm_ipc_t * client)
 {
     const size_t header_len = sizeof(struct crm_ipc_response_header);
 
     CRM_ASSERT(client != NULL);
     return &client->buffer[header_len];
 }
 
 /* Return the flags from the header of the buffered message, or 0 without a buffer. */
 uint32_t
 crm_ipc_buffer_flags(crm_ipc_t * client)
 {
     CRM_ASSERT(client != NULL);
 
     if (client->buffer != NULL) {
         struct crm_ipc_response_header *hdr =
             (struct crm_ipc_response_header *)(void*)client->buffer;
 
         return hdr->flags;
     }
 
     return 0;
 }
 
 /* Return the service name this client object was created for. */
 const char *
 crm_ipc_name(crm_ipc_t * client)
 {
     const char *service = NULL;
 
     CRM_ASSERT(client != NULL);
     service = client->name;
 
     return service;
 }
 
 /*
  * Send a request and collect its reply in a single libqb call, repeating
  * for as long as libqb answers -EAGAIN and the connection is still up.
  */
 static int
 internal_ipc_send_recv(crm_ipc_t * client, const void *iov)
 {
     int rc = 0;
 
     for (;;) {
         rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1);
         if (rc != -EAGAIN || !crm_ipc_connected(client)) {
             break;
         }
     }
 
     return rc;
 }
 
 /*
  * Send a request, retrying on -EAGAIN until the deadline derived from
  * ms_timeout passes or the connection drops.
  */
 static int
 internal_ipc_send_request(crm_ipc_t * client, const void *iov, int ms_timeout)
 {
     int rc = 0;
     const time_t deadline = time(NULL) + 1 + (ms_timeout / 1000);
 
     for (;;) {
         rc = qb_ipcc_sendv(client->ipc, iov, 2);
 
         if (rc != -EAGAIN) {
             break;          /* Sent, or failed for good */
         }
         if (time(NULL) >= deadline) {
             break;          /* Out of time */
         }
         if (!crm_ipc_connected(client)) {
             break;          /* Nobody left to talk to */
         }
     }
 
     return rc;
 }
 
 /*
  * Wait for the reply to request 'request_id'.
  *
  * Replies carrying an older id are logged and discarded.  A reply carrying
  * a newer id is logged and then trips an assertion.
  *
  * Returns the result of the last qb_ipcc_recv() call, or the error from
  * crm_ipc_decompress() if a received message could not be decompressed.
  */
 static int
 internal_ipc_get_reply(crm_ipc_t * client, int request_id, int ms_timeout)
 {
     time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);
     int rc = 0;
 
     crm_ipc_init();
 
     /* get the reply */
     crm_trace("client %s waiting on reply to msg id %d", client->name, request_id);
     do {
 
         rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000);
         if (rc > 0) {
             struct crm_ipc_response_header *hdr = NULL;
 
             /* Separate name so the receive result in 'rc' is not shadowed */
             int decompress_rc = crm_ipc_decompress(client);
 
             if (decompress_rc != pcmk_ok) {
                 return decompress_rc;
             }
 
             hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
             if (hdr->qb.id == request_id) {
                 /* Got it */
                 break;
             } else if (hdr->qb.id < request_id) {
                 xmlNode *bad = string2xml(crm_ipc_buffer(client));
 
                 crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id);
                 crm_log_xml_notice(bad, "OldIpcReply");
                 free_xml(bad);  /* Only parsed for logging */
 
             } else {
                 xmlNode *bad = string2xml(crm_ipc_buffer(client));
 
                 crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id);
                 crm_log_xml_notice(bad, "ImpossibleReply");
                 free_xml(bad);  /* Only parsed for logging */
                 CRM_ASSERT(hdr->qb.id <= request_id);
             }
         } else if (crm_ipc_connected(client) == FALSE) {
             crm_err("Server disconnected client %s while waiting for msg id %d", client->name,
                     request_id);
             break;
         }
 
     } while (time(NULL) < timeout);
 
     return rc;
 }
 
 /*
  * Send an XML message to the server and optionally collect the reply.
  *
  * client      Connection to send on
  * message     XML to send
  * flags       crm_ipc_flags; crm_ipc_client_response requests a reply,
  *             crm_ipc_proxied suppresses waiting for one
  * ms_timeout  0 selects 5000ms; a negative value together with
  *             crm_ipc_client_response uses the combined send/receive call
  * reply       If non-NULL and a reply arrives, receives the parsed XML
  *
  * Returns a positive value on success, -ENOTCONN when not connected,
  * -EALREADY while an earlier reply is still outstanding, or another
  * negative error from preparing, sending or receiving.
  */
 int
 crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout,
              xmlNode ** reply)
 {
     long rc = 0;
     struct iovec *iov;
     static uint32_t id = 0;
     /* Threshold, in tenths of ipc_buffer_max, for the next size notice below */
     static int factor = 8;
     struct crm_ipc_response_header *header;
 
     crm_ipc_init();
 
     if (client == NULL) {
         crm_notice("Invalid connection");
         return -ENOTCONN;
 
     } else if (crm_ipc_connected(client) == FALSE) {
         /* Don't even bother */
         crm_notice("Connection to %s closed", client->name);
         return -ENOTCONN;
     }
 
     if (ms_timeout == 0) {
         ms_timeout = 5000;
     }
 
     if (client->need_reply) {
         /* An earlier request never got its reply; try to collect it first */
         crm_trace("Trying again to obtain pending reply from %s", client->name);
         rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout);
         if (rc < 0) {
             crm_warn("Sending to %s (%p) is disabled until pending reply is received", client->name,
                      client->ipc);
             return -EALREADY;
 
         } else {
             crm_notice("Lost reply from %s (%p) finally arrived, sending re-enabled", client->name,
                        client->ipc);
             client->need_reply = FALSE;
         }
     }
 
     id++;
     CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */
     rc = crm_ipc_prepare(id, message, &iov, ipc_buffer_max);
     if(rc < 0) {
         return rc;
     }
 
     header = iov[0].iov_base;
     header->flags |= flags;
 
     if(is_set(flags, crm_ipc_proxied)) {
         /* Don't look for a synchronous response */
         clear_bit(flags, crm_ipc_client_response);
     }
 
     if(header->size_compressed) {
         if(factor < 10 && (ipc_buffer_max / 10) < (rc / factor)) {
             crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%d bytes), "
                        "consider setting PCMK_ipc_buffer to %d or higher",
                        factor, ipc_buffer_max, 2*ipc_buffer_max);
             factor++;
         }
     }
 
     crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...",
               client->name, header->qb.id, header->qb.size, ms_timeout);
 
     if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) {
 
         rc = internal_ipc_send_request(client, iov, ms_timeout);
 
         if (rc <= 0) {
             crm_trace("Failed to send from client %s request %d with %u bytes...",
                       client->name, header->qb.id, header->qb.size);
             goto send_cleanup;
 
         } else if (is_not_set(flags, crm_ipc_client_response)) {
             crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes...",
                       header->qb.id, client->name, header->qb.size);
 
             goto send_cleanup;
         }
 
         rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout);
         if (rc < 0) {
             /* No reply, for now, disable sending
              *
              * The alternative is to close the connection since we don't know
              * how to detect and discard out-of-sequence replies
              *
              * TODO - implement the above
              */
             client->need_reply = TRUE;
         }
 
     } else {
         /* Negative timeout with a reply wanted: send and receive in one call */
         rc = internal_ipc_send_recv(client, iov);
     }
 
     if (rc > 0) {
         struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
 
         crm_trace("Received response %d, size=%d, rc=%ld, text: %.200s", hdr->qb.id, hdr->qb.size,
                   rc, crm_ipc_buffer(client));
 
         if (reply) {
             *reply = string2xml(crm_ipc_buffer(client));
         }
 
     } else {
         crm_trace("Response not received: rc=%ld, errno=%d", rc, errno);
     }
 
   send_cleanup:
     /* Every path taken after a successful prepare ends here to log and free the request */
     if (crm_ipc_connected(client) == FALSE) {
         crm_notice("Connection to %s closed: %s (%ld)", client->name, pcmk_strerror(rc), rc);
 
     } else if (rc == -ETIMEDOUT) {
         crm_warn("Request %d to %s (%p) failed: %s (%ld) after %dms",
                  header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc, ms_timeout);
         crm_write_blackbox(0, NULL);
 
     } else if (rc <= 0) {
         crm_warn("Request %d to %s (%p) failed: %s (%ld)",
                  header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc);
     }
 
     free(header);
     free(iov[1].iov_base);
     free(iov);
     return rc;
 }
 
 /* Utils */
 
 /*
  * Build the "hello" request a client uses to introduce itself.
  *
  * All four strings are mandatory: if any is NULL or empty an error is
  * logged and NULL is returned.
  */
 xmlNode *
 create_hello_message(const char *uuid,
                      const char *client_name, const char *major_version, const char *minor_version)
 {
     xmlNode *options = NULL;
     xmlNode *request = NULL;
 
     if (uuid == NULL || uuid[0] == '\0'
         || client_name == NULL || client_name[0] == '\0'
         || major_version == NULL || major_version[0] == '\0'
         || minor_version == NULL || minor_version[0] == '\0') {
         crm_err("Missing fields, Hello message will not be valid.");
         return NULL;
     }
 
     options = create_xml_node(NULL, XML_TAG_OPTIONS);
     crm_xml_add(options, "major_version", major_version);
     crm_xml_add(options, "minor_version", minor_version);
     crm_xml_add(options, "client_name", client_name);
     crm_xml_add(options, "client_uuid", uuid);
 
     crm_trace("creating hello message");
     request = create_request(CRM_OP_HELLO, options, NULL, NULL, client_name, uuid);
 
     /* The options node is released as soon as the request has been built */
     free_xml(options);
 
     return request;
 }
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index b699201cc6..7127c12795 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,3191 +1,3214 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <crm_internal.h>
 
 #include <glib.h>
 
 #include <crm/crm.h>
 #include <crm/services.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crm/common/util.h>
 #include <crm/pengine/rules.h>
 #include <crm/pengine/internal.h>
 #include <unpack.h>
 
 CRM_TRACE_INIT_DATA(pe_status);
 
 /*
  * Look up cluster option 'option' in data_set->config_hash and, when a value
  * is present, set 'flag' in data_set->flags if the value is true or clear it
  * otherwise.  When no value is found the flag is left untouched.
  *
  * Macro parameters are parenthesized and the local uses a distinctive name so
  * that arbitrary argument expressions expand safely.
  */
 #define set_config_flag(data_set, option, flag) do {                            \
         const char *cfg_flag_value_ =                                           \
             pe_pref((data_set)->config_hash, (option));                         \
         if (cfg_flag_value_ != NULL) {                                          \
             if (crm_is_true(cfg_flag_value_)) {                                 \
                 set_bit((data_set)->flags, (flag));                             \
             } else {                                                            \
                 clear_bit((data_set)->flags, (flag));                           \
             }                                                                   \
         }                                                                       \
     } while(0)
 
 gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
                        enum action_fail_response *failed, pe_working_set_t * data_set);
 static gboolean determine_remote_online_status(node_t * this_node);
 
 /*
  * Report whether 'node' is a remote node that was supposed to be mapped to a
  * container resource, but all traces of that container have disappeared
  * from both the config and the status section.
  */
 static gboolean
 is_dangling_container_remote_node(node_t *node)
 {
     resource_t *connection = NULL;
 
     if (is_remote_node(node) == FALSE) {
         return FALSE;
     }
 
     connection = node->details->remote_rsc;
     if (connection == NULL || connection->container != NULL) {
         /* No connection resource at all, or its container still exists */
         return FALSE;
     }
 
     if (is_set(connection->flags, pe_rsc_orphan_container_filler)) {
         return TRUE;
     }
     return FALSE;
 }
 
 /*
  * Mark 'node' as needing to be fenced, logging 'reason'.
  *
  * - A remote node living in a container is "fenced" by flagging its container
  *   resource as failed.
  * - A dangling container remote node (container gone) has its connection
  *   resource flagged as failed instead.
  * - Any other node not yet marked unclean is marked unclean.
  *
  * 'reason' is appended verbatim to the log messages.
  */
 void
 pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
 {
     CRM_CHECK(node, return);
 
     /* fence remote nodes living in a container by marking the container as failed. */
     if (is_container_remote_node(node)) {
         resource_t *rsc = node->details->remote_rsc->container;
         if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
             /* The format previously had two conversions for three arguments,
              * so the reason was never printed. */
             crm_warn("Remote node %s will be fenced by recovering container resource %s %s",
                 node->details->uname, rsc->id, reason);
             set_bit(rsc->flags, pe_rsc_failed);
         }
     } else if (is_dangling_container_remote_node(node)) {
         crm_info("Fencing remote node %s has already occurred, container no longer exists. cleaning up dangling connection resource:  %s",
                   node->details->uname, reason);
         set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
 
     } else if (node->details->unclean == FALSE) {
         if(pe_can_fence(data_set, node)) {
             crm_warn("Node %s will be fenced %s", node->details->uname, reason);
         } else {
             crm_warn("Node %s is unclean %s", node->details->uname, reason);
         }
         node->details->unclean = TRUE;
     } else {
         /* Node was already marked unclean; nothing more to do */
         crm_trace("Huh? %s %s", node->details->uname, reason);
     }
 }
 
 /*
  * Unpack the cluster options found under 'config' into 'data_set'.
  *
  * A new options hash table is created and stored in data_set->config_hash,
  * then individual options are read from it to populate working-set flags
  * and fields (STONITH settings, quorum policy, orphan handling, maintenance
  * mode, node health scores, placement strategy, ...).
  *
  * Always returns TRUE.
  */
 gboolean
 unpack_config(xmlNode * config, pe_working_set_t * data_set)
 {
     const char *value = NULL;
     GHashTable *config_hash =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
 
     xmlXPathObjectPtr xpathObj = NULL;
 
     /* Enable unfencing if any nvpair in the input has provides=unfencing ... */
     if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         xpathObj = xpath_search(data_set->input, "//nvpair[@name='provides' and @value='unfencing']");
         if(xpathObj && numXpathResults(xpathObj) > 0) {
             set_bit(data_set->flags, pe_flag_enable_unfencing);
         }
         freeXpathObject(xpathObj);
     }
 
     /* ... or requires=unfencing (only searched if the flag is still unset) */
     if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         xpathObj = xpath_search(data_set->input, "//nvpair[@name='requires' and @value='unfencing']");
         if(xpathObj && numXpathResults(xpathObj) > 0) {
             set_bit(data_set->flags, pe_flag_enable_unfencing);
         }
         freeXpathObject(xpathObj);
     }
 
 
 #ifdef REDHAT_COMPAT_6
     /* Compatibility build: a fence_scsi primitive also enables unfencing */
     if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         xpathObj = xpath_search(data_set->input, "//primitive[@type='fence_scsi']");
         if(xpathObj && numXpathResults(xpathObj) > 0) {
             set_bit(data_set->flags, pe_flag_enable_unfencing);
         }
         freeXpathObject(xpathObj);
     }
 #endif
 
     /* From here on, options are looked up through data_set->config_hash */
     data_set->config_hash = config_hash;
 
     unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
                                CIB_OPTIONS_FIRST, FALSE, data_set->now);
 
     verify_pe_options(data_set->config_hash);
 
     set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
     if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
         crm_info("Startup probes: disabled (dangerous)");
     }
 
     value = pe_pref(data_set->config_hash, "stonith-timeout");
     data_set->stonith_timeout = crm_get_msec(value);
     crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
 
     set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
     crm_debug("STONITH of failed nodes is %s",
               is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
 
     data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
     crm_trace("STONITH will %s nodes", data_set->stonith_action);
 
     set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
     crm_debug("Stop all active resources: %s",
               is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
 
     set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
     if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
         crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
     }
 
     value = pe_pref(data_set->config_hash, "default-resource-stickiness");
     data_set->default_resource_stickiness = char2score(value);
     crm_debug("Default stickiness: %d", data_set->default_resource_stickiness);
 
     /* Map the no-quorum-policy string onto the enum; unknown values mean "stop" */
     value = pe_pref(data_set->config_hash, "no-quorum-policy");
 
     if (safe_str_eq(value, "ignore")) {
         data_set->no_quorum_policy = no_quorum_ignore;
 
     } else if (safe_str_eq(value, "freeze")) {
         data_set->no_quorum_policy = no_quorum_freeze;
 
     } else if (safe_str_eq(value, "suicide")) {
         gboolean do_panic = FALSE;
 
         crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic);
 
         /* "suicide" relies on fencing, so it is flagged as a config error
          * when stonith is disabled (this depends on the stonith-enabled
          * flag having been unpacked above) */
         if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
             crm_config_err
                 ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false");
         }
 
         if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) {
             data_set->no_quorum_policy = no_quorum_suicide;
 
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) {
             crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum");
             data_set->no_quorum_policy = no_quorum_stop;
         }
         /* NOTE(review): in the remaining combinations no_quorum_policy is
          * not assigned in this branch and keeps its previous value */
 
     } else {
         data_set->no_quorum_policy = no_quorum_stop;
     }
 
     /* Log the effective quorum policy */
     switch (data_set->no_quorum_policy) {
         case no_quorum_freeze:
             crm_debug("On loss of CCM Quorum: Freeze resources");
             break;
         case no_quorum_stop:
             crm_debug("On loss of CCM Quorum: Stop ALL resources");
             break;
         case no_quorum_suicide:
             crm_notice("On loss of CCM Quorum: Fence all remaining nodes");
             break;
         case no_quorum_ignore:
             crm_notice("On loss of CCM Quorum: Ignore");
             break;
     }
 
     set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
     crm_trace("Orphan resources are %s",
               is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
 
     set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
     crm_trace("Orphan resource actions are %s",
               is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
 
     set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
     crm_trace("Stopped resources are removed from the status section: %s",
               is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
 
     set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
     crm_trace("Maintenance mode: %s",
               is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
 
     /* Maintenance mode forces resources to be unmanaged by default;
      * otherwise the is-managed-default option decides */
     if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
         clear_bit(data_set->flags, pe_flag_is_managed_default);
     } else {
         set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default);
     }
     crm_trace("By default resources are %smanaged",
               is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not ");
 
     set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
     crm_trace("Start failures are %s",
               is_set(data_set->flags,
                      pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
 
     /* Node health scores are stored in variables declared outside this function */
     node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
     node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
     node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
 
     crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
              pe_pref(data_set->config_hash, "node-health-red"),
              pe_pref(data_set->config_hash, "node-health-yellow"),
              pe_pref(data_set->config_hash, "node-health-green"));
 
     data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
     crm_trace("Placement strategy: %s", data_set->placement_strategy);
 
     return TRUE;
 }
 
 /*
  * Destructor for digest cache entries: releases both parameter XML trees,
  * both calculated digest strings, and finally the entry itself.
  */
 static void
 destroy_digest_cache(gpointer ptr)
 {
     op_digest_cache_t *cache = (op_digest_cache_t *) ptr;
 
     /* XML parameter sets */
     free_xml(cache->params_all);
     free_xml(cache->params_restart);
 
     /* Calculated digest strings */
     free(cache->digest_restart_calc);
     free(cache->digest_all_calc);
 
     free(cache);
 }
 
 /*
  * Allocate a node object, initialise it from the given id/uname/type/score
  * and insert it into data_set->nodes (kept sorted via sort_node_uname).
  *
  * A configuration warning is logged when a node with the same uname already
  * exists.  'id' and 'uname' are stored by pointer, not copied.
  *
  * Returns the new node, or NULL if memory allocation fails.
  */
 static node_t *
 create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set)
 {
     node_t *node = NULL;
 
     if (pe_find_node(data_set->nodes, uname) != NULL) {
         crm_config_warn("Detected multiple node entries with uname=%s"
                         " - this is rarely intended", uname);
     }
 
     node = calloc(1, sizeof(node_t));
     if (node == NULL) {
         return NULL;
     }
 
     node->weight = char2score(score);
     node->fixed = FALSE;
 
     node->details = calloc(1, sizeof(struct node_shared_s));
     if (node->details == NULL) {
         free(node);
         return NULL;
     }
 
     crm_trace("Creating node for entry %s/%s", uname, id);
     node->details->id = id;
     node->details->uname = uname;
     node->details->online = FALSE;
     node->details->shutdown = FALSE;
     node->details->running_rsc = NULL;
 
     /* Classify the node; anything unrecognised is treated as a ping node */
     if (safe_str_eq(type, "remote")) {
         node->details->type = node_remote;
         set_bit(data_set->flags, pe_flag_have_remote_nodes);
 
     } else if (type == NULL
                || safe_str_eq(type, "member")
                || safe_str_eq(type, NORMALNODE)) {
         node->details->type = node_member;
 
     } else {
         node->details->type = node_ping;
     }
 
     /* Per-node tables: attributes, utilization and the operation digest cache */
     node->details->attrs =
         g_hash_table_new_full(crm_str_hash, g_str_equal,
                               g_hash_destroy_str, g_hash_destroy_str);
     node->details->utilization =
         g_hash_table_new_full(crm_str_hash, g_str_equal,
                               g_hash_destroy_str, g_hash_destroy_str);
     node->details->digest_cache =
         g_hash_table_new_full(crm_str_hash, g_str_equal,
                               g_hash_destroy_str, destroy_digest_cache);
 
     data_set->nodes = g_list_insert_sorted(data_set->nodes, node, sort_node_uname);
     return node;
 }
 
 /* Append <nvpair id="<remote_name>_<id_suffix>" name="name" value="value"/>
  * as a child of 'parent'. */
 static void
 remote_rsc_add_nvpair(xmlNode *parent, const char *remote_name, const char *id_suffix,
                       const char *name, const char *value)
 {
     xmlNode *nvpair = create_xml_node(parent, XML_CIB_TAG_NVPAIR);
     char *nvpair_id = crm_concat(remote_name, id_suffix, '_');
 
     crm_xml_add(nvpair, XML_ATTR_ID, nvpair_id);
     crm_xml_add(nvpair, XML_NVPAIR_ATTR_NAME, name);
     crm_xml_add(nvpair, XML_NVPAIR_ATTR_VALUE, value);
     free(nvpair_id);
 }
 
 /* Append an operation element with id "<remote_name>_<id_suffix>" and the
  * given name, timeout and interval as a child of 'operations'. */
 static void
 remote_rsc_add_op(xmlNode *operations, const char *remote_name, const char *id_suffix,
                   const char *op_name, const char *timeout, const char *interval)
 {
     xmlNode *op = create_xml_node(operations, XML_ATTR_OP);
     char *op_id = crm_concat(remote_name, id_suffix, '_');
 
     crm_xml_add(op, XML_ATTR_ID, op_id);
     crm_xml_add(op, XML_ATTR_TIMEOUT, timeout);
     crm_xml_add(op, XML_LRM_ATTR_INTERVAL, interval);
     crm_xml_add(op, XML_NVPAIR_ATTR_NAME, op_name);
     free(op_id);
 }
 
 /*
  * Expand a remote node defined through a resource's meta attributes into an
  * explicit ocf:pacemaker:remote primitive appended to 'parent'.
  *
  * xml_obj         resource whose meta attribute sets are scanned
  * parent          resources section; receives the generated primitive
  * rsc_name_check  in/out cache of the ids of parent's children, built on
  *                 first use; it borrows the id strings and is owned by the
  *                 caller
  *
  * Returns the remote node name (a pointer into xml_obj's attribute data), or
  * NULL when xml_obj defines no remote node or the name collides with an
  * existing resource id.
  */
 static const char *
 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_check)
 {
     xmlNode *xml_rsc = NULL;
     xmlNode *xml_tmp = NULL;
     xmlNode *attr_set = NULL;
     xmlNode *attr = NULL;
 
     const char *container_id = ID(xml_obj);
     const char *remote_name = NULL;
     const char *remote_server = NULL;
     const char *remote_port = NULL;
     const char *connect_timeout = "60s";
     const char *remote_allow_migrate=NULL;
     char *tmp_id = NULL;
 
     /* Collect the remote-node related settings from every meta attribute set */
     for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next(attr_set)) {
         if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
             continue;
         }
 
         for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next(attr)) {
             const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
             const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
 
             if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
                 remote_name = value;
             } else if (safe_str_eq(name, "remote-addr")) {
                 remote_server = value;
             } else if (safe_str_eq(name, "remote-port")) {
                 remote_port = value;
             } else if (safe_str_eq(name, "remote-connect-timeout")) {
                 connect_timeout = value;
             } else if (safe_str_eq(name, "remote-allow-migrate")) {
                 remote_allow_migrate=value;
             }
         }
     }
 
     if (remote_name == NULL) {
         return NULL;
     }
 
     if (*rsc_name_check == NULL) {
         *rsc_name_check = g_hash_table_new(crm_str_hash, g_str_equal);
         for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) {
             const char *id = ID(xml_rsc);
 
             if (id == NULL) {
                 /* The table is keyed by string (g_str_equal), so an entry
                  * without an id cannot be stored and cannot conflict */
                 continue;
             }
 
             /* avoiding heap allocation here because we know the duration of this hashtable allows us to */
             g_hash_table_insert(*rsc_name_check, (char *) id, (char *) id);
         }
     }
 
     if (g_hash_table_lookup(*rsc_name_check, remote_name)) {
 
         crm_err("Naming conflict with remote-node=%s.  remote-nodes can not have the same name as a resource.",
                 remote_name);
         return NULL;
     }
 
     /* The generated connection resource: ocf:pacemaker:remote, id = remote_name */
     xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE);
 
     crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name);
     crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, "ocf");
     crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker");
     crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote");
 
     /* Meta attributes */
     xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS);
     tmp_id = crm_concat(remote_name, XML_TAG_META_SETS, '_');
     crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id);
     free(tmp_id);
 
     remote_rsc_add_nvpair(xml_tmp, remote_name, "meta-attributes-container",
                           XML_RSC_ATTR_CONTAINER, container_id);
     remote_rsc_add_nvpair(xml_tmp, remote_name, "meta-attributes-internal",
                           XML_RSC_ATTR_INTERNAL_RSC, "true");
 
     if (remote_allow_migrate) {
         /* Needs its own id suffix: reusing "meta-attributes-container" here
          * produced two nvpairs with the same XML id */
         remote_rsc_add_nvpair(xml_tmp, remote_name, "meta-attributes-allow-migrate",
                               XML_OP_ATTR_ALLOW_MIGRATE, remote_allow_migrate);
     }
 
     /* Operations: a fixed 30s monitor, plus a start op carrying the connect timeout */
     xml_tmp = create_xml_node(xml_rsc, "operations");
     remote_rsc_add_op(xml_tmp, remote_name, "monitor-interval-30s", "monitor", "30s", "30s");
 
     if (connect_timeout) {
         remote_rsc_add_op(xml_tmp, remote_name, "start-interval-0", "start",
                           connect_timeout, "0");
     }
 
     /* Instance attributes, only when an address and/or port was configured */
     if (remote_port || remote_server) {
         xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS);
         tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_');
         crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id);
         free(tmp_id);
 
         if (remote_server) {
             remote_rsc_add_nvpair(xml_tmp, remote_name, "instance-attributes-addr",
                                   "addr", remote_server);
         }
         if (remote_port) {
             remote_rsc_add_nvpair(xml_tmp, remote_name, "instance-attributes-port",
                                   "port", remote_port);
         }
     }
 
     return remote_name;
 }
 
 /*
  * Decide whether a node whose status has not been seen yet starts out
  * unclean, and mark it as unseen.
  *
  * Remote nodes without a connection resource are left untouched.  Otherwise
  * the node is marked unclean only when STONITH is enabled and the
  * "startup-fencing" option is true.
  *
  * The option is evaluated afresh on every call.  It was previously cached in
  * a function-static flag that was only ever cleared, so one working set with
  * startup-fencing disabled switched the behaviour off for every later
  * working set processed by the same process.
  */
 static void
 handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
 {
     const char *blind_faith = NULL;
     gboolean unseen_are_unclean = TRUE;
 
     if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
         /* ignore fencing remote-nodes that don't have a connection resource associated
          * with them. This happens when remote-node entries get left in the nodes section
          * after the connection resource is removed */
         return;
     }
 
     blind_faith = pe_pref(data_set->config_hash, "startup-fencing");
 
     if (crm_is_true(blind_faith) == FALSE) {
         unseen_are_unclean = FALSE;
         crm_warn("Blind faith: not fencing unseen nodes");
     }
 
     if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE
         || unseen_are_unclean == FALSE) {
         /* blind faith... */
         new_node->details->unclean = FALSE;
 
     } else {
         /* all nodes are unclean until we've seen their
          * status entry
          */
         new_node->details->unclean = TRUE;
     }
 
     /* We need to be able to determine if a node's status section
      * exists or not separate from whether the node is unclean. */
     new_node->details->unseen = TRUE;
 }
 
 /*
  * Create a node object for every node element below 'xml_nodes', then apply
  * startup fencing and unpack its attributes and utilization.
  *
  * Entries without an id are reported as a configuration error and skipped.
  * When data_set->localhost is set but not among the known nodes, a
  * placeholder node is created for it.
  *
  * Returns FALSE if a node could not be created, TRUE otherwise.
  */
 gboolean
 unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
 {
     xmlNode *entry = NULL;
 
     for (entry = __xml_first_child(xml_nodes); entry != NULL; entry = __xml_next(entry)) {
         node_t *node = NULL;
         const char *id = NULL;
         const char *uname = NULL;
         const char *type = NULL;
         const char *score = NULL;
 
         /* Only node elements are of interest */
         if (crm_str_eq((const char *)entry->name, XML_CIB_TAG_NODE, TRUE) == FALSE) {
             continue;
         }
 
         id = crm_element_value(entry, XML_ATTR_ID);
         uname = crm_element_value(entry, XML_ATTR_UNAME);
         type = crm_element_value(entry, XML_ATTR_TYPE);
         score = crm_element_value(entry, XML_RULE_ATTR_SCORE);
         crm_trace("Processing node %s/%s", uname, id);
 
         if (id == NULL) {
             crm_config_err("Must specify id tag in <node>");
             continue;
         }
 
         node = create_node(id, uname, type, score, data_set);
         if (node == NULL) {
             return FALSE;
         }
 
         handle_startup_fencing(data_set, node);
 
         add_node_attrs(entry, node, FALSE, data_set);
         unpack_instance_attributes(data_set->input, entry, XML_TAG_UTILIZATION, NULL,
                                    node->details->utilization, NULL, FALSE, data_set->now);
 
         crm_trace("Done with node %s", uname);
     }
 
     if (data_set->localhost != NULL
         && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
         crm_info("Creating a fake local node");
         create_node(data_set->localhost, data_set->localhost, NULL, NULL, data_set);
     }
 
     return TRUE;
 }
 
 /*
  * Link 'rsc' to the resource named by its container meta attribute.
  *
  * Resources with children are not linked themselves; the function recurses
  * into each child instead.  For a childless resource whose container id
  * names another resource, rsc->container is set and rsc is appended to that
  * container's fillers list.  An unknown container id is logged as an error.
  */
 static void
 setup_container(resource_t * rsc, pe_working_set_t * data_set)
 {
     const char *container_id = NULL;
     resource_t *container = NULL;
 
     if (rsc->children) {
         GListPtr item = rsc->children;
 
         while (item != NULL) {
             setup_container((resource_t *) item->data, data_set);
             item = item->next;
         }
         return;
     }
 
     container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
     if (container_id == NULL || safe_str_neq(container_id, rsc->id) == FALSE) {
         /* No container configured, or the resource names itself */
         return;
     }
 
     container = pe_find_resource(data_set->resources, container_id);
     if (container == NULL) {
         pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
         return;
     }
 
     rsc->container = container;
     container->fillers = g_list_append(container->fillers, rsc);
     pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
 }
 
 /*
  * Generate remote node entries from the resource configuration, before the
  * resources themselves are unpacked.
  *
  * A remote node is either defined directly as a resource, or through the
  * meta attributes of another resource; in the latter case the definition is
  * expanded into an explicit primitive to be unpacked later.
  *
  * Always returns TRUE.
  */
 gboolean
 unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
 {
     xmlNode *rsc_xml = NULL;
     GHashTable *known_rsc_ids = NULL;
 
     for (rsc_xml = __xml_first_child(xml_resources); rsc_xml != NULL; rsc_xml = __xml_next(rsc_xml)) {
         const char *remote_id = NULL;
 
         if (xml_contains_remote_node(rsc_xml)) {
             remote_id = ID(rsc_xml);
 
             /* Do not iterate over an expanded node that has already been
              * added to the node list. */
             if (remote_id != NULL && pe_find_node(data_set->nodes, remote_id) != NULL) {
                 continue;
             }
 
         } else {
             /* Meta attribute definition: expand it into a real primitive */
             remote_id = expand_remote_rsc_meta(rsc_xml, xml_resources, &known_rsc_ids);
         }
 
         if (remote_id == NULL) {
             continue;
         }
 
         crm_trace("detected remote node %s", remote_id);
 
         /* only create the remote node entry if the node didn't already exist */
         if (pe_find_node(data_set->nodes, remote_id) == NULL) {
             create_node(remote_id, remote_id, "remote", NULL, data_set);
         }
     }
 
     if (known_rsc_ids != NULL) {
         g_hash_table_destroy(known_rsc_ids);
     }
 
     return TRUE;
 }
 
 
 /* Call this after all the nodes and resources have been
  * unpacked, but before the status section is read.
  *
  * A remote node's online status is reflected by the state
  * of the remote node's connection resource. We need to link
  * the remote node to this connection resource so we can have
  * easy access to the connection resource during the PE calculations.
  */
 static void
 link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
 {
     node_t *remote_node = NULL;
 
     /* Nothing to link for ordinary resources, and remote_nodes and
      * remote_resources are not linked in quick location calculations */
     if (new_rsc->is_remote_node == FALSE
         || is_set(data_set->flags, pe_flag_quick_location)) {
         return;
     }
 
     print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE);
 
     /* The remote node carries the same name as its connection resource */
     remote_node = pe_find_node(data_set->nodes, new_rsc->id);
     CRM_CHECK(remote_node != NULL, return;);
 
     remote_node->details->remote_rsc = new_rsc;
 
     /* A baremetal remote-node (no container resource associated with it)
      * needs startup fencing handled the same way as cluster nodes. */
     if (new_rsc->container == NULL) {
         handle_startup_fencing(data_set, remote_node);
     }
 }
 
 /*
  * Destructor for tag entries: frees the tag's id, every string in its refs
  * list, and the tag itself.  A NULL entry is ignored.
  */
 static void
 destroy_tag(gpointer data)
 {
     tag_t *entry = (tag_t *) data;
 
     if (entry == NULL) {
         return;
     }
 
     free(entry->id);
     g_list_free_full(entry->refs, free);
     free(entry);
 }
 
 /*
  * Unpack every resource below 'xml_resources' into data_set->resources.
  *
  * Resource templates are only recorded by id in data_set->template_rsc_sets.
  * Once all resources exist, containers and remote-node connections are
  * linked up and the list is sorted with sort_rsc_priority.  A set of
  * configuration errors is logged when STONITH is enabled but no STONITH
  * resource has been flagged as present.
  *
  * Always returns TRUE.
  */
 gboolean
 unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
 {
     xmlNode *rsc_xml = NULL;
     GListPtr item = NULL;
 
     data_set->template_rsc_sets =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                               destroy_tag);
 
     for (rsc_xml = __xml_first_child(xml_resources); rsc_xml != NULL; rsc_xml = __xml_next(rsc_xml)) {
         resource_t *rsc = NULL;
 
         if (crm_str_eq((const char *)rsc_xml->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
             const char *template_id = ID(rsc_xml);
 
             /* Record the template's ID for the knowledge of its existence anyway. */
             if (template_id != NULL
                 && g_hash_table_lookup_extended(data_set->template_rsc_sets,
                                                 template_id, NULL, NULL) == FALSE) {
                 g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
             }
             continue;
         }
 
         crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(rsc_xml), ID(rsc_xml));
 
         if (common_unpack(rsc_xml, &rsc, NULL, data_set) == FALSE) {
             crm_config_err("Failed unpacking %s %s",
                            crm_element_name(rsc_xml), crm_element_value(rsc_xml, XML_ATTR_ID));
 
             /* Release whatever was partially constructed */
             if (rsc != NULL && rsc->fns != NULL) {
                 rsc->fns->free(rsc);
             }
             continue;
         }
 
         data_set->resources = g_list_append(data_set->resources, rsc);
 
         if (xml_contains_remote_node(rsc_xml)) {
             rsc->is_remote_node = TRUE;
         }
         print_resource(LOG_DEBUG_3, "Added ", rsc, FALSE);
     }
 
     /* Second pass, now that every resource can be looked up by id */
     for (item = data_set->resources; item != NULL; item = item->next) {
         resource_t *rsc = (resource_t *) item->data;
 
         setup_container(rsc, data_set);
         link_rsc2remotenode(data_set, rsc);
     }
 
     data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
 
     if (is_not_set(data_set->flags, pe_flag_quick_location)
         && is_set(data_set->flags, pe_flag_stonith_enabled)
         && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
         crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
         crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
         crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
     }
 
     return TRUE;
 }
 
 /*
  * Unpack the tags section into data_set->tags.
  *
  * Each tag element contributes one reference per obj_ref child.  Tags or
  * references lacking an id are reported as configuration errors and skipped.
  *
  * Returns FALSE as soon as add_tag_ref() fails, TRUE otherwise.
  */
 gboolean
 unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
 {
     xmlNode *tag_xml = NULL;
 
     data_set->tags =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag);
 
     for (tag_xml = __xml_first_child(xml_tags); tag_xml != NULL; tag_xml = __xml_next(tag_xml)) {
         xmlNode *ref_xml = NULL;
         const char *tag_id = ID(tag_xml);
 
         /* Ignore anything that is not a tag element */
         if (crm_str_eq((const char *)tag_xml->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
             continue;
         }
 
         if (tag_id == NULL) {
             crm_config_err("Failed unpacking %s: %s should be specified",
                            crm_element_name(tag_xml), XML_ATTR_ID);
             continue;
         }
 
         for (ref_xml = __xml_first_child(tag_xml); ref_xml != NULL; ref_xml = __xml_next(ref_xml)) {
             const char *ref_id = ID(ref_xml);
 
             /* Ignore anything that is not an object reference */
             if (crm_str_eq((const char *)ref_xml->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
                 continue;
             }
 
             if (ref_id == NULL) {
                 crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
                                crm_element_name(ref_xml), tag_id, XML_ATTR_ID);
                 continue;
             }
 
             if (add_tag_ref(data_set->tags, tag_id, ref_id) == FALSE) {
                 return FALSE;
             }
         }
     }
 
     return TRUE;
 }
 
 /* The ticket state section:
  * "/cib/status/tickets/ticket_state" */
 /*
  * Unpack one ticket state element into the matching ticket object, creating
  * the ticket if it is not yet known.
  *
  * Every attribute except the id is copied into ticket->state; the "granted",
  * "last-granted" and "standby" entries are then decoded into the
  * corresponding ticket fields.
  *
  * Returns FALSE when the element has no usable id or the ticket cannot be
  * created, TRUE otherwise.
  */
 static gboolean
 unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
 {
     const char *ticket_id = ID(xml_ticket);
     const char *state_value = NULL;
     xmlAttrPtr attr = NULL;
     ticket_t *ticket = NULL;
 
     if (ticket_id == NULL || ticket_id[0] == '\0') {
         return FALSE;
     }
 
     crm_trace("Processing ticket state for %s", ticket_id);
 
     ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
     if (ticket == NULL) {
         ticket = ticket_new(ticket_id, data_set);
     }
     if (ticket == NULL) {
         return FALSE;
     }
 
     /* Copy all attributes but the id into the ticket's state table */
     attr = xml_ticket->properties;
     while (attr != NULL) {
         const char *prop_name = (const char *)attr->name;
         const char *prop_value = crm_element_value(xml_ticket, prop_name);
 
         if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE) == FALSE) {
             g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
         }
         attr = attr->next;
     }
 
     state_value = g_hash_table_lookup(ticket->state, "granted");
     if (state_value == NULL || crm_is_true(state_value) == FALSE) {
         ticket->granted = FALSE;
         crm_info("We do not have ticket '%s'", ticket->id);
     } else {
         ticket->granted = TRUE;
         crm_info("We have ticket '%s'", ticket->id);
     }
 
     state_value = g_hash_table_lookup(ticket->state, "last-granted");
     if (state_value != NULL) {
         ticket->last_granted = crm_parse_int(state_value, 0);
     }
 
     state_value = g_hash_table_lookup(ticket->state, "standby");
     if (state_value == NULL || crm_is_true(state_value) == FALSE) {
         ticket->standby = FALSE;
     } else {
         ticket->standby = TRUE;
         if (ticket->granted) {
             crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
         }
     }
 
     crm_trace("Done with ticket state for %s", ticket_id);
 
     return TRUE;
 }
 
 /* Unpack every ticket_state child of the given tickets element.
  * Children with any other element name are ignored; always returns TRUE. */
 static gboolean
 unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
 {
     xmlNode *child = __xml_first_child(xml_tickets);
 
     while (child != NULL) {
         if (crm_str_eq((const char *)child->name, XML_CIB_TAG_TICKET_STATE, TRUE)) {
             unpack_ticket_state(child, data_set);
         }
         child = __xml_next(child);
     }
 
     return TRUE;
 }
 
 /* Compatibility with the deprecated ticket state section:
  * "/cib/status/tickets/instance_attributes"
  *
  * GHFunc callback: 'key' is a legacy attribute name, 'value' its string value
  * and 'user_data' the working set.  Recognised key layouts:
  *   granted-ticket-<ticket>  -> state "granted"
  *   last-granted-<ticket>    -> state "last-granted"
  *   <state>-<ticket>         -> split at the last '-'
  * Keys that yield an empty ticket id or an empty state name are ignored. */
 static void
 get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data)
 {
     static const char granted_prefix[] = "granted-ticket-";
     static const char last_granted_prefix[] = "last-granted-";
 
     const size_t granted_len = sizeof(granted_prefix) - 1;
     const size_t last_granted_len = sizeof(last_granted_prefix) - 1;
 
     const char *attr_name = key;
     const char *ticket_id = NULL;
     const char *granted_value = NULL;
     const char *last_granted_value = NULL;
     const char *dash = NULL;
     char *state_name = NULL;
 
     ticket_t *ticket = NULL;
     pe_working_set_t *data_set = user_data;
 
     if (strncmp(attr_name, granted_prefix, granted_len) == 0) {
         ticket_id = attr_name + granted_len;
         if (ticket_id[0] != '\0') {
             state_name = strdup("granted");
             granted_value = value;
         }
 
     } else if (strncmp(attr_name, last_granted_prefix, last_granted_len) == 0) {
         ticket_id = attr_name + last_granted_len;
         if (ticket_id[0] != '\0') {
             state_name = strdup("last-granted");
             last_granted_value = value;
         }
 
     } else {
         dash = strrchr(attr_name, '-');
         if (dash != NULL) {
             /* Everything before the last '-' is the state name */
             ticket_id = dash + 1;
             state_name = strndup(attr_name, dash - attr_name);
         }
     }
 
     /* Both halves must be non-empty for the entry to be usable */
     if (ticket_id == NULL || ticket_id[0] == '\0'
         || state_name == NULL || state_name[0] == '\0') {
         free(state_name);
         return;
     }
 
     ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
     if (ticket == NULL) {
         ticket = ticket_new(ticket_id, data_set);
     }
     if (ticket == NULL) {
         free(state_name);
         return;
     }
 
     /* The state table receives state_name here, so it is not freed below */
     g_hash_table_replace(ticket->state, state_name, strdup(value));
 
     if (granted_value != NULL) {
         if (crm_is_true(granted_value)) {
             ticket->granted = TRUE;
             crm_info("We have ticket '%s'", ticket->id);
 
         } else {
             ticket->granted = FALSE;
             crm_info("We do not have ticket '%s'", ticket->id);
         }
 
     } else if (last_granted_value != NULL) {
         ticket->last_granted = crm_parse_int(last_granted_value, 0);
     }
 }
 
 /* Unpack the CIB status section into the working set.
  *
  * Pass 1 walks the children of 'status':
  *   - a tickets element has its deprecated instance_attributes unpacked
  *     first and its ticket_state children second
  *   - a node state element for a known, non-remote node has its transient
  *     attributes, standby/maintenance flags and online status evaluated
  * Pass 2 unpacks the lrm resource history of each non-remote node that is
  * online, or of any such node when stonith is enabled.
  * Remote nodes are handled last, by unpack_remote_status().
  *
  * Always returns TRUE. */
 gboolean
 unpack_status(xmlNode * status, pe_working_set_t * data_set)
 {
     const char *id = NULL;
     const char *uname = NULL;
 
     xmlNode *state = NULL;
     xmlNode *lrm_rsc = NULL;
     node_t *this_node = NULL;
 
     crm_trace("Beginning unpack");
 
     /* Create the ticket table on first use */
     if (data_set->tickets == NULL) {
         data_set->tickets =
             g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket);
     }
 
     for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
             xmlNode *xml_tickets = state;
             GHashTable *state_hash = NULL;
 
             /* Compatibility with the deprecated ticket state section:
              * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */
             state_hash =
                 g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                                       g_hash_destroy_str);
 
             unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL,
                                        state_hash, NULL, TRUE, data_set->now);
 
             /* Translate each legacy name/value pair into per-ticket state */
             g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set);
 
             if (state_hash) {
                 g_hash_table_destroy(state_hash);
             }
 
             /* Unpack the new "/cib/status/tickets/ticket_state"s */
             unpack_tickets_state(xml_tickets, data_set);
         }
 
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
             xmlNode *attrs = NULL;
 
             id = crm_element_value(state, XML_ATTR_ID);
             uname = crm_element_value(state, XML_ATTR_UNAME);
             this_node = pe_find_node_any(data_set->nodes, id, uname);
 
             if (uname == NULL) {
                 /* error: entry has no uname, silently skipped */
                 continue;
 
             } else if (this_node == NULL) {
                 crm_config_warn("Node %s in status section no longer exists", uname);
                 continue;
 
             } else if (is_remote_node(this_node)) {
                 /* online state for remote nodes is determined by the rsc state
                  * after all the unpacking is done. */
                 continue;
             }
 
             crm_trace("Processing node id=%s, uname=%s", id, uname);
 
             /* Mark the node as provisionally clean
              * - at least we have seen it in the current cluster's lifetime
              */
             this_node->details->unclean = FALSE;
             this_node->details->unseen = FALSE;
             attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
             add_node_attrs(attrs, this_node, TRUE, data_set);
 
             /* These flags are only ever set here, never cleared */
             if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
                 crm_info("Node %s is in standby-mode", this_node->details->uname);
                 this_node->details->standby = TRUE;
             }
 
             if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) {
                 crm_info("Node %s is in maintenance-mode", this_node->details->uname);
                 this_node->details->maintenance = TRUE;
             }
 
             crm_trace("determining node state");
             determine_online_status(state, this_node, data_set);
 
             if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) {
                 /* Everything else should flow from this automatically
                  * At least until the PE becomes able to migrate off healthy resources
                  */
                 pe_fence_node(data_set, this_node, "because the cluster does not have quorum");
             }
         }
     }
 
     /* Now that we know all node states, we can safely handle migration ops */
     for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
         if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
             continue;
         }
 
         id = crm_element_value(state, XML_ATTR_ID);
         uname = crm_element_value(state, XML_ATTR_UNAME);
         this_node = pe_find_node_any(data_set->nodes, id, uname);
 
         if (this_node == NULL) {
             crm_info("Node %s is unknown", id);
             continue;
 
         } else if (is_remote_node(this_node)) {
 
             /* online status of remote node can not be determined until all other
              * resource status is unpacked. */
             continue;
         } else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
             /* With stonith enabled the history is unpacked even for offline nodes */
             crm_trace("Processing lrm resource entries on healthy node: %s",
                       this_node->details->uname);
             lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
             lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
             unpack_lrm_resources(this_node, lrm_rsc, data_set);
         }
     }
 
     /* now that the rest of the cluster's status is determined
      * calculate remote-nodes */
     unpack_remote_status(status, data_set);
 
     return TRUE;
 }
 
 /* Unpack the status of remote nodes, in three passes:
  *   1. derive each remote node's online flag from its connection resource
  *   2. apply transient attributes (including "standby") from the status XML
  *   3. unpack the lrm resource history recorded for each remote node
  * Does nothing when the working set has no remote nodes.  Always returns TRUE. */
 gboolean
 unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     xmlNode *entry = NULL;
     xmlNode *history = NULL;
     node_t *remote = NULL;
     const char *id = NULL;
     const char *uname = NULL;
 
     if (!is_set(data_set->flags, pe_flag_have_remote_nodes)) {
         crm_trace("no remote nodes to unpack");
         return TRUE;
     }
 
     /* get online status */
     for (item = data_set->nodes; item != NULL; item = item->next) {
         remote = item->data;
 
         if ((remote != NULL) && is_remote_node(remote)) {
             determine_remote_online_status(remote);
         }
     }
 
     /* process attributes */
     for (entry = __xml_first_child(status); entry != NULL; entry = __xml_next(entry)) {
         xmlNode *attrs = NULL;
 
         if (!crm_str_eq((const char *)entry->name, XML_CIB_TAG_STATE, TRUE)) {
             continue;
         }
 
         id = crm_element_value(entry, XML_ATTR_ID);
         uname = crm_element_value(entry, XML_ATTR_UNAME);
         remote = pe_find_node_any(data_set->nodes, id, uname);
 
         if ((remote != NULL) && is_remote_node(remote)) {
             crm_trace("Processing remote node id=%s, uname=%s", id, uname);
 
             remote->details->unclean = FALSE;
             remote->details->unseen = FALSE;
             attrs = find_xml_node(entry, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
             add_node_attrs(attrs, remote, TRUE, data_set);
 
             if (crm_is_true(g_hash_table_lookup(remote->details->attrs, "standby"))) {
                 crm_info("Node %s is in standby-mode", remote->details->uname);
                 remote->details->standby = TRUE;
             }
         }
     }
 
     /* process node rsc status */
     for (entry = __xml_first_child(status); entry != NULL; entry = __xml_next(entry)) {
         if (!crm_str_eq((const char *)entry->name, XML_CIB_TAG_STATE, TRUE)) {
             continue;
         }
 
         id = crm_element_value(entry, XML_ATTR_ID);
         uname = crm_element_value(entry, XML_ATTR_UNAME);
         remote = pe_find_node_any(data_set->nodes, id, uname);
 
         if ((remote != NULL) && is_remote_node(remote)) {
             crm_trace("Processing lrm resource entries on healthy remote node: %s",
                       remote->details->uname);
             history = find_xml_node(entry, XML_CIB_TAG_LRM, FALSE);
             history = find_xml_node(history, XML_LRM_TAG_RESOURCES, FALSE);
             unpack_lrm_resources(remote, history, data_set);
         }
     }
 
     return TRUE;
 }
 
 /* Decide whether a node is online when stonith is disabled.
  *
  * Returns TRUE only when the node is in the cluster, its peer status equals
  * ONLINESTATUS and its join state is "member".  A node that is in the cluster,
  * is not an online peer and was expected to be up is passed to
  * pe_fence_node(); in every other case the reason is only logged. */
 static gboolean
 determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                    node_t * this_node)
 {
     const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
     const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
     const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
     const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
 
     if (!crm_is_true(in_cluster)) {
         crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
         return FALSE;
     }
 
     if (safe_str_eq(is_peer, ONLINESTATUS)) {
         if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
             return TRUE;
         }
         crm_debug("Node is not ready to run resources: %s", join);
         return FALSE;
     }
 
     if (!this_node->details->expected_up) {
         crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster));
         crm_trace("\tis_peer=%s, join=%s, expected=%s",
                   crm_str(is_peer), crm_str(join), crm_str(exp_state));
         return FALSE;
     }
 
     /* mark it unclean */
     pe_fence_node(data_set, this_node, "unexpectedly down");
     crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
              crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
     return FALSE;
 }
 
 /* Decide whether a node is online when stonith is enabled.
  *
  * Reads the membership fields of the node's status entry plus the node's
  * "terminate" attribute, and walks an ordered chain of conditions; the first
  * match either fences the node via pe_fence_node(), marks it
  * standby + pending, or just logs.  The order of the chain is significant.
  *
  * Returns the truth value of in_cluster, except that a node which is shutting
  * down is judged by is_peer instead, and a node that "was just shot" is
  * reported offline. */
 static gboolean
 determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                 node_t * this_node)
 {
     gboolean online = FALSE;
     gboolean do_terminate = FALSE;
     const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
     const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
     const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
     const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
     const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate");
 
 /*
   - XML_NODE_IN_CLUSTER    ::= true|false
   - XML_NODE_IS_PEER       ::= true|false|online|offline
   - XML_NODE_JOIN_STATE    ::= member|down|pending|banned
   - XML_NODE_EXPECTED      ::= member|down
 */
 
     if (crm_is_true(terminate)) {
         do_terminate = TRUE;
 
     } else if (terminate != NULL && strlen(terminate) > 0) {
         /* could be a time() value */
         char t = terminate[0];
 
         /* isdigit() is only defined for values representable as unsigned
          * char (or EOF), so a plain char must be converted first */
         if (t != '0' && isdigit((unsigned char) t)) {
             do_terminate = TRUE;
         }
     }
 
     crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
               this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
               crm_str(join), crm_str(exp_state), do_terminate);
 
     online = crm_is_true(in_cluster);
 
     /* Normalise the inputs: the ONLINESTATUS peer value is treated as a
      * boolean yes, and a missing expected state is treated as "down" */
     if (safe_str_eq(is_peer, ONLINESTATUS)) {
         is_peer = XML_BOOLEAN_YES;
     }
     if (exp_state == NULL) {
         exp_state = CRMD_JOINSTATE_DOWN;
     }
 
     if (this_node->details->shutdown) {
         crm_debug("%s is shutting down", this_node->details->uname);
         online = crm_is_true(is_peer);  /* Slightly different criteria since we can't shut down a dead peer */
 
     } else if (in_cluster == NULL) {
         pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster");
 
     } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
         pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria");
 
     } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
 
         if (crm_is_true(in_cluster) || crm_is_true(is_peer)) {
             crm_info("- Node %s is not ready to run resources", this_node->details->uname);
             this_node->details->standby = TRUE;
             this_node->details->pending = TRUE;
 
         } else {
             crm_trace("%s is down or still coming up", this_node->details->uname);
         }
 
     } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
                && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) {
         crm_info("Node %s was just shot", this_node->details->uname);
         online = FALSE;
 
     } else if (crm_is_true(in_cluster) == FALSE) {
         pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster");
 
     } else if (crm_is_true(is_peer) == FALSE) {
         pe_fence_node(data_set, this_node, "because our peer process is no longer available");
 
         /* Everything is running at this point, now check join state */
     } else if (do_terminate) {
         pe_fence_node(data_set, this_node, "because termination was requested");
 
     } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
         crm_info("Node %s is active", this_node->details->uname);
 
     } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
                || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
         crm_info("Node %s is not ready to run resources", this_node->details->uname);
         this_node->details->standby = TRUE;
         this_node->details->pending = TRUE;
 
     } else {
         pe_fence_node(data_set, this_node, "because the peer was in an unknown state");
         crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
                  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
                  crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
     }
 
     return online;
 }
 
 /* Derive a remote node's online/shutdown flags from its connection resource
  * (details->remote_rsc) and, when present, that resource's container.
  *
  * - no connection resource: offline
  * - connection resource started: online; also shutting down when its next
  *   role is stopped
  * - connection resource or container failed or stopped: offline (this check
  *   runs last and therefore wins)
  * In any other situation the online flag is left as it was.
  *
  * Returns the resulting online flag. */
 static gboolean
 determine_remote_online_status(node_t * this_node)
 {
     resource_t *rsc = this_node->details->remote_rsc;
 
     if (rsc == NULL) {
         this_node->details->online = FALSE;
 
     } else {
         resource_t *container = rsc->container;
 
         if (rsc->role == RSC_ROLE_STARTED) {
             /* If the resource is currently started, mark it online. */
             crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id);
             this_node->details->online = TRUE;
 
             /* consider this node shutting down if transitioning start->stop */
             if (rsc->next_role == RSC_ROLE_STOPPED) {
                 crm_trace("Remote node %s shutdown. transition from start to stop role", this_node->details->id);
                 this_node->details->shutdown = TRUE;
             }
         }
 
         /* Now check all the failure conditions. */
         if (is_set(rsc->flags, pe_rsc_failed) ||
             (rsc->role == RSC_ROLE_STOPPED) ||
             (container && is_set(container->flags, pe_rsc_failed)) ||
             (container && container->role == RSC_ROLE_STOPPED)) {
 
             crm_trace("Remote node %s is set to OFFLINE. node is stopped or rsc failed.", this_node->details->id);
             this_node->details->online = FALSE;
         }
     }
 
     crm_trace("Remote node %s online=%s",
         this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
     return this_node->details->online;
 }
 
 /* Work out whether a cluster node is online and record the result on it.
  *
  * Resets and recomputes the node's shutdown and expected_up flags, delegates
  * the decision to the fencing or no-fencing variant depending on whether
  * stonith is enabled (ping nodes are always reported offline), and removes
  * the node from contention (fixed, weight -INFINITY) when it is offline or
  * shutting down.
  *
  * Returns the computed online value, or FALSE when this_node is NULL. */
 gboolean
 determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
 {
     gboolean is_online = FALSE;
     const char *shutdown_attr = NULL;
     const char *expected = crm_element_value(node_state, XML_NODE_EXPECTED);
 
     if (this_node == NULL) {
         crm_config_err("No node to check");
         return FALSE;
     }
 
     this_node->details->shutdown = FALSE;
     this_node->details->expected_up = FALSE;
 
     /* Any shutdown value other than "0" means a shutdown was requested;
      * otherwise the node counts as expected up when it should be a member */
     shutdown_attr = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
     if (shutdown_attr != NULL && safe_str_neq("0", shutdown_attr)) {
         this_node->details->shutdown = TRUE;
 
     } else if (safe_str_eq(expected, CRMD_JOINSTATE_MEMBER)) {
         this_node->details->expected_up = TRUE;
     }
 
     if (this_node->details->type == node_ping) {
         /* As far as resource management is concerned,
          * the node is safely offline.
          * Anyone caught abusing this logic will be shot
          */
         this_node->details->unclean = FALSE;
         is_online = FALSE;
 
     } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
         is_online = determine_online_status_fencing(data_set, node_state, this_node);
 
     } else {
         is_online = determine_online_status_no_fencing(data_set, node_state, this_node);
     }
 
     if (is_online) {
         this_node->details->online = TRUE;
     }
 
     if (!is_online || this_node->details->shutdown) {
         /* remove node from contention / don't run resources here */
         this_node->fixed = TRUE;
         this_node->weight = -INFINITY;
     }
 
     if (this_node->details->type == node_ping) {
         crm_info("Node %s is not a pacemaker node", this_node->details->uname);
 
     } else if (this_node->details->unclean) {
         pe_proc_warn("Node %s is unclean", this_node->details->uname);
 
     } else if (this_node->details->online) {
         const char *mode = "online";
 
         /* First matching condition wins */
         if (this_node->details->shutdown) {
             mode = "shutting down";
         } else if (this_node->details->pending) {
             mode = "pending";
         } else if (this_node->details->standby) {
             mode = "standby";
         } else if (this_node->details->maintenance) {
             mode = "maintenance";
         }
         crm_info("Node %s is %s", this_node->details->uname, mode);
 
     } else {
         crm_trace("Node %s is offline", this_node->details->uname);
     }
 
     return is_online;
 }
 
 /* Return a newly allocated copy of a resource id with any trailing
  * ":<digits>" clone suffix removed (e.g. "rsc:12" -> "rsc").
  *
  * Ids without such a suffix are copied unchanged.  The first character is
  * never examined, so an id that starts with ':' is also copied unchanged.
  *
  * Returns NULL when last_rsc_id is NULL or when memory cannot be allocated.
  * The caller must free() the result. */
 char *
 clone_strip(const char *last_rsc_id)
 {
     int lpc = 0;
 
     CRM_CHECK(last_rsc_id != NULL, return NULL);
 
     /* Scan backwards over trailing digits looking for the ':' separator */
     lpc = strlen(last_rsc_id);
     while (--lpc > 0) {
         char c = last_rsc_id[lpc];
 
         if (c == ':') {
             /* Keep only what precedes the separator */
             return strndup(last_rsc_id, lpc);
         }
         if (c < '0' || c > '9') {
             /* Not a clone suffix */
             break;
         }
     }
 
     return strdup(last_rsc_id);
 }
 
 char *
 clone_zero(const char *last_rsc_id)
 {
     int lpc = 0;
     char *zero = NULL;
 
     CRM_CHECK(last_rsc_id != NULL, return NULL);
     if (last_rsc_id != NULL) {
         lpc = strlen(last_rsc_id);
     }
 
     while (--lpc > 0) {
         switch (last_rsc_id[lpc]) {
             case 0:
                 return NULL;
                 break;
             case '0':
             case '1':
             case '2':
             case '3':
             case '4':
             case '5':
             case '6':
             case '7':
             case '8':
             case '9':
                 break;
             case ':':
                 zero = calloc(1, lpc + 3);
                 memcpy(zero, last_rsc_id, lpc);
                 zero[lpc] = ':';
                 zero[lpc + 1] = '0';
                 zero[lpc + 2] = 0;
                 return zero;
             default:
                 goto done;
         }
     }
   done:
     lpc = strlen(last_rsc_id);
     zero = calloc(1, lpc + 3);
     memcpy(zero, last_rsc_id, lpc);
     zero[lpc] = ':';
     zero[lpc + 1] = '0';
     zero[lpc + 2] = 0;
     crm_trace("%s -> %s", last_rsc_id, zero);
     return zero;
 }
 
 /* Build a stand-in resource for an lrm history entry that has no matching
  * configured resource.
  *
  * The new resource is unpacked from a copy of the entry's properties, flagged
  * as an orphan and appended to data_set->resources.  When it describes a
  * remote node, a node entry is looked up (or created) and marked as shutting
  * down.  Returns NULL if common_unpack() fails. */
 static resource_t *
 create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
 {
     resource_t *orphan = NULL;
     xmlNode *orphan_xml = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
 
     copy_in_properties(orphan_xml, rsc_entry);
     crm_xml_add(orphan_xml, XML_ATTR_ID, rsc_id);
     crm_log_xml_debug(orphan_xml, "Orphan resource");
 
     if (common_unpack(orphan_xml, &orphan, NULL, data_set) == FALSE) {
         return NULL;
     }
 
     if (xml_contains_remote_node(orphan_xml)) {
         node_t *remote = NULL;
 
         crm_debug("Detected orphaned remote node %s", rsc_id);
         orphan->is_remote_node = TRUE;
 
         remote = pe_find_node(data_set->nodes, rsc_id);
         if (remote == NULL) {
             remote = create_node(rsc_id, rsc_id, "remote", NULL, data_set);
         }
         link_rsc2remotenode(data_set, orphan);
 
         if (remote != NULL) {
             crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
             remote->details->shutdown = TRUE;
         }
     }
 
     if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER) != NULL) {
         /* This orphaned rsc needs to be mapped to a container. */
         crm_trace("Detected orphaned container filler %s", rsc_id);
         set_bit(orphan->flags, pe_rsc_orphan_container_filler);
     }
 
     set_bit(orphan->flags, pe_rsc_orphan);
     data_set->resources = g_list_append(data_set->resources, orphan);
 
     return orphan;
 }
 
 extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set);
 
 /* Pick the instance of an anonymous clone that an lrm history entry seen on
  * 'node' should be mapped to.
  *
  * Search order:
  *   1. an instance already located on 'node'
  *   2. unless step 1 found one that is already running, an instance that has
  *      no location and is not blocked
  *   3. a newly created extra instance (create_child_clone() with sub_id -1)
  *
  * 'parent' must be a non-unique clone or master resource and 'rsc_id' the
  * name to look up inside each instance.  Never returns NULL. */
 static resource_t *
 find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
                      const char *rsc_id)
 {
     GListPtr rIter = NULL;
     resource_t *rsc = NULL;
     gboolean skip_inactive = FALSE;
 
     CRM_ASSERT(parent != NULL);
     CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master);
     CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
 
     /* Find an instance active (or partially active for grouped clones) on the specified node */
     pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
     for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
         GListPtr nIter = NULL;
         GListPtr locations = NULL;
         resource_t *child = rIter->data;
 
         child->fns->location(child, &locations, TRUE);
         if (locations == NULL) {
             pe_rsc_trace(child, "Resource %s, skip inactive", child->id);
             continue;
         }
 
         for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) {
             node_t *childnode = nIter->data;
 
             if (childnode->details == node->details) {
                 /* ->find_rsc() because we might be a cloned group */
                 rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
                 if(rsc) {
                     pe_rsc_trace(rsc, "Resource %s, active", rsc->id);
                 }
             }
 
             /* Keep this block, it means we'll do the right thing if
              * anyone toggles the unique flag to 'off'
              */
             if (rsc && rsc->running_on) {
                 crm_notice("/Anonymous/ clone %s is already running on %s",
                            parent->id, node->details->uname);
                 skip_inactive = TRUE;
                 rsc = NULL;
             }
         }
 
         g_list_free(locations);
     }
 
     /* Find an inactive instance */
     if (skip_inactive == FALSE) {
         pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id);
         for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
             GListPtr locations = NULL;
             resource_t *child = rIter->data;
 
             if (is_set(child->flags, pe_rsc_block)) {
                 pe_rsc_trace(child, "Skip: blocked in stopped state");
                 continue;
             }
 
             child->fns->location(child, &locations, TRUE);
             if (locations == NULL) {
                 /* ->find_rsc() because we might be a cloned group */
                 rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
                 if (rsc) {
                     /* Guarded like the lookup in the first loop: find_rsc()
                      * may not find rsc_id inside this child */
                     pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id);
                 }
             }
             g_list_free(locations);
         }
     }
 
     if (rsc == NULL) {
         /* Create an extra orphan */
         resource_t *top = create_child_clone(parent, -1, data_set);
 
         /* ->find_rsc() because we might be a cloned group */
         rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
         CRM_ASSERT(rsc != NULL);
 
         pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id,
                      node->details->uname);
     }
 
     if (safe_str_neq(rsc_id, rsc->id)) {
         pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
                     rsc_id, node->details->uname, rsc->id,
                     is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : "");
     }
 
     return rsc;
 }
 
 /* Map the id of an lrm history entry to a resource in the working set.
  *
  * The id is looked up as-is first and, failing that, as its ":0" clone
  * instance.  When the match belongs to an anonymous clone, the instance to
  * use is chosen by find_anonymous_clone() and the id from the history is
  * remembered as the resource's clone_name.
  *
  * Returns NULL when nothing matches, or when the id now names a resource
  * that is no longer a primitive. */
 static resource_t *
 unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
                      xmlNode * rsc_entry)
 {
     resource_t *match = NULL;
     resource_t *top = NULL;
 
     crm_trace("looking for %s", rsc_id);
     match = pe_find_resource(data_set->resources, rsc_id);
 
     if (match != NULL) {
         if (match->variant > pe_native) {
             crm_trace("%s is no longer a primitve resource, the lrm_resource entry is obsolete",
                       rsc_id);
             return NULL;
         }
         top = uber_parent(match);
 
     } else {
         /* Even when clone-max=0, we still create a single :0 orphan to match against */
         char *zero_id = clone_zero(rsc_id);
         resource_t *instance0 = pe_find_resource(data_set->resources, zero_id);
 
         if (instance0 != NULL && is_not_set(instance0->flags, pe_rsc_unique)) {
             match = instance0;
         } else {
             crm_trace("%s is not known as %s either", rsc_id, zero_id);
         }
 
         top = uber_parent(instance0);
         free(zero_id);
 
         crm_trace("%s not found: %s", rsc_id, top ? top->id : "orphan");
     }
 
     /* Only clone-like parents need any further mapping */
     if (top == NULL || top->variant <= pe_group) {
         return match;
     }
 
     if (is_not_set(top->flags, pe_rsc_unique)) {
         char *stripped = clone_strip(rsc_id);
 
         match = find_anonymous_clone(data_set, node, top, stripped);
         CRM_ASSERT(match != NULL);
         free(stripped);
     }
 
     /* Remember the name the history uses when it differs from ours */
     if (match != NULL && safe_str_neq(rsc_id, match->id)) {
         free(match->clone_name);
         match->clone_name = strdup(rsc_id);
     }
 
     return match;
 }
 
 /* Create and register a stand-in resource for an lrm history entry that has
  * no matching configured resource.
  *
  * When orphans are not to be stopped, the stand-in is simply marked
  * unmanaged.  Otherwise it is banned from every node and, for each online
  * node that records a failcount for it, an action clearing that failcount
  * is created and ordered before the relevant "probed" pseudo action.
  *
  * Returns the new resource, or NULL if it could not be created. */
 static resource_t *
 process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     resource_t *rsc = NULL;
     const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
 
     crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
     rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
 
     /* create_fake_resource() returns NULL when the entry cannot be unpacked,
      * so this must be checked before rsc is first dereferenced */
     CRM_CHECK(rsc != NULL, return NULL);
 
     if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
         clear_bit(rsc->flags, pe_rsc_managed);
         return rsc;
     }
 
     print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE);
 
     resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
 
     for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
         /* Named 'peer' so that it does not shadow the 'node' parameter */
         node_t *peer = (node_t *) gIter->data;
 
         if (peer->details->online && get_failcount(peer, rsc, NULL, data_set)) {
             action_t *clear_op = NULL;
             action_t *ready = NULL;
 
             /* Remote nodes have their own "probed" pseudo action, named
              * after the node id */
             if (is_remote_node(peer)) {
                 char *pseudo_op_name = crm_concat(CRM_OP_PROBED, peer->details->id, '_');
                 ready = get_pseudo_op(pseudo_op_name, data_set);
                 free(pseudo_op_name);
             } else {
                 ready = get_pseudo_op(CRM_OP_PROBED, data_set);
             }
 
             clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
                                      CRM_OP_CLEAR_FAILCOUNT, peer, FALSE, TRUE, data_set);
 
             add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
             pe_rsc_info(rsc, "Clearing failcount (%d) for orphaned resource %s on %s (%s)",
                         get_failcount(peer, rsc, NULL, data_set), rsc->id, peer->details->uname,
                         clear_op->uuid);
 
             order_actions(clear_op, ready, pe_order_optional);
         }
     }
 
     return rsc;
 }
 
 /*
  * Apply the state that was unpacked for one resource on one node.
  *
  * rsc        - resource whose history was just unpacked (must not be NULL)
  * node       - node the history belongs to
  * on_fail    - failure policy accumulated while unpacking the operations
  * migrate_op - accepted for the caller's convenience; not referenced here
  * data_set   - working set that receives constraints / fencing requests
  *
  * In order, this:
  *   1. records the node in known_on for the resource and its ancestors
  *      (stopping after the first ancestor flagged pe_rsc_unique),
  *   2. requests fencing when a managed resource is active on an offline node,
  *   3. applies the on_fail policy (ignored when the node is already unclean),
  *   4. forces fencing of an unclean remote node whose connection failed,
  *   5. registers the resource as running, or tidies up for a stopped one.
  */
 static void
 process_rsc_state(resource_t * rsc, node_t * node,
                   enum action_fail_response on_fail,
                   xmlNode * migrate_op, pe_working_set_t * data_set)
 {
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
                  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
 
     /* process current state */
     if (rsc->role != RSC_ROLE_UNKNOWN) {
         resource_t *iter = rsc;
 
         while (iter) {
             if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
                 node_t *n = node_copy(node);
 
                 pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
                              n->details->uname);
                 g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
             }
             if (is_set(iter->flags, pe_rsc_unique)) {
                 break;
             }
             iter = iter->parent;
         }
     }
 
     if (rsc->role > RSC_ROLE_STOPPED
         && node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) {
 
+        char *reason = NULL;
         gboolean should_fence = FALSE;
 
         /* if this is a remote_node living in a container, fence the container
          * by recovering it. Mark the resource as unmanaged. Once the container
          * and remote connection are re-established, the status section will
          * get reset in the crmd freeing up this resource to run again once we
          * are sure we know the resources state. */
         if (is_container_remote_node(node)) {
             set_bit(rsc->flags, pe_rsc_failed);
 
             should_fence = TRUE;
         } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
+            if (is_baremetal_remote_node(node) && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
+                /* setting unseen = true means that fencing of the remote node will
+                 * only occur if the connection resource is not going to start somewhere.
+                 * This allows connection resources on a failed cluster-node to move to
+                 * another node without requiring the baremetal remote nodes to be fenced
+                 * as well.
+                 * NOTE(review): assumes remote_rsc is non-NULL whenever
+                 * is_baremetal_remote_node() is true -- confirm. */
+                node->details->unseen = TRUE;
+                reason = g_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id);
+            }
             should_fence = TRUE;
         }
 
         if (should_fence) {
-            char *reason = g_strdup_printf("because %s is thought to be active there", rsc->id);
+            if (reason == NULL) {
+               reason = g_strdup_printf("because %s is thought to be active there", rsc->id);
+            }
             pe_fence_node(data_set, node, reason);
-            g_free(reason);
         }
+        g_free(reason);
     }
 
     if (node->details->unclean) {
         /* No extra processing needed
          * Also allows resources to be started again after a node is shot
          */
         on_fail = action_fail_ignore;
     }
 
     switch (on_fail) {
         case action_fail_ignore:
             /* nothing to do */
             break;
 
         case action_fail_fence:
             /* treat it as if it is still running
              * but also mark the node as unclean
              */
             pe_fence_node(data_set, node, "because of resource failure(s)");
             break;
 
         case action_fail_standby:
             node->details->standby = TRUE;
             node->details->standby_onfail = TRUE;
             break;
 
         case action_fail_block:
             /* is_managed == FALSE will prevent any
              * actions being sent for the resource
              */
             clear_bit(rsc->flags, pe_rsc_managed);
             set_bit(rsc->flags, pe_rsc_block);
             break;
 
         case action_fail_migrate:
             /* make sure it comes up somewhere else
              * or not at all
              */
             resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
             break;
 
         case action_fail_stop:
             rsc->next_role = RSC_ROLE_STOPPED;
             break;
 
         case action_fail_recover:
             if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
                 set_bit(rsc->flags, pe_rsc_failed);
                 stop_action(rsc, node, FALSE);
             }
             break;
 
         case action_fail_restart_container:
             set_bit(rsc->flags, pe_rsc_failed);
 
             if (rsc->container) {
                 stop_action(rsc->container, node, FALSE);
 
             } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
                 stop_action(rsc, node, FALSE);
             }
             break;
     }
 
+    /* ensure a remote-node connection failure forces an unclean remote-node
+     * to be fenced. By setting unseen = FALSE, the remote-node failure will
+     * result in a fencing operation regardless if we're going to attempt to
+     * reconnect to the remote-node in this transition or not.
+     * pe_find_node() returns NULL when no node carries this resource's id,
+     * so the lookup result must be checked before it is dereferenced. */
+    if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
+        node_t *tmpnode = pe_find_node(data_set->nodes, rsc->id);
+        if (tmpnode && tmpnode->details->unclean) {
+            tmpnode->details->unseen = FALSE;
+        }
+    }
+
     if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
         if (is_set(rsc->flags, pe_rsc_orphan)) {
             if (is_set(rsc->flags, pe_rsc_managed)) {
                 crm_config_warn("Detected active orphan %s running on %s",
                                 rsc->id, node->details->uname);
             } else {
                 crm_config_warn("Cluster configured not to stop active orphans."
                                 " %s must be stopped manually on %s",
                                 rsc->id, node->details->uname);
             }
         }
 
         native_add_running(rsc, node, data_set);
         if (on_fail != action_fail_ignore) {
             set_bit(rsc->flags, pe_rsc_failed);
         }
 
     } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
         /* Only do this for older status sections that included instance numbers
          * Otherwise stopped instances will appear as orphans
          */
         pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
         free(rsc->clone_name);
         rsc->clone_name = NULL;
 
     } else {
         /* Inactive here: any stop already scheduled on this node is optional */
         char *key = stop_key(rsc);
         GListPtr possible_matches = find_actions(rsc->actions, key, node);
         GListPtr gIter = possible_matches;
 
         for (; gIter != NULL; gIter = gIter->next) {
             action_t *stop = (action_t *) gIter->data;
 
             stop->flags |= pe_action_optional;
         }
 
         g_list_free(possible_matches);
         free(key);
     }
 }
 
 /*
  * Re-create the recurring operations that are still active for a resource
  * on a node, as optional actions.
  *
  * Walks the call-id-sorted history.  Entries that belong to a stopped
  * period, that pre-date the last start, that are not recurring (interval 0)
  * or whose op-status is "-1" are skipped; nothing is created at all once the
  * node is found to be offline.
  */
 static void
 process_recurring(node_t * node, resource_t * rsc,
                   int start_index, int stop_index,
                   GListPtr sorted_op_list, pe_working_set_t * data_set)
 {
     int position = 0;           /* zero-based index of the current entry */
     GListPtr entry = NULL;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
 
     for (entry = sorted_op_list; entry != NULL; entry = entry->next, position++) {
         xmlNode *history = (xmlNode *) entry->data;
         const char *op_id = ID(history);
         const char *op_task = NULL;
         const char *op_status = NULL;
         char *op_key = NULL;
         int interval = 0;
 
         if (node->details->online == FALSE) {
             pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
             break;
         }
 
         /* Need to check if there's a monitor for role="Stopped" */
         if (start_index < stop_index && position <= stop_index) {
             pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", op_id, node->details->uname);
             continue;
         }
 
         if (position < start_index) {
             pe_rsc_trace(rsc, "Skipping %s/%s: old %d", op_id, node->details->uname, position);
             continue;
         }
 
         interval = crm_parse_int(crm_element_value(history, XML_LRM_ATTR_INTERVAL), "0");
         if (interval == 0) {
             pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", op_id, node->details->uname);
             continue;
         }
 
         op_status = crm_element_value(history, XML_LRM_ATTR_OPSTATUS);
         if (safe_str_eq(op_status, "-1")) {
             pe_rsc_trace(rsc, "Skipping %s/%s: status", op_id, node->details->uname);
             continue;
         }
 
         /* create the action */
         op_task = crm_element_value(history, XML_LRM_ATTR_TASK);
         op_key = generate_op_key(rsc->id, op_task, interval);
         pe_rsc_trace(rsc, "Creating %s/%s", op_key, node->details->uname);
         custom_action(rsc, op_key, op_task, node, TRUE, TRUE, data_set);
     }
 }
 
 /*
  * Locate, within a call-id-sorted operation history, the index of the last
  * successful stop and of the last start.
  *
  * *stop_index  - set to the position of the last stop whose op-status is "0",
  *                or -1 when there is none
  * *start_index - set to the position of the last start / migrate_from; when
  *                neither is present, falls back to the last promote/demote,
  *                then to the last qualifying monitor that returned rc "0" or
  *                "8"; -1 when nothing applies
  */
 void
 calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
 {
     int position = 0;
     int last_good_monitor = -1;
     int last_role_change = -1;
     GListPtr entry = NULL;
 
     *stop_index = -1;
     *start_index = -1;
 
     for (entry = sorted_op_list; entry != NULL; entry = entry->next, position++) {
         xmlNode *history = (xmlNode *) entry->data;
         const char *op_task = crm_element_value(history, XML_LRM_ATTR_TASK);
         const char *op_status = crm_element_value(history, XML_LRM_ATTR_OPSTATUS);
 
         if (safe_str_eq(op_task, CRMD_ACTION_STOP) && safe_str_eq(op_status, "0")) {
             *stop_index = position;
             continue;
         }
 
         if (safe_str_eq(op_task, CRMD_ACTION_START) || safe_str_eq(op_task, CRMD_ACTION_MIGRATED)) {
             *start_index = position;
             continue;
         }
 
         if (safe_str_eq(op_task, CRMD_ACTION_STATUS)) {
             /* Only monitors seen while no implied start is newer than the
              * last stop may move the implied start forward */
             if (last_good_monitor <= *stop_index) {
                 const char *op_rc = crm_element_value(history, XML_LRM_ATTR_RC);
 
                 if (safe_str_eq(op_rc, "0") || safe_str_eq(op_rc, "8")) {
                     last_good_monitor = position;
                 }
             }
             continue;
         }
 
         if (safe_str_eq(op_task, CRMD_ACTION_PROMOTE) || safe_str_eq(op_task, CRMD_ACTION_DEMOTE)) {
             last_role_change = position;
         }
     }
 
     if (*start_index != -1) {
         return;
     }
 
     /* No explicit start recorded: infer one, preferring promote/demote */
     if (last_role_change != -1) {
         *start_index = last_role_change;
     } else if (last_good_monitor != -1) {
         *start_index = last_good_monitor;
     }
 }
 
 /*
  * Unpack the recorded history of one resource entry on one node.
  *
  * node      - node whose status section contains rsc_entry
  * rsc_entry - XML element holding the resource's operation children
  * data_set  - working set being populated
  *
  * Returns the resource the history was applied to, or NULL when the entry
  * holds no operation elements.  Asserts if no resource (configured or
  * orphan) could be obtained for the entry.
  */
 static resource_t *
 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     int stop_index = -1;
     int start_index = -1;
     enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
 
     const char *task = NULL;
     const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
 
     resource_t *rsc = NULL;
     GListPtr op_list = NULL;
     GListPtr sorted_op_list = NULL;
 
     xmlNode *migrate_op = NULL;
     xmlNode *rsc_op = NULL;
 
     /* overwritten with action_fail_ignore before the operations are processed */
     enum action_fail_response on_fail = FALSE;
     enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
 
     crm_trace("[%s] Processing %s on %s",
               crm_element_name(rsc_entry), rsc_id, node->details->uname);
 
     /* extract operations */
     op_list = NULL;
     sorted_op_list = NULL;
 
     for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
         if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
             op_list = g_list_prepend(op_list, rsc_op);
         }
     }
 
     if (op_list == NULL) {
         /* if there are no operations, there is nothing to do */
         return NULL;
     }
 
     /* find the resource */
     rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
     if (rsc == NULL) {
         /* not found by the lookup above: fall back to orphan handling */
         rsc = process_orphan_resource(rsc_entry, node, data_set);
     }
     CRM_ASSERT(rsc != NULL);
 
     /* process operations */
     /* The role is recalculated from scratch for this node; the previous
      * value is kept so it can be restored below if it was higher */
     saved_role = rsc->role;
     on_fail = action_fail_ignore;
     rsc->role = RSC_ROLE_UNKNOWN;
     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
 
     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
         xmlNode *rsc_op = (xmlNode *) gIter->data;
 
         task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
         if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
             /* remember the last such operation in sorted order */
             migrate_op = rsc_op;
         }
 
         unpack_rsc_op(rsc, node, rsc_op, &on_fail, data_set);
     }
 
     /* create active recurring operations as optional */
     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
     process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
 
     /* no need to free the contents */
     g_list_free(sorted_op_list);
 
     process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
 
     if (get_target_role(rsc, &req_role)) {
         /* A requested role only replaces the calculated next role when the
          * latter is unknown or higher than the request */
         if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
             pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
                          " with requested next role %s",
                          rsc->id, role2text(rsc->next_role), role2text(req_role));
             rsc->next_role = req_role;
 
         } else if (req_role > rsc->next_role) {
             pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
                         " with requested next role %s",
                         rsc->id, role2text(rsc->next_role), role2text(req_role));
         }
     }
 
     /* never lower the role below what was known before this entry */
     if (saved_role > rsc->role) {
         rsc->role = saved_role;
     }
 
     return rsc;
 }
 
 /*
  * Attach orphaned container fillers to their container resource.
  *
  * For every resource entry in lrm_rsc_list that names a container, look up
  * both the container and the resource; when the resource is flagged as an
  * orphaned container filler and has no container assigned yet, link the two.
  */
 static void
 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
 {
     xmlNode *entry = __xml_first_child(lrm_rsc_list);
 
     for (; entry != NULL; entry = __xml_next(entry)) {
         const char *parent_id = NULL;
         const char *filler_id = NULL;
         resource_t *parent = NULL;
         resource_t *filler = NULL;
 
         if (safe_str_neq((const char *)entry->name, XML_LRM_TAG_RESOURCE)) {
             continue;
         }
 
         parent_id = crm_element_value(entry, XML_RSC_ATTR_CONTAINER);
         filler_id = crm_element_value(entry, XML_ATTR_ID);
         if (parent_id == NULL || filler_id == NULL) {
             continue;
         }
 
         parent = pe_find_resource(data_set->resources, parent_id);
         if (parent == NULL) {
             continue;
         }
 
         filler = pe_find_resource(data_set->resources, filler_id);
         if (filler == NULL) {
             continue;
         }
         if (is_set(filler->flags, pe_rsc_orphan_container_filler) == FALSE) {
             continue;
         }
         if (filler->container != NULL) {
             /* already mapped */
             continue;
         }
 
         pe_rsc_trace(filler, "Mapped orphaned rsc %s's container to  %s", filler->id, parent_id);
         filler->container = parent;
         parent->fillers = g_list_append(parent->fillers, filler);
     }
 }
 
 /*
  * Unpack the state of every resource entry recorded for a node.
  *
  * node         - node the entries belong to (FALSE is returned if NULL)
  * lrm_rsc_list - XML element whose children are the resource entries
  * data_set     - working set being populated
  *
  * After all entries are unpacked, containers that were found unexpectedly
  * running and whose remote-node connection resource is not started are
  * flagged as failed, and orphaned container fillers are mapped to their
  * containers.  Returns TRUE once processing completes.
  */
 gboolean
 unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
 {
     xmlNode *rsc_entry = NULL;
     gboolean found_orphaned_container_filler = FALSE;
     GListPtr unexpected_containers = NULL;
     GListPtr gIter = NULL;
     resource_t *remote = NULL;
 
     CRM_CHECK(node != NULL, return FALSE);
 
     crm_trace("Unpacking resources on %s", node->details->uname);
 
     for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
          rsc_entry = __xml_next(rsc_entry)) {
 
         if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
             resource_t *rsc;
             rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
             if (!rsc) {
                 /* entry had no operations */
                 continue;
             }
             if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
                 found_orphaned_container_filler = TRUE;
             }
             if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) {
                 /* collect the remote resource returned for this resource, if any */
                 remote = rsc_contains_remote_node(data_set, rsc);
                 if (remote) {
                     unexpected_containers = g_list_append(unexpected_containers, remote);
                 }
             }
         }
     }
 
     /* If a container resource is unexpectedly up... and the remote-node
      * connection resource for that container is not up, the entire container
      * must be recovered. */
     for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) {
         remote = (resource_t *) gIter->data;
         if (remote->role != RSC_ROLE_STARTED) {
-            crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.");
+            crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id);
             set_bit(remote->container->flags, pe_rsc_failed);
         }
     }
 
     /* now that all the resource state has been unpacked for this node
      * we have to go back and map any orphaned container fillers to their
      * container resource */
     if (found_orphaned_container_filler) {
         handle_orphaned_container_fillers(lrm_rsc_list, data_set);
     }
     /* the list only borrows the resources; free the list cells alone */
     g_list_free(unexpected_containers);
     return TRUE;
 }
 
 /*
  * Mark a resource as active: slave when its top-most parent is a master
  * resource, started otherwise.
  */
 static void
 set_active(resource_t * rsc)
 {
     resource_t *topmost = uber_parent(rsc);
     gboolean under_master = (topmost != NULL) && (topmost->variant == pe_master);
 
     rsc->role = under_master ? RSC_ROLE_SLAVE : RSC_ROLE_STARTED;
 }
 
 /*
  * Hash-table iterator callback: overwrite the weight of the node stored as
  * the entry's value with the int that user_data points to.  The key is not
  * used.
  */
 static void
 set_node_score(gpointer key, gpointer value, gpointer user_data)
 {
     const int *new_weight = (const int *) user_data;
     node_t *entry = (node_t *) value;
 
     entry->weight = *new_weight;
 }
 
 #define STATUS_PATH_MAX 1024
 /*
  * Search the input document for a recorded operation of a resource.
  *
  * resource - value matched against the resource entry's id attribute
  * op       - value matched against the operation attribute
  * node     - value matched against the node_state uname attribute
  * source   - when non-NULL and op is a migrate_to / migrate_from task,
  *            additionally matched against migrate_target / migrate_source
  *            respectively; ignored otherwise
  * data_set - working set whose input document is searched
  *
  * Returns whatever get_xpath_object() finds for the generated expression,
  * or NULL when the expression does not fit into STATUS_PATH_MAX bytes.
  */
 static xmlNode *
 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
             pe_working_set_t * data_set)
 {
     int offset = 0;
     char xpath[STATUS_PATH_MAX];
 
     /* The expression is formatted with a single bounded call per variant.
      * Chaining several snprintf() calls on a running offset is unsafe: the
      * return value is the length that *would* have been written, so after a
      * truncation the remaining-size argument turns negative and the next
      * call writes past the end of the buffer. */
 
     /* Need to check against transition_magic too? */
     if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
         offset = snprintf(xpath, STATUS_PATH_MAX,
                           "//node_state[@uname='%s']"
                           "//" XML_LRM_TAG_RESOURCE "[@id='%s']"
                           "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']",
                           node, resource, op, source);
     } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
         offset = snprintf(xpath, STATUS_PATH_MAX,
                           "//node_state[@uname='%s']"
                           "//" XML_LRM_TAG_RESOURCE "[@id='%s']"
                           "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']",
                           node, resource, op, source);
     } else {
         offset = snprintf(xpath, STATUS_PATH_MAX,
                           "//node_state[@uname='%s']"
                           "//" XML_LRM_TAG_RESOURCE "[@id='%s']"
                           "/" XML_LRM_TAG_RSC_OP "[@operation='%s']",
                           node, resource, op);
     }
 
     if (offset < 0 || offset >= STATUS_PATH_MAX) {
         /* Formatting failed or was truncated: a cut-off expression must not
          * be evaluated, so report the operation as not found */
         crm_warn("XPath for operation lookup does not fit in %d bytes; treating as not found",
                  STATUS_PATH_MAX);
         return NULL;
     }
 
     CRM_LOG_ASSERT(offset > 0);
     return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
 }
 
 /*
  * Work out where a resource is active after a migrate_to operation.
  *
  * rsc      - resource the operation belongs to
  * node     - node on which xml_op was recorded
  * xml_op   - the migration operation being unpacked
  * data_set - working set (node list and input document)
  *
  * Nothing is done when a stop with a call id not lower than xml_op's is
  * recorded.  Otherwise the matching migrate_from on the target decides
  * whether the migration is dangling, failed, partial, or must be treated
  * as a failure.
  */
 static void
 unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) 
 {
                 
     /*
      * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src)
      *
      * So if a migrate_to is followed by a stop, then we don't need to care what
      * happened on the target node
      *
      * Without the stop, we need to look for a successful migrate_from.
      * This would also imply we're no longer running on the source
      *
      * Without the stop, and without a migrate_from op we make sure the resource
      * gets stopped on both source and target (assuming the target is up)
      *
      */
     int stop_id = 0;
     int task_id = 0;
     /* NOTE(review): node->details->id is passed here, while find_lrm_op()
      * matches its node argument against the uname attribute -- confirm the
      * two are interchangeable for all node types */
     xmlNode *stop_op =
         find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set);
 
     if (stop_op) {
         crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
     }
 
     crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
 
     /* only relevant when no stop was recorded at or after this operation */
     if (stop_op == NULL || stop_id < task_id) {
         int from_rc = 0, from_status = 0;
         const char *migrate_source =
             crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *migrate_target =
             crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
 
         node_t *target = pe_find_node(data_set->nodes, migrate_target);
         node_t *source = pe_find_node(data_set->nodes, migrate_source);
         xmlNode *migrate_from =
             find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
                         data_set);
 
         rsc->role = RSC_ROLE_STARTED;       /* can be master? */
         if (migrate_from) {
             crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
             crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
             pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
                          ID(migrate_from), migrate_target, from_status, from_rc);
         }
 
         if (migrate_from && from_rc == PCMK_OCF_OK
             && from_status == PCMK_LRM_OP_DONE) {
             pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
                          migrate_source);
 
             /* all good
              * just need to arrange for the stop action to get sent
              * but _without_ affecting the target somehow
              */
             rsc->role = RSC_ROLE_STOPPED;
             rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
 
         } else if (migrate_from) {  /* Failed */
             if (target && target->details->online) {
                 pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
                              target->details->online);
                 native_add_running(rsc, target, data_set);
             }
 
         } else {    /* Pending or complete but erased */
             if (target && target->details->online) {
                 pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
                              target->details->online);
 
                 native_add_running(rsc, target, data_set);
                 if (source && source->details->online) {
                     /* If we make it here we have a partial migration.  The migrate_to
                      * has completed but the migrate_from on the target has not. Hold on
                      * to the target and source on the resource. Later on if we detect that
                      * the resource is still going to run on that target, we may continue
                      * the migration */
                     rsc->partial_migration_target = target;
                     rsc->partial_migration_source = source;
                 }
             } else {
                 /* Consider it failed here - forces a restart, prevents migration */
                 set_bit(rsc->flags, pe_rsc_failed);
                 clear_bit(rsc->flags, pe_rsc_allow_migrate);
             }
         }
     }
 }
 
 /*
  * Adjust where a resource is considered active after a failed migration
  * operation.
  *
  * rsc      - resource the failed operation belongs to (must not be NULL)
  * node     - node on which xml_op was recorded
  * xml_op   - the failed migrate_from or migrate_to operation
  * data_set - working set (node list and input document)
  *
  * For a failed migrate_from the peer examined is the migration source; for
  * a failed migrate_to it is the migration target.  Operations with any
  * other task are left untouched.
  */
 static void
 unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) 
 {
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
 
     CRM_ASSERT(rsc);
     if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
         int stop_id = 0;
         int migrate_id = 0;
         const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
 
         /* look at what was recorded on the source side of the migration */
         xmlNode *stop_op =
             find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
         xmlNode *migrate_op =
             find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target,
                         data_set);
 
         if (stop_op) {
             crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
         }
         if (migrate_op) {
             crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
         }
 
         /* Get our state right */
         rsc->role = RSC_ROLE_STARTED;   /* can be master? */
 
         /* no stop on the source since the migrate_to: still active there */
         if (stop_op == NULL || stop_id < migrate_id) {
             node_t *source = pe_find_node(data_set->nodes, migrate_source);
 
             if (source && source->details->online) {
                 native_add_running(rsc, source, data_set);
             }
         }
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
         int stop_id = 0;
         int migrate_id = 0;
         const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
 
         /* look at what was recorded on the target side of the migration */
         xmlNode *stop_op =
             find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
         xmlNode *migrate_op =
             find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
                         data_set);
 
         if (stop_op) {
             crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
         }
         if (migrate_op) {
             crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
         }
 
         /* Get our state right */
         rsc->role = RSC_ROLE_STARTED;   /* can be master? */
 
         if (stop_op == NULL || stop_id < migrate_id) {
             node_t *target = pe_find_node(data_set->nodes, migrate_target);
 
             pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op,
                          migrate_id);
             if (target && target->details->online) {
                 native_add_running(rsc, target, data_set);
             }
 
         } else if (migrate_op == NULL) {
             /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
             rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
         }
     }
 }
 
 /*
  * Return the key identifying an operation entry: its task-key attribute
  * when present, otherwise its ID.
  */
 static const char *get_op_key(xmlNode *xml_op)
 {
     const char *task_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
 
     return (task_key != NULL) ? task_key : ID(xml_op);
 }
 
 /*
  * Process a failed operation from a resource's history.
  *
  * rsc      - resource the operation belongs to (must not be NULL)
  * node     - node on which the operation was recorded
  * rc       - return code of the operation
  * xml_op   - the operation's XML entry (gains a uname attribute when the
  *            failure is reported)
  * on_fail  - in/out: failure policy accumulated so far; replaced by this
  *            operation's policy when the comparison below says so
  * data_set - working set being populated
  *
  * Updates the resource's role / next_role, may add location constraints,
  * and, when the operation's fail role is "stopped", bans the resource (or
  * its anonymous clone/master parent) from every node.
  */
 static void
 unpack_rsc_op_failure(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, enum action_fail_response *on_fail, pe_working_set_t * data_set) 
 {
     int interval = 0;
     bool is_probe = FALSE;
     action_t *action = NULL;
 
     const char *key = get_op_key(xml_op);
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
 
     CRM_ASSERT(rsc);
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
     /* a status operation with interval 0 is a probe */
     if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         is_probe = TRUE;
         pe_rsc_trace(rsc, "is a probe: %s", key);
     }
 
     /* "not installed" results in an asymmetric cluster are only traced */
     if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
         crm_warn("Processing failed op %s for %s on %s: %s (%d)",
                  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
                  rc);
 
         crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
         if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
             add_node_copy(data_set->failed, xml_op);
         }
     } else {
         crm_trace("Processing failed op %s for %s on %s: %s (%d)",
                  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
                  rc);
     }
 
     /* Temporary action, used only to read the operation's on_fail and
      * fail_role settings; released with pe_free_action() at the end */
     action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
     if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
         (action->on_fail == action_fail_restart_container
          && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container
                                                  && action->on_fail >=
                                                  action_fail_migrate)) {
         pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
                      fail2text(action->on_fail), action->uuid, key);
         *on_fail = action->on_fail;
     }
 
     if (safe_str_eq(task, CRMD_ACTION_STOP)) {
         resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
         unpack_rsc_migration_failure(rsc, node, xml_op, data_set);
 
     } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
         rsc->role = RSC_ROLE_MASTER;
 
     } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
         /*
          * staying in role=master ends up putting the PE/TE into a loop
          * setting role=slave is not dangerous because no master will be
          * promoted until the failed resource has been fully stopped
          */
         rsc->next_role = RSC_ROLE_STOPPED;
         if (action->on_fail == action_fail_block) {
             rsc->role = RSC_ROLE_MASTER;
 
         } else {
             crm_warn("Forcing %s to stop after a failed demote action", rsc->id);
             rsc->role = RSC_ROLE_SLAVE;
         }
 
     } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) {
         crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname);
         resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set);
     }
 
     if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
         /* leave stopped */
         pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
         rsc->role = RSC_ROLE_STOPPED;
 
     } else if (rsc->role < RSC_ROLE_STARTED) {
         pe_rsc_trace(rsc, "Setting %s active", rsc->id);
         set_active(rsc);
     }
 
     pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
                  rsc->id, role2text(rsc->role),
                  node->details->unclean ? "true" : "false",
                  fail2text(action->on_fail), role2text(action->fail_role));
 
     /* only ever raise next_role towards the operation's fail role */
     if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
         rsc->next_role = action->fail_role;
     }
 
     if (action->fail_role == RSC_ROLE_STOPPED) {
         int score = -INFINITY;
 
         resource_t *fail_rsc = rsc;
 
         if (fail_rsc->parent) {
             resource_t *parent = uber_parent(fail_rsc);
 
             if ((parent->variant == pe_clone || parent->variant == pe_master)
                 && is_not_set(parent->flags, pe_rsc_unique)) {
                 /* for clone and master resources, if a child fails on an operation
                  * with on-fail = stop, all the resources fail.  Do this by preventing
                  * the parent from coming up again. */
                 fail_rsc = parent;
             }
         }
         crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
         /* make sure it doesn't come up again: rebuild the allowed-node table
          * from every known node and give each entry a score of -INFINITY */
         g_hash_table_destroy(fail_rsc->allowed_nodes);
         fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
         g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
     }
 
     pe_free_action(action);
 }
 
 /*
  * Translate the raw return code of a recorded operation into a
  * PCMK_LRM_OP_* status value.
  *
  * The result starts as PCMK_LRM_OP_DONE, is downgraded to PCMK_LRM_OP_ERROR
  * when a non-negative target_rc differs from rc, and is then refined by the
  * per-rc switch below.  Besides computing the status, several branches also
  * update the resource: rsc->role, rsc->next_role, rsc->flags and *on_fail
  * may all be modified.
  *
  * rsc        resource the operation belongs to (asserted non-NULL)
  * rc         return code recorded for the operation
  * target_rc  expected return code; negative values are handled by the
  *            "legacy" branches as "no expectation recorded"
  * node       node the result was recorded on (logging and fencing check)
  * xml_op     operation XML; op key, task name and interval are read from it
  * on_fail    in/out failure response; reset to action_fail_ignore when the
  *            resource is found cleanly not running
  * data_set   working set, handed to pe_can_fence()
  *
  * Returns the computed PCMK_LRM_OP_* status.
  */
 static int
 determine_op_status(
     resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) 
 {
     int interval = 0;
     int result = PCMK_LRM_OP_DONE;
 
     const char *key = get_op_key(xml_op);
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
 
     bool is_probe = FALSE;
 
     CRM_ASSERT(rsc);
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
     /* A probe is a status (monitor) operation with no interval */
     if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         is_probe = TRUE;
     }
 
     /* Default verdict when an expectation exists and was not met; the switch
      * below may still override it for specific return codes */
     if (target_rc >= 0 && target_rc != rc) {
         result = PCMK_LRM_OP_ERROR;
         pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
                      key, node->details->uname,
                      services_ocf_exitcode_str(rc), rc,
                      services_ocf_exitcode_str(target_rc), target_rc);
     }
     
     /* we could clean this up significantly except for old LRMs and CRMs that
      * didn't include target_rc and liked to remap status
      */
     switch (rc) {
         case PCMK_OCF_OK:
             /* NOTE(review): the literal 7 is presumably PCMK_OCF_NOT_RUNNING
              * (a probe expected "not running" but found the resource active)
              * - confirm against the enum definition */
             if (is_probe && target_rc == 7) {
                 result = PCMK_LRM_OP_DONE;
                 set_bit(rsc->flags, pe_rsc_unexpectedly_running);
                 pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
                             task, rsc->id, node->details->uname);
 
                 /* legacy code for pre-0.6.5 operations */
             } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) {
                 /* catch status ops that return 0 instead of 8 while they
                  *   are supposed to be in master mode
                  */
                 result = PCMK_LRM_OP_ERROR;
             }
             break;
 
         case PCMK_OCF_NOT_RUNNING:
             /* Expected (or probed) "not running": the resource is cleanly stopped */
             if (is_probe || target_rc == rc) {
                 result = PCMK_LRM_OP_DONE;
                 rsc->role = RSC_ROLE_STOPPED;
 
                 /* clear any previous failure actions */
                 *on_fail = action_fail_ignore;
                 rsc->next_role = RSC_ROLE_UNKNOWN;
 
             /* Any task other than stop reporting "not running" unexpectedly is an error */
             } else if (safe_str_neq(task, CRMD_ACTION_STOP)) {
                 result = PCMK_LRM_OP_ERROR;
             }
             break;
 
         case PCMK_OCF_RUNNING_MASTER:
             if (is_probe) {
                 result = PCMK_LRM_OP_DONE;
                 pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
                             task, rsc->id, node->details->uname);
 
             } else if (target_rc == rc) {
                 /* nothing to do */
 
             } else if (target_rc >= 0) {
                 result = PCMK_LRM_OP_ERROR;
 
                 /* legacy code for pre-0.6.5 operations */
             } else if (safe_str_neq(task, CRMD_ACTION_STATUS)
                        || rsc->role != RSC_ROLE_MASTER) {
                 result = PCMK_LRM_OP_ERROR;
                 if (rsc->role != RSC_ROLE_MASTER) {
                     crm_err("%s reported %s in master mode on %s",
                             key, rsc->id, node->details->uname);
                 }
             }
             /* Whatever the verdict, the resource is recorded as master */
             rsc->role = RSC_ROLE_MASTER;
             break;
 
         case PCMK_OCF_FAILED_MASTER:
             rsc->role = RSC_ROLE_MASTER;
             result = PCMK_LRM_OP_ERROR;
             break;
 
         case PCMK_OCF_NOT_CONFIGURED:
             result = PCMK_LRM_OP_ERROR_FATAL;
             break;
 
         case PCMK_OCF_NOT_INSTALLED:
         case PCMK_OCF_INVALID_PARAM:
         case PCMK_OCF_INSUFFICIENT_PRIV:
         case PCMK_OCF_UNIMPLEMENT_FEATURE:
             /* A recurring operation the agent does not implement is reported
              * as "not supported" rather than as a hard error */
             if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) {
                 result = PCMK_LRM_OP_NOTSUPPORTED;
                 break;
 
             } else if(pe_can_fence(data_set, node) == FALSE
                && safe_str_eq(task, CRMD_ACTION_STOP)) {
                 /* If a stop fails and we can't fence, there's nothing else we can do */
                 pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
                             rsc->id, task, services_ocf_exitcode_str(rc), rc);
                 clear_bit(rsc->flags, pe_rsc_managed);
                 set_bit(rsc->flags, pe_rsc_block);
             }
             result = PCMK_LRM_OP_ERROR_HARD;
             break;
 
         default:
             /* Unrecognised rc that was not already flagged by the target_rc
              * comparison above: treat it as an ordinary error */
             if (result == PCMK_LRM_OP_DONE) {
                 crm_info("Treating %s (rc=%d) on %s as an ERROR",
                          key, rc, node->details->uname);
                 result = PCMK_LRM_OP_ERROR;
             }
     }
 
     return result;
 }
 
 /*
  * Decide whether a recorded operation result has expired, and schedule a
  * fail-count clearing action when one is warranted.
  *
  * An operation is a candidate for expiry when the resource has a positive
  * failure_timeout and the XML_RSC_OP_LAST_CHANGE timestamp plus that timeout
  * lies before the effective time of data_set.  If a fail count exists, the
  * result only stays expired when the second get_failcount_full() query (the
  * one passing TRUE) reports 0; in that case a CRM_OP_CLEAR_FAILCOUNT action
  * is created.  For non-expired "last_failure" entries of start/monitor
  * operations, the fail count is cleared when the operation digest no longer
  * matches.
  *
  * Probes (interval 0 status operations) that returned OK, NOT_RUNNING or
  * RUNNING_MASTER are never reported as expired.
  *
  * rsc       resource the operation belongs to
  * node      node the operation was recorded on
  * rc        return code recorded for the operation
  * xml_op    operation XML
  * data_set  working set
  *
  * Returns TRUE when the operation result should be treated as expired.
  */
 static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
 {
     bool expired = FALSE;
     time_t last_failure = 0;
     int clear_failcount = 0;
     int interval = 0;
     const char *key = get_op_key(xml_op);
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
 
     if (rsc->failure_timeout > 0) {
         int last_run = 0;
 
         if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
             time_t now = get_effective_time(data_set);
 
             if (now > (last_run + rsc->failure_timeout)) {
                 expired = TRUE;
             }
         }
     }
 
     if (expired) {
         /* expired can only be TRUE here when failure_timeout > 0, so no
          * second check of the timeout is needed */
         int fc = get_failcount_full(node, rsc, &last_failure, FALSE, xml_op, data_set);
 
         if (fc) {
             if (get_failcount_full(node, rsc, &last_failure, TRUE, xml_op, data_set) == 0) {
                 clear_failcount = 1;
                 crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname);
 
             } else {
                 expired = FALSE;
             }
         }
 
     } else if (strstr(ID(xml_op), "last_failure") &&
                ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
 
         op_digest_cache_t *digest_data = NULL;
 
         digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
 
         if (digest_data->rc == RSC_DIGEST_UNKNOWN) {
             /* The format has two placeholders (operation, node); pass exactly
              * those two values so each one lands in the right slot */
             crm_trace("rsc op %s on node %s does not have a op digest to compare against",
                       key, node->details->id);
         } else if (digest_data->rc != RSC_DIGEST_MATCH) {
             clear_failcount = 1;
             /* Argument order follows the message: resource, node, failed task */
             crm_info
                 ("Clearing failcount for %s on %s, %s failed and now resource parameters have changed.",
                  rsc->id, node->details->uname, task);
         }
     }
 
     if (clear_failcount) {
         action_t *clear_op = NULL;
 
         clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
                                  CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
         add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
     }
 
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
     if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
         switch(rc) {
             case PCMK_OCF_OK:
             case PCMK_OCF_NOT_RUNNING:
             case PCMK_OCF_RUNNING_MASTER:
                 /* Don't expire probes that return these values */
                 expired = FALSE;
                 break;
         }
     }
 
     return expired;
 }
 
 /*
  * Extract the expected return code from an operation's transition key.
  *
  * Returns -1 when xml_op carries no XML_ATTR_TRANSITION_KEY attribute.
  * Otherwise returns whatever decode_transition_key() stored in its last
  * output argument (the local starts out as 0).
  */
 int get_target_rc(xmlNode *xml_op)
 {
     int expected_rc = 0;
     int ignored_int = 0;
     char *ignored_str = NULL;
     const char *transition_key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
 
     if (transition_key != NULL) {
         /* Only the final field is of interest; the other outputs are discarded */
         decode_transition_key(transition_key, &ignored_str, &ignored_int, &ignored_int, &expected_rc);
         free(ignored_str);
         return expected_rc;
     }
 
     return -1;
 }
 
 /*
  * Look up the on-fail response configured for an operation.
  *
  * A throw-away action is built with custom_action() (using a duplicated copy
  * of key), its on_fail value is read, and the action is freed again.
  */
 static enum action_fail_response
 get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) 
 {
     action_t *scratch = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
     int response = scratch->on_fail;
 
     pe_free_action(scratch);
     return response;
 }
 
 /*
  * Apply the effect of a completed operation to the resource's recorded state.
  *
  * Depending on rc and the task name this updates rsc->role (directly or via
  * set_active()), may unpack a migration, and decides whether an earlier
  * failure response stored in *on_fail can be dropped.
  *
  * rsc       resource to update (asserted non-NULL)
  * node      node the operation ran on
  * xml_op    operation XML (forwarded to unpack_rsc_migration())
  * task      operation name
  * rc        return code to act upon
  * on_fail   in/out failure response
  * data_set  working set
  */
 static void
 update_resource_state(resource_t *rsc, node_t * node, xmlNode * xml_op, const char *task, int rc,
                       enum action_fail_response *on_fail, pe_working_set_t * data_set) 
 {
     gboolean forget_failure = FALSE;
 
     CRM_ASSERT(rsc);
 
     /* The order of these tests matters: the return code takes precedence
      * over the task name */
     if (rc == PCMK_OCF_NOT_RUNNING) {
         forget_failure = TRUE;
 
     } else if (rc == PCMK_OCF_NOT_INSTALLED) {
         rsc->role = RSC_ROLE_STOPPED;
 
     } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
         forget_failure = TRUE;
         if (rsc->role < RSC_ROLE_STARTED) {
             set_active(rsc);
         }
 
     } else if (safe_str_eq(task, CRMD_ACTION_START)) {
         forget_failure = TRUE;
         rsc->role = RSC_ROLE_STARTED;
 
     } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
         forget_failure = TRUE;
         rsc->role = RSC_ROLE_STOPPED;
 
     } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
         forget_failure = TRUE;
         rsc->role = RSC_ROLE_MASTER;
 
     } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
         /* A demote changes the role but leaves any earlier error in place */
         rsc->role = RSC_ROLE_SLAVE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
         forget_failure = TRUE;
         rsc->role = RSC_ROLE_STARTED;
 
     } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
         unpack_rsc_migration(rsc, node, xml_op, data_set);
 
     } else if (rsc->role < RSC_ROLE_STARTED) {
         /* Any remaining task on a resource not yet marked started */
         pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
         set_active(rsc);
     }
 
     if (forget_failure == FALSE) {
         return;
     }
 
     /* Drop the previous failure response unless it is one that must persist */
     switch (*on_fail) {
         case action_fail_stop:
         case action_fail_fence:
         case action_fail_migrate:
         case action_fail_standby:
             pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
                          rsc->id, fail2text(*on_fail));
             break;
 
         case action_fail_block:
         case action_fail_ignore:
         case action_fail_recover:
         case action_fail_restart_container:
             *on_fail = action_fail_ignore;
             rsc->next_role = RSC_ROLE_UNKNOWN;
             break;
     }
 }
 
 /*
  * Process one recorded operation (xml_op) of resource rsc on node.
  *
  * The task, return code, call id, status and interval are read from xml_op.
  * Notify operations are ignored.  Expired failures are either dropped or
  * flagged for re-initiation, completed operations are re-evaluated by
  * determine_op_status(), and the resulting status is then dispatched to the
  * pending / done / not-installed / failure handling below.
  *
  * rsc       resource the operation belongs to
  * node      node the operation was recorded on
  * xml_op    operation XML
  * on_fail   in/out failure response accumulated for the resource
  * data_set  working set
  *
  * Returns FALSE only when one of the CRM_CHECK sanity checks takes its
  * "return FALSE" action; TRUE in every other case.
  */
 gboolean
 unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
               enum action_fail_response * on_fail, pe_working_set_t * data_set)
 {
     int task_id = 0;
 
     const char *key = NULL;
     const char *task = NULL;
     const char *task_key = NULL;
 
     int rc = 0;
     /* Starts one below PENDING - presumably so that an operation without a
      * status attribute fails the range check below; TODO confirm that
      * crm_element_value_int() leaves the value untouched in that case */
     int status = PCMK_LRM_OP_PENDING-1;
     int target_rc = get_target_rc(xml_op);
     int interval = 0;
 
     gboolean expired = FALSE;
     resource_t *parent = rsc;
     enum action_fail_response failure_strategy = action_fail_recover;
 
     CRM_CHECK(rsc != NULL, return FALSE);
     CRM_CHECK(node != NULL, return FALSE);
     CRM_CHECK(xml_op != NULL, return FALSE);
 
     task_key = get_op_key(xml_op);
 
     task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
 
     crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
     crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
     crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
     crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
 
     CRM_CHECK(task != NULL, return FALSE);
     CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
     CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
 
     if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
         /* safe to ignore these */
         return TRUE;
     }
 
     /* For resources without the unique flag, location constraints created
      * below are applied to the top-level parent instead of this instance */
     if (is_not_set(rsc->flags, pe_rsc_unique)) {
         parent = uber_parent(rsc);
     }
     
     pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
                  task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
 
     if (node->details->unclean) {
         pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
                      " Further action depends on the value of the stop's on-fail attribue",
                      node->details->uname, rsc->id);
     }
 
     if (status == PCMK_LRM_OP_ERROR) {
         /* Older versions set this if rc != 0 but it's up to us to decide */
         status = PCMK_LRM_OP_DONE;
     }
 
     if(status != PCMK_LRM_OP_NOT_INSTALLED) {
         expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
     }
 
     /* An expired result that did not match its expected rc is an expired failure */
     if (expired && target_rc != rc) {
         const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
 
         pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
                      key, node->details->uname,
                      services_ocf_exitcode_str(rc), rc,
                      services_ocf_exitcode_str(target_rc), target_rc);
 
         if(interval == 0) {
             /* Non-recurring operation: simply skip the expired failure */
             crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
                        task_key, rc, magic, node->details->uname);
             goto done;
 
         } else if(node->details->online && node->details->unclean == FALSE) {
             crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
                        task_key, rc, magic, node->details->uname);
             /* This is SO horrible, but we don't have access to CancelXmlOp() yet */
             crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
             goto done;
         }
     }
 
     /* NOTE(review): status can no longer equal PCMK_LRM_OP_ERROR here because
      * it was rewritten to PCMK_LRM_OP_DONE above; only the DONE test matters */
     if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
         status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
     }
 
     pe_rsc_trace(rsc, "Handling status: %d", status);
     switch (status) {
         case PCMK_LRM_OP_CANCELLED:
             /* do nothing?? */
             pe_err("Dont know what to do for cancelled ops yet");
             break;
 
         case PCMK_LRM_OP_PENDING:
             if (safe_str_eq(task, CRMD_ACTION_START)) {
                 set_bit(rsc->flags, pe_rsc_start_pending);
                 set_active(rsc);
 
             } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
                 rsc->role = RSC_ROLE_MASTER;
 
             } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
                 /* If a pending migrate_to action is out on a unclean node,
                  * we have to force the stop action on the target. */
                 const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
                 node_t *target = pe_find_node(data_set->nodes, migrate_target);
                 if (target) {
                     stop_action(rsc, target, FALSE);
                 }
             }
 
             /* Remember the first pending task seen for this resource */
             if (rsc->pending_task == NULL) {
                 if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) {
                     /* Comment this out until someone requests it */
                     /* Comment this out until cl#5184 is fixed */
                     /*rsc->pending_task = strdup("probe");*/
 
                 } else {
                     rsc->pending_task = strdup(task);
                 }
             }
             break;
 
         case PCMK_LRM_OP_DONE:
             pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
             update_resource_state(rsc, node, xml_op, task, rc, on_fail, data_set);
             break;
 
         case PCMK_LRM_OP_NOT_INSTALLED:
             failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
             if (failure_strategy == action_fail_ignore) {
                 crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
                          "Resource agent doesn't exist",
                          task_key, status, rc, node->details->uname);
                 /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
                 *on_fail = action_fail_migrate;
             }
             /* Ban the resource from this node, then record the failure */
             resource_location(parent, node, -INFINITY, "hard-error", data_set);
             unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set);
             break;
 
         case PCMK_LRM_OP_ERROR:
         case PCMK_LRM_OP_ERROR_HARD:
         case PCMK_LRM_OP_ERROR_FATAL:
         case PCMK_LRM_OP_TIMEOUT:
         case PCMK_LRM_OP_NOTSUPPORTED:
 
             failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
             /* Failures configured to be ignored, and failed stops whose
              * response is to restart the container, are treated as if the
              * operation had returned its expected result */
             if ((failure_strategy == action_fail_ignore)
                 || (failure_strategy == action_fail_restart_container
                     && safe_str_eq(task, CRMD_ACTION_STOP))) {
 
                 crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
                          task_key, rc, node->details->uname);
 
                 update_resource_state(rsc, node, xml_op, task, target_rc, on_fail, data_set);
                 crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
                 set_bit(rsc->flags, pe_rsc_failure_ignored);
 
                 if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
                     /* NOTE(review): this repeats the crm_xml_add() call made
                      * a few lines above with the same arguments */
                     crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
                     add_node_copy(data_set->failed, xml_op);
                 }
 
                 if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
                     *on_fail = failure_strategy;
                 }
 
             } else {
                 unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set);
 
                 if(status == PCMK_LRM_OP_ERROR_HARD) {
                     /* Hard errors ban the resource from this node only */
                     do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
                                "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
                                parent->id, node->details->uname,
                                task, services_ocf_exitcode_str(rc), rc);
 
                     resource_location(parent, node, -INFINITY, "hard-error", data_set);
 
                 } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
                     /* Fatal errors pass NULL as the node to resource_location() */
                     crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
                             parent->id, task, services_ocf_exitcode_str(rc), rc);
 
                     resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
                 }
             }
             break;
     }
 
   done:
     pe_rsc_trace(rsc, "Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role));
     return TRUE;
 }
 
 /*
  * Populate a node's attribute table with built-in and configured attributes.
  *
  * Inserts "#uname", "#kind", the "#"-prefixed id and DC attributes and, when
  * the cluster configuration defines "cluster-name", "#cluster-name".  The
  * instance attributes found under xml_obj are then unpacked into the same
  * table.  Finally "#site-name" is derived from "site-name", falling back to
  * the cluster name, if it is not already present.
  *
  * As a side effect the node matching data_set->dc_uuid is recorded as the DC
  * in both data_set and the node details.
  *
  * Always returns TRUE.
  */
 gboolean
 add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
 {
     GHashTable *attrs = node->details->attrs;
     const char *kind = node->details->remote_rsc ? "container" : "cluster";
     const char *dc_value = XML_BOOLEAN_FALSE;
     const char *cluster_name = NULL;
 
     g_hash_table_insert(attrs, strdup("#uname"), strdup(node->details->uname));
     g_hash_table_insert(attrs, strdup("#kind"), strdup(kind));
     g_hash_table_insert(attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id));
 
     if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
         data_set->dc_node = node;
         node->details->is_dc = TRUE;
         dc_value = XML_BOOLEAN_TRUE;
     }
     g_hash_table_insert(attrs, strdup("#" XML_ATTR_DC), strdup(dc_value));
 
     cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
     if (cluster_name != NULL) {
         g_hash_table_insert(attrs, strdup("#cluster-name"), strdup(cluster_name));
     }
 
     unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
                                attrs, NULL, overwrite, data_set->now);
 
     if (g_hash_table_lookup(attrs, "#site-name") == NULL) {
         /* Prefer an explicit site-name attribute; otherwise use the cluster name */
         const char *site_name = g_hash_table_lookup(attrs, "site-name");
 
         if (site_name == NULL) {
             site_name = cluster_name;
         }
         if (site_name != NULL) {
             g_hash_table_insert(attrs, strdup("#site-name"), strdup(site_name));
         }
     }
 
     return TRUE;
 }
 
 /*
  * Collect the operation entries recorded beneath one resource entry.
  *
  * Each XML_LRM_TAG_RSC_OP child of rsc_entry is tagged with the given
  * resource and node names and gathered into a list sorted with
  * sort_op_by_callid.  When active_filter is FALSE that sorted list is
  * returned as is.  Otherwise calculate_active_ops() determines start and
  * stop indexes, and only the entries from start_index onwards are returned -
  * or none at all when start_index is lower than stop_index.
  *
  * Returns NULL when rsc_entry has no operation children.
  */
 static GListPtr
 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
 {
     int position = 0;
     int stop_index = -1;
     int start_index = -1;
 
     xmlNode *child = NULL;
 
     GListPtr item = NULL;
     GListPtr collected = NULL;
     GListPtr by_callid = NULL;
     GListPtr active = NULL;
 
     for (child = __xml_first_child(rsc_entry); child != NULL; child = __xml_next(child)) {
         if (!crm_str_eq((const char *)child->name, XML_LRM_TAG_RSC_OP, TRUE)) {
             continue;
         }
         crm_xml_add(child, "resource", rsc);
         crm_xml_add(child, XML_ATTR_UNAME, node);
         collected = g_list_prepend(collected, child);
     }
 
     if (collected == NULL) {
         /* nothing recorded for this resource */
         return NULL;
     }
 
     by_callid = g_list_sort(collected, sort_op_by_callid);
 
     if (active_filter == FALSE) {
         return by_callid;
     }
 
     calculate_active_ops(by_callid, &start_index, &stop_index);
 
     if (start_index < stop_index) {
         /* The indexes do not change while walking the list, so this is
          * decided (and logged) once: nothing is kept */
         crm_trace("Skipping %s: not active", ID(rsc_entry));
 
     } else {
         for (item = by_callid; item != NULL; item = item->next, position++) {
             xmlNode *op = (xmlNode *) item->data;
 
             if (position < start_index) {
                 crm_trace("Skipping %s: old", ID(op));
                 continue;
             }
             active = g_list_append(active, op);
         }
     }
 
     g_list_free(by_callid);
     return active;
 }
 
 /*
  * Gather recorded operations from the status section of the working set.
  *
  * rsc            only consider this resource id (NULL: all resources)
  * node           only consider this node name (NULL: all nodes)
  * active_filter  forwarded to extract_operations()
  * data_set       working set whose input XML is searched
  *
  * The online status of every matching node is (re)determined as a side
  * effect.  Nodes that are not online are skipped unless the stonith-enabled
  * flag is set.
  *
  * Returns the concatenation of the per-resource lists produced by
  * extract_operations().
  */
 GListPtr
 find_operations(const char *rsc, const char *node, gboolean active_filter,
                 pe_working_set_t * data_set)
 {
     GListPtr found = NULL;
 
     xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
     xmlNode *node_state = NULL;
 
     for (node_state = __xml_first_child(status); node_state != NULL;
          node_state = __xml_next(node_state)) {
 
         const char *uname = NULL;
         node_t *this_node = NULL;
         xmlNode *resources = NULL;
         xmlNode *lrm_rsc = NULL;
 
         if (!crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
             continue;
         }
 
         uname = crm_element_value(node_state, XML_ATTR_UNAME);
         if (node != NULL && safe_str_neq(uname, node)) {
             continue;
         }
 
         this_node = pe_find_node(data_set->nodes, uname);
         if (this_node == NULL) {
             CRM_LOG_ASSERT(this_node != NULL);
             continue;
         }
 
         if (is_remote_node(this_node)) {
             determine_remote_online_status(this_node);
         } else {
             determine_online_status(node_state, this_node, data_set);
         }
 
         /* offline nodes run no resources...
          * unless stonith is enabled in which case we need to
          *   make sure rsc start events happen after the stonith
          */
         if (!(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled))) {
             continue;
         }
 
         resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
         resources = find_xml_node(resources, XML_LRM_TAG_RESOURCES, FALSE);
 
         for (lrm_rsc = __xml_first_child(resources); lrm_rsc != NULL;
              lrm_rsc = __xml_next(lrm_rsc)) {
 
             const char *rsc_id = NULL;
 
             if (!crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
                 continue;
             }
 
             rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
             if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
                 continue;
             }
 
             found = g_list_concat(found,
                                   extract_operations(uname, rsc_id, lrm_rsc, active_filter));
         }
     }
 
     return found;
 }
diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in
index b6b671810f..a9a32efef2 100755
--- a/lrmd/regression.py.in
+++ b/lrmd/regression.py.in
@@ -1,1035 +1,1042 @@
 #!/usr/bin/python
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 import os
 import sys
 import subprocess
 import shlex
 import time
 
 # Where to find test binaries
 # Prefer the source tree if available
 build_dir="@abs_top_builddir@"
 test_dir=sys.path[0]
 
 new_path=os.environ['PATH']
 
 if os.path.exists("%s/regression.py.in" % test_dir):
     print "Running tests from the source tree: %s (%s)" % (build_dir, test_dir)
     new_path = "%s/lrmd:%s" % (build_dir, new_path)    # For lrmd, lrmd_test and pacemaker_remoted
     new_path = "%s/tools:%s" % (build_dir, new_path)   # For crm_resource
     new_path = "%s/fencing:%s" % (build_dir, new_path) # For stonithd
 else:
     print "Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % test_dir
     new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) # For stonithd, lrmd, lrmd_test and pacemaker_remoted
 
 print new_path
 os.environ['PATH']=new_path
 
 def output_from_command(command, no_wait=0):
 	"""Run a command line and return its standard output split into lines.
 
 	command is split with shlex and started with its stdout captured.
 	When no_wait is non-zero the function returns 0 immediately without
 	waiting for (or reading from) the child process.  Otherwise the
 	captured output is returned as a list produced by splitting on "\\n".
 	"""
 	test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE)
 
 	if no_wait != 0:
 		return 0
 
 	# communicate() reads stdout while waiting for the child to exit.
 	# Calling wait() first can deadlock once the child has filled the
 	# pipe buffer, because nobody is draining it.
 	return test.communicate()[0].split("\n")
 
 class Test:
 	"""A single regression test: an ordered list of commands to run against
 	a freshly started daemon, each with an expected exit code and optional
 	stdout checks."""
 
 	def __init__(self, name, description, verbose = 0, tls = 0):
 		"""Create an empty test.
 
 		name / description identify the test; verbose enables extra output
 		and passes -V to the daemon and test tool; tls selects
 		pacemaker_remoted instead of lrmd as the daemon under test.
 		"""
 		self.name = name
 		self.description = description
 		self.cmds = []
 
 		if tls:
 			self.daemon_location = "pacemaker_remoted"
 		else:
 			self.daemon_location = "lrmd"
 
 		self.test_tool_location = "lrmd_test"
 		self.verbose = verbose
 		self.tls = tls
 
 		self.result_txt = ""
 		self.cmd_tool_output = ""
 		self.result_exitcode = 0
 
 		self.lrmd_process = None
 		self.stonith_process = None
 
 		self.executed = 0
 
 	def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None):
 		"""Append one step to the test.
 
 		For the test tool, " -V " is appended to args in verbose mode and
 		" -S " in tls mode; other commands are stored unchanged.
 		"""
 		if self.verbose and cmd == self.test_tool_location:
 			args = args + " -V "
 
 		if (cmd == self.test_tool_location) and self.tls:
 			args = args + " -S "
 
 		self.cmds.append(
 			{
 				"cmd" : cmd,
 				"kill" : kill,
 				"args" : args,
 				"expected_exitcode" : exitcode,
 				"stdout_match" : stdout_match,
 				"stdout_negative_match" : stdout_negative_match,
 				"no_wait" : no_wait,
 				"cmd_output" : "",
 			}
 		)
 
 	def start_environment(self):
 		"""Kill any leftover daemons and start fresh ones for this test."""
 		### make sure we are in full control here ###
 		cmd = shlex.split("killall -q -9 stonithd lt-stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test pacemaker_remoted")
 		test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 		# communicate() waits for exit while draining the captured stdout
 		test.communicate()
 
 		additional_args = ""
 
 		# stonithd is only started when not testing over tls
 		if self.tls == 0:
 			self.stonith_process = subprocess.Popen(shlex.split("stonithd -s"))
 
 		if self.verbose:
 			additional_args = additional_args + " -V"
 
 		self.lrmd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/lrmd-regression.log" % (self.daemon_location, additional_args)))
 
 		# give the daemon a moment to come up before commands are issued
 		time.sleep(1)
 
 	def clean_environment(self):
 		"""Stop the daemons started by start_environment() and remove the
 		daemon log, printing it first in verbose mode."""
 		if self.lrmd_process:
 			self.lrmd_process.terminate()
 			self.lrmd_process.wait()
 
 			if self.verbose:
 				print("Daemon output")
 				# the with-block closes the log before it is removed below
 				with open('/tmp/lrmd-regression.log', 'r') as f:
 					for line in f.readlines():
 						print(line.strip())
 			os.remove('/tmp/lrmd-regression.log')
 
 		if self.stonith_process:
 			self.stonith_process.terminate()
 			self.stonith_process.wait()
 
 		self.lrmd_process = None
 		self.stonith_process = None
 
 	def add_sys_cmd(self, cmd, args):
 		"""Add an arbitrary command that is expected to exit with 0."""
 		self.__new_cmd(cmd, args, 0, "")
 
 	def add_sys_cmd_no_wait(self, cmd, args):
 		"""Add an arbitrary command that is started but not waited for."""
 		self.__new_cmd(cmd, args, 0, "", 1)
 
 	def add_cmd_check_stdout(self, args, match, no_match = ""):
 		"""Add a test-tool step whose stdout must contain match and must not
 		contain no_match (empty strings disable the respective check)."""
 		self.__new_cmd(self.test_tool_location, args, 0, match, 0, no_match)
 
 	def add_cmd(self, args):
 		"""Add a test-tool step that is expected to exit with 0."""
 		self.__new_cmd(self.test_tool_location, args, 0, "")
 
 	def add_cmd_and_kill(self, killProc, args):
 		"""Add a test-tool step; killProc is a command line launched shortly
 		after the step has been started."""
 		self.__new_cmd(self.test_tool_location, args, 0, "", kill=killProc)
 
 	def add_expected_fail_cmd(self, args):
 		"""Add a test-tool step that is expected to exit with 1."""
 		self.__new_cmd(self.test_tool_location, args, 1, "")
 
 	def get_exitcode(self):
 		"""Return 0 if the test passed (or has not run), -1 if a step failed."""
 		return self.result_exitcode
 
 	def print_result(self, filler):
 		"""Print the result line, prefixed with filler."""
 		print("%s%s" % (filler, self.result_txt))
 
 	def run_cmd(self, args):
 		"""Execute one step (a dict built by __new_cmd).
 
 		Returns 0 immediately for no_wait steps.  Otherwise returns the
 		process exit code, or -2 when a stdout match / negative match check
 		fails.  The captured stdout is stored in args['cmd_output'].
 		"""
 		cmd = shlex.split(args['args'])
 		cmd.insert(0, args['cmd'])
 		if self.verbose:
 			print("\n\nRunning: "+" ".join(cmd))
 		test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 
 		if args['kill']:
 			if self.verbose:
 				print("Also running: "+args['kill'])
 			### Typically the kill argument is used to detect some sort of
 			### failure.  Without yielding for a few seconds here the process
 			### launched earlier that is listening for the failure may not have time
 			### to connect to the lrmd.
 			time.sleep(2)
 			subprocess.Popen(shlex.split(args['kill']))
 
 		if args['no_wait'] != 0:
 			return 0
 
 		# communicate() drains stdout while waiting for the child to exit.
 		# Waiting first could deadlock on a child that fills the pipe buffer.
 		output = test.communicate()[0]
 
 		if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0:
 			test.returncode = -2
 			print("STDOUT string '%s' was not found in cmd output" % (args['stdout_match']))
 
 		if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0:
 			test.returncode = -2
 			print("STDOUT string '%s' was found in cmd output" % (args['stdout_negative_match']))
 
 		args['cmd_output'] = output
 
 		return test.returncode
 
 	def run(self):
 		"""Run every step in order, stopping at the first unexpected exit code.
 
 		Tests whose name contains "stonith" are skipped in tls mode.
 		Returns the exit code of the last step executed (0 when skipped);
 		pass/fail is recorded in result_txt and result_exitcode.
 		"""
 		res = 0
 		i = 1
 
 		if self.tls and self.name.count("stonith") != 0:
 			self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name)
 			print(self.result_txt)
 			return res
 
 		self.start_environment()
 
 		if self.verbose:
 			print("\n--- START TEST - %s" % self.name)
 
 		self.result_txt = "SUCCESS - '%s'" % (self.name)
 		self.result_exitcode = 0
 		for cmd in self.cmds:
 			res = self.run_cmd(cmd)
 			if res != cmd['expected_exitcode']:
 				print(cmd['cmd_output'])
 				print("Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']))
 				self.result_txt = "FAILURE - '%s' failed at step %d. Command: lrmd_test %s" % (self.name, i, cmd['args'])
 				self.result_exitcode = -1
 				break
 			else:
 				if self.verbose:
 					print(cmd['cmd_output'].strip())
 					print("Step %d SUCCESS" % (i))
 			i = i + 1
 		self.clean_environment()
 
 		print(self.result_txt)
 		if self.verbose:
 			print("--- END TEST - %s\n" % self.name)
 
 		self.executed = 1
 		return res
 
 class Tests:
 	def __init__(self, verbose = 0, tls = 0):
 		self.tests = []
 		self.verbose = verbose
 		self.tls = tls;
 		self.rsc_classes = output_from_command("crm_resource --list-standards")
 		self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line
 		self.need_authkey = 0
                 self.action_timeout = " -t 5000 "
 		if self.tls:
 			self.rsc_classes.remove("stonith")
+		if "systemd" in self.rsc_classes:
+			# the lrmd_dummy_daemon requires this, we are importing it
+			# here just to guarantee it is installed before allowing this
+			# script to run. Otherwise, running without this import being
+			# available will make all the systemd tests look like they fail,
+			# which is really scary looking. I'd rather see the import fail.
+			import systemd.daemon
 
 		print "Testing "+repr(self.rsc_classes)
 
 		self.common_cmds = {
 			"ocf_reg_line"      : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy",
 			"ocf_reg_event"     : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"",
 			"ocf_unreg_line"    : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout,
 			"ocf_unreg_event"   : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"",
 			"ocf_start_line"    : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout,
 			"ocf_start_event"   : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ",
 			"ocf_stop_line"     : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout,
 			"ocf_stop_event"    : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ",
 			"ocf_monitor_line"  : "-c exec -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout,
 			"ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
 			"ocf_cancel_line"   : "-c cancel -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ",
 			"ocf_cancel_event"  : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
 
 			"systemd_reg_line"      : "-c register_rsc -r systemd_test_rsc "+self.action_timeout+" -C systemd -T lrmd_dummy_daemon",
 			"systemd_reg_event"     : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"",
 			"systemd_unreg_line"    : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout,
 			"systemd_unreg_event"   : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"",
 			"systemd_start_line"    : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout,
 			"systemd_start_event"   : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ",
 			"systemd_stop_line"     : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout,
 			"systemd_stop_event"    : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ",
 			"systemd_monitor_line"  : "-c exec -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout,
 			"systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
 			"systemd_cancel_line"   : "-c cancel -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ",
 			"systemd_cancel_event"  : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
 
 			"upstart_reg_line"      : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T lrmd_dummy_daemon",
 			"upstart_reg_event"     : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"",
 			"upstart_unreg_line"    : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout,
 			"upstart_unreg_event"   : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"",
 			"upstart_start_line"    : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout,
 			"upstart_start_event"   : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ",
 			"upstart_stop_line"     : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout,
 			"upstart_stop_event"    : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ",
 			"upstart_monitor_line"  : "-c exec -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout,
 			"upstart_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
 			"upstart_cancel_line"   : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ",
 			"upstart_cancel_event"  : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
 
 			"service_reg_line"      : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy",
 			"service_reg_event"     : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"",
 			"service_unreg_line"    : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout,
 			"service_unreg_event"   : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"",
 			"service_start_line"    : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout,
 			"service_start_event"   : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ",
 			"service_stop_line"     : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout,
 			"service_stop_event"    : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ",
 			"service_monitor_line"  : "-c exec -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout,
 			"service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
 			"service_cancel_line"   : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ",
 			"service_cancel_event"  : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
 
 			"lsb_reg_line"      : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy",
 			"lsb_reg_event"     : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ",
 			"lsb_unreg_line"    : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout,
 			"lsb_unreg_event"   : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"",
 			"lsb_start_line"    : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout,
 			"lsb_start_event"   : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ",
 			"lsb_stop_line"     : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout,
 			"lsb_stop_event"    : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ",
 			"lsb_monitor_line"  : "-c exec -r \"lsb_test_rsc\" -a status -i \"2000\" "+self.action_timeout,
 			"lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout,
 			"lsb_cancel_line"   : "-c cancel -r \"lsb_test_rsc\" -a \"status\" -i \"2000\" -t \"3000\" ",
 			"lsb_cancel_event"  : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ",
 
 			"stonith_reg_line"      : "-c register_rsc -r stonith_test_rsc "+self.action_timeout+" -C stonith -P pacemaker -T fence_dummy_monitor",
 			"stonith_reg_event"     : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ",
 			"stonith_unreg_line"    : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout,
 			"stonith_unreg_event"   : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"",
 			"stonith_start_line"    : "-c exec -r \"stonith_test_rsc\" -a \"start\" -t 8000 ",
 			"stonith_start_event"   : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ",
 			"stonith_stop_line"     : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout,
 			"stonith_stop_event"    : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ",
 			"stonith_monitor_line"  : "-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout,
 			"stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
 			"stonith_cancel_line"   : "-c cancel -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ",
 			"stonith_cancel_event"  : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
 		}
 
 	def new_test(self, name, description):
 		"""Create a Test with this suite's verbose and tls settings, register it in self.tests and return it."""
 		created = Test(name, description, self.verbose, self.tls)
 		self.tests.append(created)
 		return created
 
 	def setup_test_environment(self):
 		os.system("service pacemaker_remote stop")
 		self.cleanup_test_environment()
 
 		if self.tls and not os.path.isfile("/etc/pacemaker/authkey"):
 			self.need_authkey = 1
 			os.system("mkdir -p /etc/pacemaker")
 			os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1")
 
 		### Make fake systemd daemon and unit file ###
 		dummy_daemon = """#!/bin/python
 import time, systemd.daemon
 time.sleep(3)
 systemd.daemon.notify("READY=1")
 while True: time.sleep(5)
 """
 		dummy_service_file = """
 [Unit]
 Description=Dummy resource that takes a while to start
 
 [Service]
 Type=notify
 ExecStart=/usr/sbin/lrmd_dummy_daemon
 """
 
 		dummy_upstart_job = ("""
 description     "Dummy service for regression tests"
 exec dd if=/dev/random of=/dev/null
 """)
 
 		dummy_fence_sleep_agent = ("""#!/usr/bin/python
 import sys
 import time
 def main():
     for line in sys.stdin.readlines():
         if line.count("monitor") > 0:
             time.sleep(30000)
             sys.exit(0)
     sys.exit(-1)
 if __name__ == "__main__":
     main()
 """)
 		dummy_fence_agent = ("""#!/usr/bin/python
 import sys
 def main():
     for line in sys.stdin.readlines():
         if line.count("monitor") > 0:
             sys.exit(0)
         if line.count("metadata") > 0:
             print '<resource-agent name="fence_dummy_monitor" shortdesc="Dummy Fence agent for testing">'
             print '  <longdesc>dummy description.</longdesc>'
             print '  <vendor-url>http://www.example.com</vendor-url>'
             print '  <parameters>'
             print '    <parameter name="action" unique="0" required="1">'
             print '      <getopt mixed="-o, --action=[action]"/>'
             print '      <content type="string" default="reboot"/>'
             print '      <shortdesc lang="en">Fencing Action</shortdesc>'
             print '    </parameter>'
             print '    <parameter name="port" unique="0" required="0">'
             print '      <getopt mixed="-n, --plug=[id]"/>'
             print '      <content type="string"/>'
             print '      <shortdesc lang="en">Physical plug number or name of virtual machine</shortdesc>'
             print '    </parameter>'
             print '  </parameters>'
             print '  <actions>'
             print '    <action name="on"/>'
             print '    <action name="off"/>'
             print '    <action name="monitor"/>'
             print '    <action name="metadata"/>'
             print '  </actions>'
             print '</resource-agent>'
             sys.exit(0)
     sys.exit(-1)
 if __name__ == "__main__":
     main()
 """)
 
 		os.system("cat <<-END >>/etc/init/lrmd_dummy_daemon.conf\n%s\nEND" % (dummy_upstart_job))
 		os.system("cat <<-END >>/usr/sbin/lrmd_dummy_daemon\n%s\nEND" % (dummy_daemon))
 		os.system("cat <<-END >>/lib/systemd/system/lrmd_dummy_daemon.service\n%s\nEND" % (dummy_service_file))
 		os.system("chmod a+x /usr/sbin/lrmd_dummy_daemon")
 
 		os.system("cat <<-END >>/usr/sbin/fence_dummy_sleep\n%s\nEND" % (dummy_fence_sleep_agent))
 		os.system("chmod 711 /usr/sbin/fence_dummy_sleep")
 
 		os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor\n%s\nEND" % (dummy_fence_agent))
 		os.system("chmod 711 /usr/sbin/fence_dummy_monitor")
 
 		if os.path.exists("%s/cts/LSBDummy" % build_dir):
 			print "Using %s/cts/LSBDummy" % build_dir
 			os.system("cp %s/cts/LSBDummy /etc/init.d/LSBDummy" % build_dir)
 
 			if not os.path.exists("@OCF_RA_DIR@/pacemaker"):
 				os.system("mkdir -p @OCF_RA_DIR@/pacemaker/")
 
 			# Install helper OCF agents
 			for ra in [ "Dummy", "Stateful", "ping" ]:
 				os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (build_dir, ra, ra))
 				os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (ra))
 			else:
 				# Assume it's installed
 				print "Using @datadir@/@PACKAGE@/tests/cts/LSBDummy"
 				os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy")
 
 				os.system("chmod a+x /etc/init.d/LSBDummy")
 				os.system("ls -al /etc/init.d/LSBDummy")
 		os.system("mkdir -p @CRM_CORE_DIR@/root")
 
 		if os.path.exists("/bin/systemctl"):
 			os.system("systemctl daemon-reload")
 
 	def cleanup_test_environment(self):
 		"""Remove the fixtures that setup_test_environment() installs.
 
 		The authkey is deleted only when self.need_authkey is set, i.e. when
 		this script created it. systemd is asked to reload its units when
 		/bin/systemctl exists.
 		"""
 		if self.need_authkey:
 			os.system("rm -f /etc/pacemaker/authkey")
 
 		os.system("rm -f /etc/init.d/LSBDummy")
 		# The dummy upstart job is written by setup_test_environment() too;
 		# without this it is left behind after every run.
 		os.system("rm -f /etc/init/lrmd_dummy_daemon.conf")
 		os.system("rm -f /lib/systemd/system/lrmd_dummy_daemon.service")
 		os.system("rm -f /usr/sbin/lrmd_dummy_daemon")
 		os.system("rm -f /usr/sbin/fence_dummy_monitor")
 		os.system("rm -f /usr/sbin/fence_dummy_sleep")
 		if os.path.exists("/bin/systemctl"):
 			os.system("systemctl daemon-reload")
 
 	### These are tests that should apply to all resource classes ###
 	def build_generic_tests(self):
 		"""Queue the scenarios that every resource standard must pass.
 
 		Five scenario families are created, each one for every entry of
 		self.rsc_classes before the next family starts: registration,
 		start/stop, monitor cancel, duplicate monitors and stop-implies-cancel.
 		Commands are assembled from the self.common_cmds snippets.
 		"""
 		cmds = self.common_cmds
 
 		def request(rsc, op):
 			# "<op>_line <op>_event": issue the request and wait for its event
 			return cmds["%s_%s_line" % (rsc, op)] + " " + cmds["%s_%s_event" % (rsc, op)]
 
 		def monitor_event(rsc):
 			# wait for a monitor event without issuing a new request
 			return cmds["%s_monitor_event" % (rsc)]
 
 		def start_monitoring(test, rsc):
 			# common prologue: register, start, schedule the monitor, then
 			# see two further monitor events
 			test.add_cmd(request(rsc, "reg"))
 			test.add_cmd(request(rsc, "start"))
 			test.add_cmd(request(rsc, "monitor"))
 			for _ in range(2):
 				test.add_cmd(monitor_event(rsc)) ### If this fails, that means the monitor may not be getting rescheduled ####
 
 		def expect_silence(test, rsc):
 			for _ in range(2):
 				test.add_expected_fail_cmd(monitor_event(rsc)) ### If this happens the monitor did not actually cancel correctly. ###
 
 		### register/unregister tests ###
 		def registration(rsc):
 			test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc))
 			test.add_cmd(request(rsc, "reg"))
 			test.add_cmd(request(rsc, "unreg"))
 
 		### start/stop tests  ###
 		def start_stop(rsc):
 			test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc))
 			for op in ("reg", "start", "stop", "unreg"):
 				test.add_cmd(request(rsc, op))
 
 		### monitor cancel test ###
 		def monitor_cancel(rsc):
 			test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc))
 			start_monitoring(test, rsc)
 			test.add_cmd(request(rsc, "cancel"))
 			expect_silence(test, rsc)
 			test.add_cmd(request(rsc, "stop"))
 			test.add_cmd(request(rsc, "unreg"))
 
 		### monitor duplicate test ###
 		def monitor_duplicate(rsc):
 			test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc))
 			start_monitoring(test, rsc)
 
 			# Add the duplicate monitors.
 			for _ in range(4):
 				test.add_cmd(request(rsc, "monitor"))
 			# verify we still get update events
 			test.add_cmd(monitor_event(rsc)) ### If this fails, that means the monitor may not be getting rescheduled ####
 
 			# cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates
 			test.add_cmd(request(rsc, "cancel"))
 			expect_silence(test, rsc)
 			test.add_cmd(request(rsc, "stop"))
 			test.add_cmd(request(rsc, "unreg"))
 
 		### stop implies cancel test ###
 		def stop_implies_cancel(rsc):
 			test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc))
 			start_monitoring(test, rsc)
 			test.add_cmd(request(rsc, "stop"))
 			expect_silence(test, rsc)
 			test.add_cmd(request(rsc, "unreg"))
 
 		# One full pass over the standards per scenario family keeps the
 		# tests grouped by family in self.tests.
 		for scenario in (registration, start_stop, monitor_cancel, monitor_duplicate, stop_implies_cancel):
 			for rsc in self.rsc_classes:
 				scenario(rsc)
 
 
 	### These are complex tests that involve managing multiple resources of different types ###
 	def build_multi_rsc_tests(self):
 		"""Queue one test that drives a resource of every standard side by side.
 
 		Each phase is applied to all entries of self.rsc_classes before the
 		next phase begins: register, start, schedule the monitor, see one more
 		monitor event, cancel the monitor, stop, unregister.
 		"""
 		cmds = self.common_cmds
 		# do not use service and systemd at the same time, it is the same resource.
 
 		### register start monitor stop unregister resources of each type at the same time. ###
 		test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes")
 
 		# (operation, send_request): when send_request is false only the event
 		# is awaited, which for "monitor" shows the monitor is being rescheduled.
 		phases = [
 			("reg", True),
 			("start", True),
 			("monitor", True),
 			("monitor", False),
 			("cancel", True),
 			("stop", True),
 			("unreg", True),
 		]
 		for op, send_request in phases:
 			for rsc in self.rsc_classes:
 				if send_request:
 					args = cmds["%s_%s_line" % (rsc, op)] + " " + cmds["%s_%s_event" % (rsc, op)]
 				else:
 					args = cmds["%s_%s_event" % (rsc, op)]
 				test.add_cmd(args)
 
 	### These are tests related to how the lrmd handles failures.  ###
 	def build_negative_tests(self):
 
 		### ocf start timeout test  ###
 		test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w")  # -t must be less than self.action_timeout
 		test.add_cmd("-l "
 			"\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" "+self.action_timeout)
 		test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
 		test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 
 		### stonith start timeout test  ###
 		test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 1000 -w") # -t must be less than self.action_timeout
 		test.add_cmd("-l "
 			"\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" "+self.action_timeout)
 		test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
 		test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### stonith component fail ###
 		common_cmds = self.common_cmds
 		test = self.new_test("stonith_component_fail", "Kill stonith component after lrmd connects")
 		test.add_cmd(common_cmds["stonith_reg_line"]   + " " + common_cmds["stonith_reg_event"])
 		test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"])
 
 		test.add_cmd("-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"600000\" "
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 
 		test.add_cmd_and_kill("killall -9 -q stonithd lt-stonithd" ,"-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error\" -t 15000")
 		test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"])
 
 
 		### monitor fail for ocf resources ###
 		test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
 		test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000")
 		test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" "
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
 		test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout)
 		test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### verify notify changes only for monitor operation.  ###
 		test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o "
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -o "
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
 		test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000")
 		test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" "
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
 		test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout)
 		test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### monitor fail for systemd resource ###
 		if "systemd" in self.rsc_classes:
 			test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..")
 			test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T lrmd_dummy_daemon "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
 			test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 			test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 			test.add_cmd_and_kill("killall -9 -q lrmd_dummy_daemon", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000")
 			test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" "
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
 			test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout)
 			test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 			test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### monitor fail for upstart resource ###
 		if "upstart" in self.rsc_classes:
 			test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..")
 			test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T lrmd_dummy_daemon "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
 			test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 			test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 			test.add_cmd_and_kill("killall -9 -q dd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000")
 			test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" "
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
 			test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout)
 			test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 			test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### Cancel non-existent operation on a resource ###
 		test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
 		test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_expected_fail_cmd("-c cancel -r test_rsc -a \"monitor\" -i 1234 -t \"3000\" " ### interval is wrong, should fail
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
 		test.add_expected_fail_cmd("-c cancel -r test_rsc -a stop -i 100 -t \"3000\" " ### action name is wrong, should fail
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### Attempt to invoke non-existent rsc id ###
 		test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.")
 		test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ")
 		test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
 		test.add_expected_fail_cmd("-c exec -r test_rsc -a monitor -i 3000 "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
 		test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ")
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### Register and start a resource that doesn't exist, systemd  ###
 		if "systemd" in self.rsc_classes:
 			test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure")
 			test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
 			test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		if "upstart" in self.rsc_classes:
 			test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure")
 			test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 			test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
 			test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 				     "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### Register and start a resource that doesn't exist, ocf ###
 		test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### Register ocf with non-existent provider  ###
 		test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### Register ocf with empty provider field  ###
 		test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.")
 		test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ")
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 	### These are tests that target specific cases ###
 	def build_custom_tests(self):
 		"""Register the regression tests that target specific cases.

 		Each test is created with self.new_test(name, description) and then
 		populated, in order, through add_cmd / add_expected_fail_cmd /
 		add_sys_cmd / add_cmd_check_stdout. The tests registered here are:
 		rsc_tmp_dir, start_delay, start_delay_cancel, verify_get_rsc_info,
 		duplicate_registration, notify_orig_client_only, get_ocf_metadata,
 		get_lsb_metadata, get_stonith_metadata, get_systemd_metadata,
 		get_upstart_metadata, list_ocf_providers and verify_agent_lists.

 		Steps (or whole tests) specific to systemd, upstart or stonith are only
 		added when that class name is present in self.rsc_classes.

 		NOTE(review): the @CRM_RSCTMP_DIR@ placeholders are presumably
 		substituted when this file is generated from its template - confirm.
 		"""
 
 		### verify resource temporary folder is created and used by heartbeat agents.  ###
 		test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory")
 		test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@")
 		test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy "
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd("-c exec -r test_rsc -a start -t 4000")
 		test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@")
 		test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state")
 		test.add_cmd("-c exec -r test_rsc -a stop -t 4000")
 		test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### start delay then stop test ###
 		test = self.new_test("start_delay", "Verify start delay works as expected.")
 		test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy "
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000")
 		# the start event is expected NOT to arrive within 2000 (first listen), but to arrive within 6000 (second listen)
 		test.add_expected_fail_cmd("-l "
 			"\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000")
 		test.add_cmd("-l "
 			"\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000")
 		test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
 		test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### start delay, but cancel before it gets a chance to start.  ###
 		test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.")
 		test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy "
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000")
 		test.add_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ")
 		# after the cancel, a completed start event must never show up
 		test.add_expected_fail_cmd("-l "
 			"\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000")
 		test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### Register a bunch of resources, verify we can get info on them ###
 		test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.")
 		if "systemd" in self.rsc_classes:
 			test.add_cmd("-c register_rsc -r rsc1 -C systemd -T lrmd_dummy_daemon "+self.action_timeout)
 			test.add_cmd("-c get_rsc_info -r rsc1 ")
 			test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout)
 			test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ")
 
 		if "upstart" in self.rsc_classes:
 			test.add_cmd("-c register_rsc -r rsc1 -C upstart -T lrmd_dummy_daemon "+self.action_timeout)
 			test.add_cmd("-c get_rsc_info -r rsc1 ")
 			test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout)
 			test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ")
 
 		test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout)
 		test.add_cmd("-c get_rsc_info -r rsc2 ")
 		test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout)
 		test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ")
 
 		### Register duplicate, verify only one entry exists and can still be removed.
 		test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.")
 		test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout)
 		test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy")
 		test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout)
 		test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy")
 		# re-registering the same id with a different type is expected to replace the entry
 		test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout)
 		test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful")
 		test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout)
 		test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ")
 
 		### verify the option to only send notification to the original client. ###
 		test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.")
 		test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
 		test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -n "
 			"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
 		# this will fail because the monitor notifications should only go to the original caller, which no longer exists.
 		test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
 		test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" ")
 		test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
 			"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
 
 		### get ocf metadata ###
 		test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource")
 		test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\""
 			,"resource-agent name=\"Dummy\"")
 		test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"")
 		# missing class (-C) and unknown agent type are both expected to fail
 		test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"")
 		test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"")
 
 		### get lsb metadata ###
 		test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource")
 		test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\""
 			,"resource-agent name='LSBDummy'")
 
 		### get stonith metadata ###
 		test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource")
 		test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"",
 			"resource-agent name=\"fence_dummy_monitor\"")
 
 		### get systemd metadata ###
 		if "systemd" in self.rsc_classes:
 			test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource")
 			test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"lrmd_dummy_daemon\""
 				,"resource-agent name=\"lrmd_dummy_daemon\"")
 
 		### get upstart metadata ###
 		if "upstart" in self.rsc_classes:
 			test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource")
 			test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"lrmd_dummy_daemon\""
 				,"resource-agent name=\"lrmd_dummy_daemon\"")
 
 		### get ocf providers  ###
 		test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.")
 		test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker")
 		test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker")
 
 		### Verify agents only exist in their lists ###
 		# NOTE(review): in the three-argument calls below the third argument is presumably
 		# a string that must NOT appear in stdout (see the "should not exist" remarks) - confirm.
 		test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.")
 		test.add_cmd_check_stdout("-c list_agents ", "Stateful")                                  ### ocf ###
 		test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful")
 		test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful")                        ### should not exist
 		test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful")                    ### should not exist
 		test.add_cmd_check_stdout("-c list_agents ", "LSBDummy")                                  ### init.d ###
 		test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy")
 		test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy")
 		test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon")               ### should not exist
 
 		# NOTE(review): the next check is identical to the one directly above (redundant but harmless).
 		test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon")               ### should not exist
 		test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor")             ### should not exist
 		test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor")         ### should not exist
 		test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor")             ### should not exist
 
 		if "systemd" in self.rsc_classes:
 			test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon")                 ### systemd ###
 			test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy")
 			test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful")            ### should not exist
 			test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon")
 			test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist
 
 		if "upstart" in self.rsc_classes:
 			test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon")                 ### upstart ###
 			test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy")
 			test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful")            ### should not exist
 			test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon")
 			test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist
 
 		if "stonith" in self.rsc_classes:
 			test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor")     ### stonith ###
 			test.add_cmd_check_stdout("-c list_agents -C stonith", "", "lrmd_dummy_daemon")   ### should not exist
 			test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful")            ### should not exist
 			test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor")
 
 	def print_list(self):
 		"""Print a table listing the name and description of every registered test.

 		Output goes to stdout: a header with the number of tests, one
 		right-aligned (35 column) name per row, and a closing marker.

 		Uses the single-argument parenthesised form of print, which produces
 		the same output under Python 2 (print statement) and Python 3
 		(print function); the bare statement form is a SyntaxError on Python 3.
 		"""
 		row_format = "%35s - %s"
 		print("\n==== %d TESTS FOUND ====" % (len(self.tests)))
 		print(row_format % ("TEST NAME", "TEST DESCRIPTION"))
 		print(row_format % ("--------------------", "--------------------"))
 		for test in self.tests:
 			print(row_format % (test.name, test.description))
 		print("==== END OF LIST ====\n")
 
 	def run_single(self, name):
 		"""Run the first registered test whose name equals `name`.

 		Nothing happens when no registered test has that name; later tests
 		sharing the same name are not run.
 		"""
 		matching = (candidate for candidate in self.tests if candidate.name == name)
 		chosen = next(matching, None)
 		if chosen is not None:
 			chosen.run()
 
 	def run_tests_matching(self, pattern):
 		"""Run, in registration order, every test whose name contains `pattern`.

 		The match is a plain substring test, not a regular expression.
 		"""
 		for candidate in self.tests:
 			if pattern not in candidate.name:
 				continue
 			candidate.run()
 
 	def run_tests(self):
 		"""Run every registered test, in registration order."""
 		for registered in self.tests:
 			registered.run()
 
 	def exit(self):
 		"""Terminate the process with a status reflecting the executed tests.

 		Calls sys.exit(-1) if any test that was executed reports a non-zero
 		exit code, and sys.exit(0) otherwise. Tests that were never executed
 		are ignored.
 		"""
 		any_failed = any(
 			entry.get_exitcode() != 0
 			for entry in self.tests
 			if entry.executed != 0
 		)
 		sys.exit(-1 if any_failed else 0)
 
 	def print_results(self):
 		"""Print the final summary for all executed tests to stdout.

 		Every executed test with a non-zero exit code has its own
 		print_result("    ") called under the "FAILURE RESULTS" heading;
 		"    None" is printed there when nothing failed. Pass/fail totals
 		follow. Tests that were never executed are not counted.

 		Uses the single-argument parenthesised form of print so the method is
 		valid, with identical output, under both Python 2 and Python 3.
 		"""
 		failures = 0
 		success = 0
 		print("\n\n======= FINAL RESULTS ==========")
 		print("\n--- FAILURE RESULTS:")
 		for test in self.tests:
 			if test.executed == 0:
 				continue
 
 			if test.get_exitcode() != 0:
 				failures = failures + 1
 				test.print_result("    ")
 			else:
 				success = success + 1
 
 		if failures == 0:
 			print("    None")
 
 		print("\n--- TOTALS\n    Pass:%d\n    Fail:%d\n" % (success, failures))
 
 class TestOptions:
 	"""Command-line options for the regression test runner.

 	Parsed values live in the `options` dict, keyed by the long option name
 	without its leading dashes (e.g. 'run-only', 'verbose').
 	"""

 	def __init__(self):
 		# Defaults: every switch off, no test selection.
 		self.options = {}
 		self.options['list-tests'] = 0
 		self.options['run-all'] = 1
 		self.options['run-only'] = ""
 		self.options['run-only-pattern'] = ""
 		self.options['verbose'] = 0
 		self.options['invalid-arg'] = ""
 		self.options['show-usage'] = 0
 		self.options['pacemaker-remote'] = 0
 
 	def build_options(self, argv):
 		"""Populate self.options from a full argument vector.

 		argv[0] (the program name) is skipped. Unrecognised arguments are
 		ignored. `-r/--run-only` and `-p/--run-only-pattern` consume the
 		following argument as their value, whatever it looks like.

 		If one of those options is the last argument, so that no value
 		follows it, the option is recorded in options['invalid-arg'] and
 		options['show-usage'] is set instead of raising IndexError.
 		"""
 		switches = {
 			"-h": 'show-usage', "--help": 'show-usage',
 			"-l": 'list-tests', "--list-tests": 'list-tests',
 			"-V": 'verbose', "--verbose": 'verbose',
 			"-R": 'pacemaker-remote', "--pacemaker-remote": 'pacemaker-remote',
 		}
 		valued = {
 			"-r": 'run-only', "--run-only": 'run-only',
 			"-p": 'run-only-pattern', "--run-only-pattern": 'run-only-pattern',
 		}
 
 		remaining = iter(argv[1:])
 		for arg in remaining:
 			if arg in switches:
 				self.options[switches[arg]] = 1
 			elif arg in valued:
 				# Pull the option's value off the same iterator so the main
 				# loop never sees it as an option of its own.
 				value = next(remaining, None)
 				if value is None:
 					self.options['invalid-arg'] = arg
 					self.options['show-usage'] = 1
 				else:
 					self.options[valued[arg]] = value
 
 	def show_usage(self):
 		"""Print the usage text to stdout.

 		When build_options recorded an option that was missing its value, a
 		one-line explanation is printed first.
 		"""
 		if self.options['invalid-arg'] != "":
 			print("Option '%s' requires a value" % (self.options['invalid-arg']))
 		print("usage: " + sys.argv[0] + " [options]")
 		print("If no options are provided, all tests will run")
 		print("Options:")
 		print("\t [--help | -h]                        Show usage")
 		print("\t [--list-tests | -l]                  Print out all registered tests.")
 		print("\t [--run-only | -r 'testname']         Run a specific test")
 		print("\t [--verbose | -V]                     Verbose output")
 		print("\t [--pacemaker-remote | -R]            Test pacemaker-remote binary instead of lrmd.")
 		print("\t [--run-only-pattern | -p 'string']   Run only tests containing the string value")
 		print("\n\tExample: Run only the test 'start_stop'")
 		print("\t\t python ./regression.py --run-only start_stop")
 		print("\n\tExample: Run only the tests with the string 'systemd' present in them")
 		print("\t\t python ./regression.py --run-only-pattern systemd")
 
 
 def main(argv):
 	"""Parse the command line, register all tests, then list, explain or run them.

 	Precedence of the modes: list tests, show usage, run tests matching a
 	pattern, run one named test, run everything. Results are printed after
 	any of the three run modes. The test environment is set up before and
 	cleaned up after the selected mode, and the process then exits through
 	the test collection's exit().
 	"""
 	parsed = TestOptions()
 	parsed.build_options(argv)
 	settings = parsed.options
 
 	suite = Tests(settings['verbose'], settings['pacemaker-remote'])
 
 	suite.build_generic_tests()
 	suite.build_multi_rsc_tests()
 	suite.build_negative_tests()
 	suite.build_custom_tests()
 
 	suite.setup_test_environment()
 
 	print("Starting ...")
 
 	if settings['list-tests']:
 		suite.print_list()
 	elif settings['show-usage']:
 		parsed.show_usage()
 	else:
 		# All remaining modes run something and then report the results.
 		if settings['run-only-pattern'] != "":
 			suite.run_tests_matching(settings['run-only-pattern'])
 		elif settings['run-only'] != "":
 			suite.run_single(settings['run-only'])
 		else:
 			suite.run_tests()
 		suite.print_results()
 
 	suite.cleanup_test_environment()
 	suite.exit()
 
 if "__main__" == __name__:
 	# Executed as a script: hand the raw command line to main().
 	main(sys.argv)
diff --git a/mcp/pacemaker.combined.upstart.in b/mcp/pacemaker.combined.upstart.in
index 9540019c6a..6301d1068f 100644
--- a/mcp/pacemaker.combined.upstart.in
+++ b/mcp/pacemaker.combined.upstart.in
@@ -1,64 +1,70 @@
 # pacemaker-corosync - High-Availability cluster
 #
 # Starts Corosync cluster engine and Pacemaker cluster manager.
 
 stop on runlevel [0123456]
 kill timeout 3600
 respawn
 
 env prog=pacemakerd
 env rpm_sysconf=@sysconfdir@/sysconfig/pacemaker
 env rpm_lockfile=@localstatedir@/lock/subsys/pacemaker
 env deb_sysconf=@sysconfdir@/default/pacemaker
 env deb_lockfile=@localstatedir@/lock/pacemaker
 
 script
     [ -f "$rpm_sysconf" ] && . $rpm_sysconf
     [ -f "$deb_sysconf" ] && . $deb_sysconf
     exec $prog
 end script
 
 pre-start script
     # Set up the software watchdog that corosync uses.
     # Adjust this to suit your environment.
     [ -c /dev/watchdog ] || modprobe softdog soft_margin=60
     pidof corosync || start corosync
 
     # if you use corosync-notifyd, uncomment the line below.
     #start corosync-notifyd
 
     # Give corosync time to fail, then abort the start if it is not running.
     sleep 2
     pidof corosync || { exit 1; }
+
+    # if you use crm_mon, uncomment the line below.
+    #start crm_mon
 end script
 
 post-start script
     [ -f "$rpm_sysconf" ] && . $rpm_sysconf
     [ -f "$deb_sysconf" ] && . $deb_sysconf
     [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
     [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
     touch $LOCK_FILE
     pidof $prog > @localstatedir@/run/$prog.pid
 end script
 
 post-stop script
     [ -f "$rpm_sysconf" ] && . $rpm_sysconf
     [ -f "$deb_sysconf" ] && . $deb_sysconf
     [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
     [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
     rm -f $LOCK_FILE
     rm -f @localstatedir@/run/$prog.pid
 
     # If you use corosync's watchdog, uncomment the line below.
     #pidof corosync || false
 
     pidof crmd || stop corosync
 
     # If you want corosync's watchdog to reboot the machine when
     # pacemakerd disappears unexpectedly, uncomment the line below
     # and disable the "respawn" stanza above.
     #pidof crmd && killall -q -9 corosync
 
+    # if you use crm_mon, uncomment the line below.
+    #stop crm_mon
+
     # if you use corosync-notifyd, uncomment the line below.
     #stop corosync-notifyd || true
 end script
diff --git a/pacemaker.spec.in b/pacemaker.spec.in
index bee6bfc40e..597fb3ae3a 100644
--- a/pacemaker.spec.in
+++ b/pacemaker.spec.in
@@ -1,865 +1,869 @@
 %global gname haclient
 %global uname hacluster
 %global pcmk_docdir %{_docdir}/%{name}
 
 %global specversion 1
 %global commit HEAD
 %global shortcommit %(c=%{commit}; echo ${c:0:7})
 %global github_owner ClusterLabs
 
 # Turn off the auto compilation of python files not in the site-packages directory
 # Needed so that the -devel package is multilib compliant
 %global __os_install_post %(echo '%{__os_install_post}' | sed -e 's!/usr/lib[^[:space:]]*/brp-python-bytecompile[[:space:]].*$!!g')
 
 %global rawhide  %(test ! -e /etc/yum.repos.d/fedora-rawhide.repo; echo $?)
 %global cs_version %(pkg-config corosync --modversion  | awk -F . '{print $1}')
 %global py_site %(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")
 
 # Conditionals
 # Invoke "rpmbuild --without <feature>" or "rpmbuild --with <feature>"
 # to disable or enable specific features
 
 # Legacy stonithd fencing agents
 %bcond_with stonithd
 
 # Build with/without support for profiling tools
 %bcond_with profiling
 
 # Build with/without support for performing coverage analysis
 %bcond_with coverage
 
 # We generate docs using Publican, Asciidoc and Inkscape, but they're not available everywhere
 %bcond_without doc
 
 # Use a different versioning scheme
 %bcond_with pre_release
 
 # Ship an Upstart job file
 %bcond_with upstart_job
 
 # Turn off cman support on platforms that normally ship with it
 %bcond_without cman
 
 %if %{with profiling}
 # This disables -debuginfo package creation and also the stripping of binaries/libraries
 # Useful if you want sane profiling data
 %global debug_package %{nil}
 %endif
 
 %if %{with pre_release}
 %global pcmk_release 0.%{specversion}.%{shortcommit}.git
 %else
 %global pcmk_release %{specversion}
 %endif
 
 Name:          pacemaker
 Summary:       Scalable High-Availability cluster resource manager
 Version:       1.1.11
 Release:       %{pcmk_release}%{?dist}
 License:       GPLv2+ and LGPLv2+
 Url:           http://www.clusterlabs.org
 Group:         System Environment/Daemons
 
 Source0:        https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz
 BuildRoot:     %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 AutoReqProv:   on
 Requires:      python
 Requires:      resource-agents
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      %{name}-cluster-libs = %{version}-%{release}
 Requires:      %{name}-cli = %{version}-%{release}
 
 %if %{defined systemd_requires}
 %systemd_requires
 %endif
 
 %if 0%{?rhel} > 0
 ExclusiveArch: i386 i686 x86_64
 %endif
 
 
 # Required for core functionality
 BuildRequires: automake autoconf libtool pkgconfig python libtool-ltdl-devel
 BuildRequires: pkgconfig(glib-2.0) libxml2-devel libxslt-devel libuuid-devel
 BuildRequires: pkgconfig python-devel gcc-c++ bzip2-devel pam-devel
 
 # Required for agent_config.h which specifies the correct scratch directory
 BuildRequires: resource-agents
 
 # We need reasonably recent versions of libqb
 BuildRequires: libqb-devel > 0.11.0
 Requires:      libqb > 0.11.0
 
 # Enables optional functionality
 BuildRequires: ncurses-devel openssl-devel libselinux-devel docbook-style-xsl
 BuildRequires: bison byacc flex help2man gnutls-devel pkgconfig(dbus-1)
 
 %if %{defined _unitdir}
 BuildRequires: systemd-devel
 %endif
 
 %if %{with cman}
 
 %if 0%{?fedora} > 0
 %if 0%{?fedora} < 17
 BuildRequires: clusterlib-devel
 %endif
 %endif
 
 %if 0%{?rhel} > 0
 %if 0%{?rhel} < 7
 BuildRequires: clusterlib-devel
 %endif
 %endif
 
 %endif
 
 Requires:      corosync
 BuildRequires: corosynclib-devel
 
 %if %{with stonithd}
 BuildRequires: cluster-glue-libs-devel
 %endif
 
 %if !%{rawhide}
 # More often than not, inkscape is busted on rawhide, don't even bother
 
 %if %{with doc}
 %ifarch %{ix86} x86_64
 BuildRequires: publican inkscape asciidoc
 %endif
 %endif
 
 %endif
 
 %description
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 It supports more than 16 node clusters with significant capabilities
 for managing resources and dependencies.
 
 It will run scripts at initialization, when machines go up or down,
 when related resources fail and can be configured to periodically check
 resource health.
 
 Available rpmbuild rebuild options:
   --with(out) : cman stonithd doc coverage profiling pre_release upstart_job
 
 %package cli
 License:       GPLv2+ and LGPLv2+
 Summary:       Command line tools for controlling Pacemaker clusters
 Group:         System Environment/Daemons
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      perl-TimeDate
 
 %description cli
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-cli package contains command line tools that can be used
 to query and control the cluster from machines that may, or may not,
 be part of the cluster.
 
 %package -n %{name}-libs
 License:       GPLv2+ and LGPLv2+
 Summary:       Core Pacemaker libraries
 Group:         System Environment/Daemons
 
 %description -n %{name}-libs
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-libs package contains shared libraries needed for cluster
 nodes and those just running the CLI tools.
 
 %package -n %{name}-cluster-libs
 License:       GPLv2+ and LGPLv2+
 Summary:       Cluster Libraries used by Pacemaker
 Group:         System Environment/Daemons
 Requires:      %{name}-libs = %{version}-%{release}
 
 %description -n %{name}-cluster-libs
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-cluster-libs package contains cluster-aware shared
 libraries needed for nodes that will form part of the cluster nodes.
 
 %package remote
 License:       GPLv2+ and LGPLv2+
 Summary:       Pacemaker remote daemon for non-cluster nodes
 Group:         System Environment/Daemons
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      %{name}-cli = %{version}-%{release}
 Requires:      resource-agents
 %if %{defined systemd_requires}
 %systemd_requires
 %endif
 
 %description remote
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-remote package contains the Pacemaker Remote daemon
 which is capable of extending pacemaker functionality to remote
 nodes not running the full corosync/cluster stack.
 
 %package -n %{name}-libs-devel
 License:       GPLv2+ and LGPLv2+
 Summary:       Pacemaker development package
 Group:         Development/Libraries
 Requires:      %{name}-cts = %{version}-%{release}
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      %{name}-cluster-libs = %{version}-%{release}
 Requires:      libtool-ltdl-devel libqb-devel libuuid-devel
 Requires:      libxml2-devel libxslt-devel bzip2-devel glib2-devel
 Requires:      corosynclib-devel
 
 %description -n %{name}-libs-devel
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-libs-devel package contains headers and shared libraries
 for developing tools for Pacemaker.
 
 %package       cts
 License:       GPLv2+ and LGPLv2+
 Summary:       Test framework for cluster-related technologies like Pacemaker
 Group:         System Environment/Daemons
 Requires:      python
 Requires:      %{name}-libs = %{version}-%{release}
 
 %description   cts
 Test framework for cluster-related technologies like Pacemaker
 
 %package       doc
 License:       GPLv2+ and LGPLv2+
 Summary:       Documentation for Pacemaker
 Group:         Documentation
 
 %description   doc
 Documentation for Pacemaker.
 
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 %prep
 %setup -q -n %{name}-%{commit}
 
 # Force the local time
 #
 # 'git' sets the file date to the date of the last commit.
 # This can result in files having been created in the future
 # when building on machines in timezones 'behind' the one the
 # commit occurred in - which seriously confuses 'make'
 find . -exec touch \{\} \;
 
 %build
 ./autogen.sh
 
 # RHEL <= 5 does not support --docdir
 docdir=%{pcmk_docdir} %{configure}                 \
         %{?with_profiling:   --with-profiling}     \
         %{?with_coverage:    --with-coverage}      \
         %{!?with_cman:       --without-cman}       \
 	--without-heartbeat			   \
         --with-initdir=%{_initrddir}               \
         --localstatedir=%{_var}                    \
         --with-version=%{version}-%{release}
 
 %if 0%{?suse_version} >= 1200
 # Fedora handles rpath removal automagically
 # On these SUSE releases the generated libtool script is patched by hand
 # instead: the first edit empties hardcode_libdir_flag_spec and the second
 # replaces the LD_RUN_PATH runpath variable with a dummy name.
 sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
 sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
 %endif
 
 make %{_smp_mflags} V=1 docdir=%{pcmk_docdir} all
 
 %install
 rm -rf %{buildroot}
 make DESTDIR=%{buildroot} docdir=%{pcmk_docdir} V=1 install
 
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig
 mkdir -p ${RPM_BUILD_ROOT}%{_var}/lib/pacemaker/cores
 install -m 644 mcp/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker
+install -m 644 tools/crm_mon.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/crm_mon
 
 %if %{with upstart_job}
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init
 install -m 644 mcp/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf
 install -m 644 mcp/pacemaker.combined.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.combined.conf
+install -m 644 tools/crm_mon.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/crm_mon.conf
 %endif
 
 # Scripts that should be executable
 chmod a+x %{buildroot}/%{_datadir}/pacemaker/tests/cts/CTSlab.py
 
 # These are data files, not scripts, so they must not be executable.
 # "-exec ... {} +" batches file names the way xargs does, but runs nothing
 # when there is no match, so a missing file type cannot fail the build.
 find %{buildroot} -type f \
         \( -name '*.xml' -o -name '*.xsl' -o -name '*.rng' -o -name '*.dtd' \) \
         -exec chmod a-x {} +
 
 # Don't package static libraries or libtool archives
 find %{buildroot} -type f \( -name '*.a' -o -name '*.la' \) -exec rm -f {} +
 
 # Do not package these either
 rm -f %{buildroot}/%{_libdir}/service_crm.so
 
 # Don't ship init scripts for systemd based platforms
 %if %{defined _unitdir}
 rm -f %{buildroot}/%{_initrddir}/pacemaker
 rm -f %{buildroot}/%{_initrddir}/pacemaker_remote
 %endif
 
 # Only useful on rhel6
 rm -f %{buildroot}/%{_bindir}/ccs2cib
 rm -f %{buildroot}/%{_bindir}/ccs_flatten
 rm -f %{buildroot}/%{_bindir}/disable_rgmanager
 
 %if %{with coverage}
 # Copy every .gcno file found in the build tree below GCOV_BASE, keeping
 # its relative directory.  Expansions are quoted and read uses -r so that
 # unusual path names are copied unchanged.
 GCOV_BASE=%{buildroot}/%{_var}/lib/pacemaker/gcov
 mkdir -p "$GCOV_BASE"
 find . -name '*.gcno' -type f | while IFS= read -r F ; do
         D=$(dirname "$F")
         mkdir -p "${GCOV_BASE}/$D"
         cp "$F" "${GCOV_BASE}/$D"
 done
 %endif
 
 %clean
 rm -rf %{buildroot}
 
 %if %{defined _unitdir}
 
 %post
 %systemd_post pacemaker.service
 
 %preun
 %systemd_preun pacemaker.service
 
 %postun
 %systemd_postun_with_restart pacemaker.service 
 
 %post remote
 %systemd_post pacemaker_remote.service
 
 %preun remote
 %systemd_preun pacemaker_remote.service
 
 %postun remote
 %systemd_postun_with_restart pacemaker_remote.service 
 
 %else
 
 %post
 /sbin/chkconfig --add pacemaker || :
 
 %preun
 # Stop the service without printing anything; a failure to stop is ignored.
 /sbin/service pacemaker stop >/dev/null 2>&1 || :
 if [ "$1" -eq 0 ]; then
     # Package removal, not upgrade
     /sbin/chkconfig --del pacemaker || :
 fi
 
 %post remote
 /sbin/chkconfig --add pacemaker_remote || :
 
 %preun remote
 # Stop the service without printing anything; a failure to stop is ignored.
 # The redirection is spelled the POSIX way because "&>" is a bash extension
 # that other shells read as "run in the background, then redirect".
 /sbin/service pacemaker_remote stop >/dev/null 2>&1 || :
 if [ "$1" -eq 0 ]; then
     # Package removal, not upgrade
     /sbin/chkconfig --del pacemaker_remote || :
 fi
 
 %endif
 
 %pre -n %{name}-libs
 
 # Create the cluster group and then the cluster user, both with the fixed
 # id 189, unless getent shows that they already exist.
 getent group %{gname} >/dev/null || groupadd -r %{gname} -g 189
 getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u 189 -s /sbin/nologin -c "cluster user" %{uname}
 # Always exit successfully, whatever the result of the commands above.
 exit 0
 
 %post -n %{name}-libs -p /sbin/ldconfig
 
 %postun -n %{name}-libs -p /sbin/ldconfig
 
 %post -n %{name}-cluster-libs -p /sbin/ldconfig
 
 %postun -n %{name}-cluster-libs -p /sbin/ldconfig
 
 %files
 ###########################################################
 %defattr(-,root,root)
 
 %exclude %{_datadir}/pacemaker/tests
 
 %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker
+%config(noreplace) %{_sysconfdir}/sysconfig/crm_mon
 %config(noreplace) %{_sysconfdir}/logrotate.d/pacemaker
 %{_sbindir}/pacemakerd
 
 %if %{defined _unitdir}
 %{_unitdir}/pacemaker.service
 %else
 %{_initrddir}/pacemaker
 %endif
 
 %{_datadir}/pacemaker
 %{_datadir}/snmp/mibs/PCMK-MIB.txt
 %exclude %{_libexecdir}/pacemaker/lrmd_test
 %exclude %{_sbindir}/pacemaker_remoted
 %{_libexecdir}/pacemaker/*
 
 %{_sbindir}/crm_attribute
 %{_sbindir}/crm_master
 %{_sbindir}/crm_node
 %{_sbindir}/attrd_updater
 %{_sbindir}/fence_legacy
 %{_sbindir}/fence_pcmk
 %{_sbindir}/stonith_admin
 
 %doc %{_mandir}/man7/*
 %doc %{_mandir}/man8/attrd_updater.*
 %doc %{_mandir}/man8/crm_attribute.*
 %doc %{_mandir}/man8/crm_node.*
 %doc %{_mandir}/man8/crm_master.*
 %doc %{_mandir}/man8/fence_pcmk.*
 %doc %{_mandir}/man8/fence_legacy.*
 %doc %{_mandir}/man8/pacemakerd.*
 %doc %{_mandir}/man8/stonith_admin.*
 
 %doc COPYING
 %doc AUTHORS
 %doc ChangeLog
 
 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker
 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cib
 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cores
 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/pengine
 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/blackbox
 %ghost %dir %attr (750, %{uname}, %{gname}) %{_var}/run/crm
 %dir /usr/lib/ocf
 %dir /usr/lib/ocf/resource.d
 /usr/lib/ocf/resource.d/pacemaker
 
 %if "%{?cs_version}" != "UNKNOWN"
 %if 0%{?cs_version} < 2
 %{_libexecdir}/lcrso/pacemaker.lcrso
 %endif
 %endif
 
 %if %{with upstart_job}
 %config(noreplace) %{_sysconfdir}/init/pacemaker.conf
 %config(noreplace) %{_sysconfdir}/init/pacemaker.combined.conf
+%config(noreplace) %{_sysconfdir}/init/crm_mon.conf
 %endif
 
 %files cli
 %defattr(-,root,root)
 %{_sbindir}/cibadmin
 %{_sbindir}/crm_diff
 %{_sbindir}/crm_error
 %{_sbindir}/crm_failcount
 %{_sbindir}/crm_mon
 %{_sbindir}/crm_resource
 %{_sbindir}/crm_standby
 %{_sbindir}/crm_verify
 %{_sbindir}/crmadmin
 %{_sbindir}/iso8601
 %{_sbindir}/crm_shadow
 %{_sbindir}/crm_simulate
 %{_sbindir}/crm_report
 %{_sbindir}/crm_ticket
 %doc %{_mandir}/man8/*
 %exclude %{_mandir}/man8/attrd_updater.*
 %exclude %{_mandir}/man8/crm_attribute.*
 %exclude %{_mandir}/man8/crm_node.*
 %exclude %{_mandir}/man8/crm_master.*
 %exclude %{_mandir}/man8/fence_pcmk.*
 %exclude %{_mandir}/man8/fence_legacy.*
 %exclude %{_mandir}/man8/pacemakerd.*
 %exclude %{_mandir}/man8/pacemaker_remoted.*
 %exclude %{_mandir}/man8/stonith_admin.*
 
 %doc COPYING
 %doc AUTHORS
 %doc ChangeLog
 
 %files -n %{name}-libs
 %defattr(-,root,root)
 
 %{_libdir}/libcib.so.*
 %{_libdir}/liblrmd.so.*
 %{_libdir}/libcrmservice.so.*
 %{_libdir}/libcrmcommon.so.*
 %{_libdir}/libpe_status.so.*
 %{_libdir}/libpe_rules.so.*
 %{_libdir}/libpengine.so.*
 %{_libdir}/libstonithd.so.*
 %{_libdir}/libtransitioner.so.*
 %doc COPYING.LIB
 %doc AUTHORS
 
 %files -n %{name}-cluster-libs
 %defattr(-,root,root)
 %{_libdir}/libcrmcluster.so.*
 %doc COPYING.LIB
 %doc AUTHORS
 
 %files remote
 %defattr(-,root,root)
 
 %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker
 %if %{defined _unitdir}
 %{_unitdir}/pacemaker_remote.service
 %else
 %{_initrddir}/pacemaker_remote
 %endif
 
 %{_sbindir}/pacemaker_remoted
 %{_mandir}/man8/pacemaker_remoted.*
 %doc COPYING.LIB
 %doc AUTHORS
 
 %files doc
 %defattr(-,root,root)
 %doc %{pcmk_docdir}
 
 %files cts
 %defattr(-,root,root)
 %{py_site}/cts
 %{_datadir}/pacemaker/tests/cts
 %{_libexecdir}/pacemaker/lrmd_test
 %doc COPYING.LIB
 %doc AUTHORS
 
 %files -n %{name}-libs-devel
 %defattr(-,root,root)
 %exclude %{_datadir}/pacemaker/tests/cts
 %{_datadir}/pacemaker/tests
 %{_includedir}/pacemaker
 %{_libdir}/*.so
 %if %{with coverage}
 %{_var}/lib/pacemaker
 %endif
 %{_libdir}/pkgconfig/*.pc
 %doc COPYING.LIB
 %doc AUTHORS
 
 %changelog
 * Fri Jul 26 2013 Andrew Beekhof <andrew@beekhof.net> 1.1.10-1
 - Update source tarball to revision: Pacemaker-1.1.10
 - Changesets: 602
 - Diff:       143 files changed, 8162 insertions(+), 5159 deletions(-)
 - See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details
 
 * Thu Jun 20 2013 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.9-2
 - Simplify the spec file
 - Drop uncommon bcond variations: 
   with-heartbeat, without-cman, without-corosync, with-esmtp, with-snmp
 - Drop bcond compatibility macros (everywhere has them now)
 - Simplify python macros
 - Use macroized systemd pre,post scriptlets
 - Don't install SYSV init scripts if systemd is in use 
 
 * Fri Mar 08 2013 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.9-1
 - Update source tarball to revision: 7e42d77
 - Statistics:
   Changesets: 731
   Diff:       1301 files changed, 92909 insertions(+), 57455 deletions(-)
 - See included ChangeLog file or https://github.com/ClusterLabs/pacemaker/blob/master/ChangeLog for details
 
 * Thu Sep 20 2012 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.8-1
 - Update source tarball to revision: 1a5341f
 - Statistics:
   Changesets: 1019
   Diff:       2107 files changed, 117258 insertions(+), 73606 deletions(-)
 - See included ChangeLog file or https://github.com/ClusterLabs/pacemaker/blob/master/ChangeLog for details
 
 
 * Wed Mar 28 2012 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.7-1
 - Update source tarball to revision: bc7ff2c
 - Statistics:
   Changesets: 513
   Diff:       1171 files changed, 90472 insertions(+), 19368 deletions(-)
 - See included ChangeLog file or https://github.com/ClusterLabs/pacemaker/blob/master/ChangeLog for details
 
 * Wed Aug 31 2011 Andrew Beekhof <andrew@beekhof.net> 1.1.6-1
 - Update source tarball to revision: 676e5f25aa46 tip
 - Statistics:
   Changesets: 376
   Diff:       1761 files changed, 36259 insertions(+), 140578 deletions(-)
 - See included ChangeLog file or https://github.com/ClusterLabs/pacemaker/blob/master/ChangeLog for details
 
 * Fri Feb 11 2011 Andrew Beekhof <andrew@beekhof.net> 1.1.5-1
 - Update source tarball to revision: baad6636a053
 - Statistics:
   Changesets: 184
   Diff:       605 files changed, 46103 insertions(+), 26417 deletions(-)
 - See included ChangeLog file or https://github.com/ClusterLabs/pacemaker/blob/master/ChangeLog for details
 
 * Wed Oct 20 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.4-1
 - Moved all the interesting parts of the changelog into a separate file as per the Fedora policy :-/
 - Update source tarball to revision: 75406c3eb2c1 tip
 - Significant performance enhancements to the Policy Engine and CIB
 - Statistics:
   Changesets: 169
   Diff:       772 files changed, 56172 insertions(+), 39309 deletions(-)
 - See included ChangeLog file or http://hg.clusterlabs.org/pacemaker/1.1/file/tip/ChangeLog for details
 
 * Tue Sep 21 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.3-1
 - Update source tarball to revision: e3bb31c56244 tip
 - Statistics:
   Changesets: 352
   Diff:       481 files changed, 14130 insertions(+), 11156 deletions(-)
 
 * Wed May 12 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.2-1
 - Update source tarball to revision: c25c972a25cc tip
 - Statistics:
   Changesets: 339
   Diff:       708 files changed, 37918 insertions(+), 10584 deletions(-)
 
 * Tue Feb 16 2010 Andrew Beekhof <andrew@beekhof.net> - 1.1.1-1
 - First public release of Pacemaker 1.1
 - Package reference documentation in a doc subpackage
 - Move cts into a subpackage so that it can be easily consumed by others
 - Update source tarball to revision: 17d9cd4ee29f
   + New stonith daemon that supports global notifications
   + Service placement influenced by the physical resources
   + A new tool for simulating failures and the cluster’s reaction to them
   + Ability to serialize an otherwise unrelated set of resource actions (eg. Xen migrations)
 
 * Wed Feb 10 2010 Andrew Beekhof <andrew@beekhof.net> - 1.0.7-4
 - Rebuild for heartbeat 3.0.2-2
 
 * Wed Feb 10 2010 Andrew Beekhof <andrew@beekhof.net> - 1.0.7-3
 - Rebuild for cluster-glue 1.0.3
 
 * Tue Jan 19 2010 Andrew Beekhof <andrew@beekhof.net> - 1.0.7-2
 - Rebuild for corosync 1.2.0
 
 * Mon Jan 18 2010 Andrew Beekhof <andrew@beekhof.net> - 1.0.7-1
 - Update source tarball to revision: 2eed906f43e9 (stable-1.0) tip
 - Statistics:
       Changesets:      193
       Diff:            220 files changed, 15933 insertions(+), 8782 deletions(-)
 
 * Thu Oct 29 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-4
 - Include the fixes from CoroSync integration testing
 - Move the resource templates - they are not documentation
 - Ensure documentation is placed in a standard location
 - Exclude documentation that is included elsewhere in the package
 
 - Update the tarball from upstream to version ee19d8e83c2a
   + High: cib: Correctly clean up when both plaintext and tls remote ports are requested
   + High: PE: Bug bnc#515172 - Provide better defaults for lt(e) and gt(e) comparisions
   + High: PE: Bug lf#2197 - Allow master instances placemaker to be influenced by colocation constraints
   + High: PE: Make sure promote/demote pseudo actions are created correctly
   + High: PE: Prevent target-role from promoting more than master-max instances
   + High: ais: Bug lf#2199 - Prevent expected-quorum-votes from being populated with garbage
   + High: ais: Prevent deadlock - dont try to release IPC message if the connection failed
   + High: cib: For validation errors, send back the full CIB so the client can display the errors
   + High: cib: Prevent use-after-free for remote plaintext connections
   + High: crmd: Bug lf#2201 - Prevent use-of-NULL when running heartbeat
 
 * Tue Oct 13 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-3
 - Update the tarball from upstream to version 38cd629e5c3c
   + High: Core: Bug lf#2169 - Allow dtd/schema validation to be disabled
   + High: PE: Bug lf#2106 - Not all anonymous clone children are restarted after configuration change
   + High: PE: Bug lf#2170 - stop-all-resources option had no effect
   + High: PE: Bug lf#2171 - Prevent groups from starting if they depend on a complex resource which can not
   + High: PE: Disable resource management if stonith-enabled=true and no stonith resources are defined
   + High: PE: do not include master score if it would prevent allocation
   + High: ais: Avoid excessive load by checking for dead children every 1s (instead of 100ms)
   + High: ais: Bug rh#525589 - Prevent shutdown deadlocks when running on CoroSync
   + High: ais: Gracefully handle changes to the AIS nodeid
   + High: crmd: Bug bnc#527530 - Wait for the transition to complete before leaving S_TRANSITION_ENGINE
   + High: crmd: Prevent use-after-free with LOG_DEBUG_3
   + Medium: xml: Mask the "symmetrical" attribute on rsc_colocation constraints (bnc#540672)
   + Medium (bnc#520707): Tools: crm: new templates ocfs2 and clvm
   + Medium: Build: Invert the disable ais/heartbeat logic so that --without (ais|heartbeat) is available to rpmbuild
   + Medium: PE: Bug lf#2178 - Indicate unmanaged clones
   + Medium: PE: Bug lf#2180 - Include node information for all failed ops
   + Medium: PE: Bug lf#2189 - Incorrect error message when unpacking simple ordering constraint
   + Medium: PE: Correctly log resources that would like to start but can not
   + Medium: PE: Stop ptest from logging to syslog
   + Medium: ais: Include version details in plugin name
   + Medium: crmd: Requery the resource metadata after every start operation
 
 * Fri Aug 21 2009 Tomas Mraz <tmraz@redhat.com> - 1.0.5-2.1
 - rebuilt with new openssl
 
 * Wed Aug 19 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-2
 - Add versioned perl dependency as specified by
     https://fedoraproject.org/wiki/Packaging/Perl#Packages_that_link_to_libperl
 - No longer remove RPATH data, it prevents us finding libperl.so and no other
   libraries were being hardcoded
 - Compile in support for heartbeat
 - Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements
   depending on which stacks are supported
 
 * Mon Aug 17 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-1
 - Add dependency on resource-agents
 - Use the version of the configure macro that supplies --prefix, --libdir, etc
 - Update the tarball from upstream to version 462f1569a437 (Pacemaker 1.0.5 final)
   + High: Tools: crm_resource - Advertise --move instead of --migrate
   + Medium: Extra: New node connectivity RA that uses system ping and attrd_updater
   + Medium: crmd: Note that dc-deadtime can be used to mask the brokeness of some switches
 
 * Tue Aug 11 2009 Ville Skyttä <ville.skytta@iki.fi> - 1.0.5-0.7.c9120a53a6ae.hg
 - Use bzipped upstream tarball.
 
 * Wed Jul  29 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.6.c9120a53a6ae.hg
 - Add back missing build auto* dependencies
 - Minor cleanups to the install directive
 
 * Tue Jul  28 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.5.c9120a53a6ae.hg
 - Add a leading zero to the revision when alphatag is used
 
 * Tue Jul  28 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.4.c9120a53a6ae.hg
 - Incorporate the feedback from the cluster-glue review
 - Realistically, the version is a 1.0.5 pre-release
 - Use the global directive instead of define for variables
 - Use the haclient/hacluster group/user instead of daemon
 - Use the _configure macro
 - Fix install dependencies
 
 * Fri Jul  24 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.4-3
 - Initial Fedora checkin
 - Include an AUTHORS and license file in each package
 - Change the library package name to pacemaker-libs to be more
   Fedora compliant
 - Remove execute permissions from xml related files
 - Reference the new cluster-glue devel package name
 - Update the tarball from upstream to version c9120a53a6ae
   + High: PE: Only prevent migration if the clone dependency is stopping/starting on the target node
   + High: PE: Bug 2160 - Dont shuffle clones due to colocation
   + High: PE: New implementation of the resource migration (not stop/start) logic
   + Medium: Tools: crm_resource - Prevent use-of-NULL by requiring a resource name for the -A and -a options
   + Medium: PE: Prevent use-of-NULL in find_first_action()
 
 * Tue Jul 14 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.4-2
 - Reference authors from the project AUTHORS file instead of listing in description
 - Change Source0 to reference the Mercurial repo
 - Cleaned up the summaries and descriptions
 - Incorporate the results of Fedora package self-review
 
 * Thu Jun 04 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.4-1
 - Update source tarball to revision: 1d87d3e0fc7f (stable-1.0)
 - Statistics:
     Changesets:      209
     Diff:            266 files changed, 12010 insertions(+), 8276 deletions(-)
 
 * Wed Apr 08 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.3-1
 - Update source tarball to revision: b133b3f19797 (stable-1.0) tip
 - Statistics:
     Changesets:      383
     Diff:            329 files changed, 15471 insertions(+), 15119 deletions(-)
 
 * Mon Feb 16 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.2-1
 - Update source tarball to revision: d232d19daeb9 (stable-1.0) tip
 - Statistics:
     Changesets:      441
     Diff:            639 files changed, 20871 insertions(+), 21594 deletions(-)
 
 * Tue Nov 18 2008 Andrew Beekhof <abeekhof@suse.de> - 1.0.1-1
 - Update source tarball to revision: 6fc5ce8302ab (stable-1.0) tip
 - Statistics:
     Changesets:      170
     Diff:            816 files changed, 7633 insertions(+), 6286 deletions(-)
 
 * Thu Oct 16 2008 Andrew Beekhof <abeekhof@suse.de> - 1.0.0-1
 - Update source tarball to revision: 388654dfef8f tip
 - Statistics:
     Changesets:      261
     Diff:            3021 files changed, 244985 insertions(+), 111596 deletions(-)
 
 * Mon Sep 22 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.3-1
 - Update source tarball to revision: 33e677ab7764+ tip
 - Statistics:
     Changesets:      133
     Diff:            89 files changed, 7492 insertions(+), 1125 deletions(-)
 
 * Wed Aug 20 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.1-1
 - Update source tarball to revision: f805e1b30103+ tip
 - Statistics:
     Changesets:      184
     Diff:            513 files changed, 43408 insertions(+), 43783 deletions(-)
 
 * Fri Jul 18 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.0-19
 - Update source tarball to revision: 007c3a1c50f5 (unstable) tip
 - Statistics:
     Changesets:      108
     Diff:            216 files changed, 4632 insertions(+), 4173 deletions(-)
 
 * Wed Jun 25 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.0-1
 - Update source tarball to revision: bde0c7db74fb tip
 - Statistics:
     Changesets:      439
     Diff:            676 files changed, 41310 insertions(+), 52071 deletions(-)
 
 * Thu Jun 19 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.5-1
 - Update source tarball to revision: b9fe723d1ac5 tip
 - Statistics:
     Changesets:      48
     Diff:            37 files changed, 1204 insertions(+), 234 deletions(-)
 
 * Thu May 22 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.4-1
 - Update source tarball to revision: 226d8e356924 tip
 - Statistics:
     Changesets:       55
     Diff:             199 files changed, 7103 insertions(+), 12378 deletions(-)
 
 * Wed Apr 23 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.3-1
 - Update source tarball to revision: fd8904c9bc67 tip
 - Statistics:
     Changesets:      117
     Diff:            354 files changed, 19094 insertions(+), 11338 deletions(-)
 
 * Thu Feb 14 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.2-1
 - Update source tarball to revision: 28b1a8c1868b tip
 - Statistics:
     Changesets:    11
     Diff:          7 files changed, 58 insertions(+), 18 deletions(-)
 
 * Tue Feb 12 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.1-1
 - Update source tarball to revision: e7152d1be933 tip
 - Statistics:
     Changesets:    25
     Diff:          37 files changed, 1323 insertions(+), 227 deletions(-)
 
 * Mon Jan 14 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.0-2
 - This is the first release of the Pacemaker Cluster Resource Manager formerly part of Heartbeat.
 - For those looking for the GUI, mgmtd, CIM or TSA components, they are now found in
   the new pacemaker-pygui project.  Build dependencies prevent them from being
   included in Heartbeat (since the built-in CRM is no longer supported) and,
   being non-core components, are not included with Pacemaker.
 - Update source tarball to revision: c94b92d550cf
 - Statistics:
     Changesets:      347
     Diff:            2272 files changed, 132508 insertions(+), 305991 deletions(-)
 - Test hardware:
     + 6-node vmware cluster (sles10-sp1/256Mb/vmware stonith) on a single host (opensuse10.3/2Gb/2.66Ghz Quad Core2)
     + 7-node EMC Centera cluster (sles10/512Mb/2Ghz Xeon/ssh stonith)
 - Notes: Heartbeat Stack
     + All testing was performed with STONITH enabled
     + The CRM was enabled using the "crm respawn" directive
 - Notes: OpenAIS Stack
     + This release contains a preview of support for the OpenAIS cluster stack
     + The current release of the OpenAIS project is missing two important
     patches that we require.  OpenAIS packages containing these patches are
     available for most major distributions at:
     http://download.opensuse.org/repositories/server:/ha-clustering
     + The OpenAIS stack is not currently recommended for use in clusters that
     have shared data as STONITH support is not yet implemented
     + pingd is not yet available for use with the OpenAIS stack
     + 3 significant OpenAIS issues were found during testing of 4 and 6 node
     clusters.  We are actively working together with the OpenAIS project to
     get these resolved.
 - Pending bugs encountered during testing:
     + OpenAIS   #1736 - Openais membership took 20s to stabilize
     + Heartbeat #1750 - ipc_bufpool_update: magic number in head does not match
     + OpenAIS   #1793 - Assertion failure in memb_state_gather_enter()
     + OpenAIS   #1796 - Cluster message corruption
 
 * Mon Dec 10 2007 Andrew Beekhof <abeekhof@suse.de> - 0.6.0-1
 - Initial opensuse package check-in
diff --git a/pengine/allocate.c b/pengine/allocate.c
index f9f9f3c6f5..8d02d9b6c2 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1,2492 +1,2517 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 
 #include <crm/crm.h>
 #include <crm/cib.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <glib.h>
 
 #include <crm/pengine/status.h>
 #include <pengine.h>
 #include <allocate.h>
 #include <utils.h>
 
 CRM_TRACE_INIT_DATA(pe_allocate);
 
 void set_alloc_actions(pe_working_set_t * data_set);
 void migrate_reload_madness(pe_working_set_t * data_set);
 
 /*
  * Table of allocation callbacks with four rows, built in order from the
  * native_*, group_*, clone_* and master_* function families.  Every row
  * lists twelve functions in the same slot order: merge_weights, color,
  * create_actions, create_probe, internal_constraints, rsc_colocation_lh,
  * rsc_colocation_rh, rsc_location, action_flags, update_actions, expand
  * and append_meta.
  *
  * NOTE(review): the rows are presumably indexed by the resource variant and
  * the slot order must match the members of resource_alloc_functions_t -
  * confirm against the header before reordering anything here.
  */
 resource_alloc_functions_t resource_class_alloc_functions[] = {
     {
      /* native row */
      native_merge_weights,
      native_color,
      native_create_actions,
      native_create_probe,
      native_internal_constraints,
      native_rsc_colocation_lh,
      native_rsc_colocation_rh,
      native_rsc_location,
      native_action_flags,
      native_update_actions,
      native_expand,
      native_append_meta,
      },
     {
      /* group row: the probe slot reuses native_create_probe */
      group_merge_weights,
      group_color,
      group_create_actions,
      native_create_probe,
      group_internal_constraints,
      group_rsc_colocation_lh,
      group_rsc_colocation_rh,
      group_rsc_location,
      group_action_flags,
      group_update_actions,
      group_expand,
      group_append_meta,
      },
     {
      /* clone row */
      clone_merge_weights,
      clone_color,
      clone_create_actions,
      clone_create_probe,
      clone_internal_constraints,
      clone_rsc_colocation_lh,
      clone_rsc_colocation_rh,
      clone_rsc_location,
      clone_action_flags,
      clone_update_actions,
      clone_expand,
      clone_append_meta,
      },
     {
      /*
       * master row: reuses the clone_* functions for the create_probe,
       * rsc_colocation_lh, rsc_location, action_flags, update_actions and
       * expand slots
       */
      master_merge_weights,
      master_color,
      master_create_actions,
      clone_create_probe,
      master_internal_constraints,
      clone_rsc_colocation_lh,
      master_rsc_colocation_rh,
      clone_rsc_location,
      clone_action_flags,
      clone_update_actions,
      clone_expand,
      master_append_meta,
      }
 };
 
 /*
  * Compare the XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS and XML_AGENT_ATTR_PROVIDER
  * attributes of the resource's current definition (rsc->xml) with the values
  * stored in rsc_entry.
  *
  * Each attribute that differs triggers unfencing with the reason
  * "Device definition changed".  If the resource is active on the node
  * (active_here), a restart is forced as well: a stop action is created and
  * the resource is flagged pe_rsc_start_pending.
  *
  * Returns TRUE when at least one attribute differed, FALSE otherwise.
  * NOTE(review): the original result variable was named delete_resource, so
  * the caller presumably deletes the recorded entry on TRUE - confirm there.
  */
 static gboolean
 check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry,
                      gboolean active_here, pe_working_set_t * data_set)
 {
     const char *attr_list[] = {
         XML_ATTR_TYPE,
         XML_AGENT_ATTR_CLASS,
         XML_AGENT_ATTR_PROVIDER
     };
     gboolean definition_changed = FALSE;
     gboolean restart_needed = FALSE;
     int idx = 0;
     while (idx < DIMOF(attr_list)) {
         const char *attr_name = attr_list[idx++];
         const char *current = crm_element_value(rsc->xml, attr_name);
         const char *recorded = crm_element_value(rsc_entry, attr_name);
         /* Identical pointers also covers the case where both are NULL */
         if (current != recorded && !crm_str_eq(current, recorded, TRUE)) {
             definition_changed = TRUE;
             trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
             if (active_here) {
                 restart_needed = TRUE;
                 crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
                            rsc->id, node->details->uname, attr_name,
                            crm_str(recorded), crm_str(current));
             }
         }
     }
     if (restart_needed) {
         /* make sure the restart happens */
         stop_action(rsc, node, FALSE);
         set_bit(rsc->flags, pe_rsc_start_pending);
     }
     /* A restart is only ever forced after a change, so one flag gives the result */
     return definition_changed;
 }
 
 /*
  * Create a cancel action for the operation recorded in xml_op.
  *
  * The operation key is rebuilt from rsc->id, the recorded task name and the
  * recorded interval.  The new action is created on active_node, its task is
  * set to RSC_CANCEL and its cancel_task to the recorded task name.  The
  * task name, call id and interval are copied into the action's meta table,
  * and an optional ordering with the resource's stop key is added.
  *
  * Does nothing when xml_op or active_node is NULL.  reason is used only in
  * the log message; "unknown" is logged when it is NULL.
  */
 static void
 CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node,
             const char *reason, pe_working_set_t * data_set)
 {
     const char *op_name = NULL;
     const char *op_call_id = NULL;
     const char *op_interval = NULL;
     const char *why = "unknown";
     char *op_key = NULL;
     action_t *cancel_op = NULL;
     CRM_CHECK(xml_op != NULL, return);
     CRM_CHECK(active_node != NULL, return);
     if (reason) {
         why = reason;
     }
     op_name = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     op_call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
     op_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
     /* we need to reconstruct the key because of the way we used to construct resource IDs */
     op_key = generate_op_key(rsc->id, op_name, crm_parse_int(op_interval, "0"));
     crm_info("Action %s on %s will be stopped: %s",
              op_key, active_node->details->uname, why);
     /* TODO: This looks highly dangerous if we ever try to schedule 'key' too */
     cancel_op = custom_action(rsc, strdup(op_key), RSC_CANCEL, active_node, FALSE, TRUE, data_set);
     /* The action was handed its own copy of the key, so ours is finished with */
     free(op_key);
     /* Replace the task names currently set on the action */
     free(cancel_op->task);
     cancel_op->task = strdup(RSC_CANCEL);
     free(cancel_op->cancel_task);
     cancel_op->cancel_task = strdup(op_name);
     add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, op_name);
     add_hash_param(cancel_op->meta, XML_LRM_ATTR_CALLID, op_call_id);
     add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, op_interval);
     custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel_op, pe_order_optional, data_set);
 }
 
 /*
  * Compare one recorded operation against the resource's current definition
  * and schedule whatever is needed when the two no longer agree.
  *
  * rsc          resource the recorded operation belongs to
  * active_node  node the operation was recorded on (must not be NULL)
  * xml_op       recorded operation entry being examined
  * data_set     working set handed on to the helpers called here
  *
  * Returns TRUE when the operation is a recurring action with no matching
  * definition (an orphan), or when rsc_action_digest_cmp() reports
  * RSC_DIGEST_RESTART, RSC_DIGEST_ALL or RSC_DIGEST_UNKNOWN.  Returns FALSE
  * otherwise, including for stop operations, which are never examined.
  */
 static gboolean
 check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op,
                         pe_working_set_t * data_set)
 {
     char *key = NULL;
     int interval = 0;
     const char *interval_s = NULL;
     const op_digest_cache_t *digest_data = NULL;
     gboolean did_change = FALSE;
 
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     const char *op_version;
 
     CRM_CHECK(active_node != NULL, return FALSE);
     if (safe_str_eq(task, RSC_STOP)) {
         return FALSE;
     }
 
     interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
     interval = crm_parse_int(interval_s, "0");
 
     /* Recurring operations: first make sure the operation is still defined */
     if (interval > 0) {
         xmlNode *op_match = NULL;
 
         /* we need to reconstruct the key because of the way we used to construct resource IDs */
         key = generate_op_key(rsc->id, task, interval);
 
         pe_rsc_trace(rsc, "Checking parameters for %s", key);
         op_match = find_rsc_op_entry(rsc, key);
 
         if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
             /* No definition left and orphaned actions are to be stopped: cancel it */
             CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
             free(key);
             return TRUE;
 
         } else if (op_match == NULL) {
             /* No definition left, but orphans are tolerated: only log it */
             pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
             free(key);
             return TRUE;
         }
         free(key);
         key = NULL;
     }
 
     /* active_node was already verified by the CRM_CHECK above, so the "N/A" arm is never taken */
     crm_trace("Testing %s_%s_%d on %s", rsc->id, task, interval, active_node?active_node->details->uname:"N/A");
     if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
         /* Reload based on the start action not a probe */
         task = RSC_START;
 
     } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) {
         /* Reload based on the start action not a migrate */
         task = RSC_START;
     }
 
     /* NOTE(review): digest_data is dereferenced unchecked below, so
      * rsc_action_digest_cmp() is assumed never to return NULL - confirm */
     digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);
     op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
 
     /* Changes that force a restart */
     if (digest_data->rc == RSC_DIGEST_RESTART) {
         const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
 
         did_change = TRUE;
         key = generate_op_key(rsc->id, task, interval);
         crm_log_xml_info(digest_data->params_restart, "params:restart");
         pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s",
                  key, active_node->details->uname,
                  crm_str(digest_restart), digest_data->digest_restart_calc,
                  op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
 
         /* NOTE(review): key is not freed after this call, so custom_action()
          * presumably takes ownership of it - confirm */
         custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set);
         trigger_unfencing(rsc, NULL, "Device parameters changed", NULL, data_set);
 
     } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
         /* Changes that can potentially be handled by a reload */
         const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
         const char *digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
 
         did_change = TRUE;
         trigger_unfencing(rsc, NULL, "Device parameters changed (reload)", NULL, data_set);
         crm_log_xml_info(digest_data->params_all, "params:reload");
         key = generate_op_key(rsc->id, task, interval);
         pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s",
                  key, active_node->details->uname,
                  crm_str(digest_all), digest_data->digest_all_calc, op_version,
                  crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
 
         /* Every branch below hands key to custom_action() exactly once */
         if (interval > 0) {
             action_t *op = NULL;
 
 #if 0
             /* Always reload/restart the entire resource */
             op = custom_action(rsc, start_key(rsc), RSC_START, NULL, FALSE, TRUE, data_set);
             update_action_flags(op, pe_action_allow_reload_conversion);
 #else
             /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
             op = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set);
             custom_action_order(rsc, start_key(rsc), NULL,
                                 NULL, NULL, op, pe_order_runnable_left, data_set);
 #endif
 
         } else if (digest_restart) {
             pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);
 
             /* Allow this resource to reload - unless something else causes a full restart */
             set_bit(rsc->flags, pe_rsc_try_reload);
 
             /* Create these for now, it keeps the action IDs the same in the regression outputs */
             custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
 
         } else {
             pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);
 
             /* Re-send the start/demote/promote op
              * Recurring ops will be detected independently
              */
             custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set);
         }
     }
 
     return did_change;
 }
 
 extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional,
                           pe_working_set_t * data_set);
 
 /*
  * Examine the operation history of one resource on one node.
  *
  * rsc_entry  history element whose XML_LRM_TAG_RSC_OP children are examined
  * rsc        resource the history belongs to
  * node       node the history was recorded on (must not be NULL)
  * data_set   working set handed on to the helpers called here
  *
  * Orphaned resources and resources no longer active on the node are dealt
  * with up front and their operations are not examined.  Otherwise the
  * operations are sorted with sort_op_by_callid and each one that
  * calculate_active_ops() leaves in scope is either cancelled (recurring
  * operation while in maintenance mode) or passed to check_action_definition().
  * When a definition changed and the resource has a failcount on the node, a
  * CRM_OP_CLEAR_FAILCOUNT action is created as well.
  */
 static void
 check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     int offset = -1;
     int interval = 0;
     int stop_index = 0;
     int start_index = 0;
 
     const char *task = NULL;
     const char *interval_s = NULL;
 
     xmlNode *rsc_op = NULL;
     GListPtr op_list = NULL;
     GListPtr sorted_op_list = NULL;
     gboolean is_probe = FALSE;
     gboolean did_change = FALSE;
 
     CRM_CHECK(node != NULL, return);
 
     if (is_set(rsc->flags, pe_rsc_orphan)) {
         resource_t *parent = uber_parent(rsc);
         /* Delete the orphan unless its top-level parent is a non-unique clone (or higher variant) */
         if(parent == NULL
            || parent->variant < pe_clone
            || is_set(parent->flags, pe_rsc_unique)) {
             pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
             DeleteRsc(rsc, node, FALSE, data_set);
         } else {
             pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
         }
         return;
 
     } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
         /* Not running here any more: only the resource-level parameters are checked */
         if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
             DeleteRsc(rsc, node, FALSE, data_set);
         }
         pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
                      rsc->id, node->details->uname);
         return;
     }
 
     pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);
 
     if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
         DeleteRsc(rsc, node, FALSE, data_set);
     }
 
     /* Gather the recorded operations; the list only borrows the XML nodes */
     for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
         if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
             op_list = g_list_prepend(op_list, rsc_op);
         }
     }
 
     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
 
     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
         /* Note: this declaration shadows the function-level rsc_op */
         xmlNode *rsc_op = (xmlNode *) gIter->data;
 
         /* offset is the position of this operation within the sorted list */
         offset++;
 
         if (start_index < stop_index) {
             /* stopped */
             continue;
         } else if (offset < start_index) {
             /* action occurred prior to a start */
             continue;
         }
 
         is_probe = FALSE;
         did_change = FALSE;
         task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
 
         interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
         interval = crm_parse_int(interval_s, "0");
 
         /* A non-recurring status operation is treated as a probe */
         if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
             is_probe = TRUE;
         }
 
         if (interval > 0 &&
             (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
             /* Recurring operations are cancelled while in maintenance mode */
             CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);
 
         } else if (is_probe || safe_str_eq(task, RSC_START) || interval > 0
                    || safe_str_eq(task, RSC_MIGRATED)) {
             did_change = check_action_definition(rsc, node, rsc_op, data_set);
         }
 
         if (did_change && get_failcount(node, rsc, NULL, data_set)) {
             char *key = NULL;
             action_t *action_clear = NULL;
 
             /* NOTE(review): key is not freed here, so custom_action()
              * presumably takes ownership of it - confirm */
             key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
             action_clear =
                 custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
             set_bit(action_clear->flags, pe_action_runnable);
         }
     }
 
     /* Frees the list cells only; the XML nodes are not freed here */
     g_list_free(sorted_op_list);
 
 }
 
 /*
  * Collect every resource whose id (or, optionally, clone_name) matches 'id'.
  *
  * result          list built so far; matches are prepended to it
  * rsc             resource to examine together with all of its children, or
  *                 NULL to examine every resource in data_set->resources
  * id              name to look for
  * renamed_clones  when TRUE, rsc->clone_name is also compared against id
  * partial         when TRUE, a substring match (strstr) is enough;
  *                 otherwise the names must be equal (strcmp)
  * data_set        only consulted when rsc is NULL
  *
  * Returns the (possibly extended) list.  When there is nothing to search,
  * 'result' is handed back unchanged, so matches gathered earlier are never
  * dropped or leaked.
  */
 static GListPtr
 find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones,
               gboolean partial, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     gboolean match = FALSE;
 
     if (id == NULL) {
         /* Nothing to match against: keep whatever was collected so far */
         return result;
 
     } else if (rsc == NULL && data_set) {
 
         for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
         }
 
         return result;
 
     } else if (rsc == NULL) {
         /* A NULL entry must not wipe out the matches already gathered */
         return result;
     }
 
     if (partial) {
         if (strstr(rsc->id, id)) {
             match = TRUE;
 
         } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
             match = TRUE;
         }
 
     } else {
         if (strcmp(rsc->id, id) == 0) {
             match = TRUE;
 
         } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
             match = TRUE;
         }
     }
 
     if (match) {
         result = g_list_prepend(result, rsc);
     }
 
     /* Descend into the children; data_set is not needed below the top level */
     for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
         resource_t *child = (resource_t *) gIter->data;
 
         result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
     }
 
     return result;
 }
 
 /*
  * Walk the status section of the input and, for every node that is known
  * to the working set, run check_actions_for() on each primitive resource
  * that has recorded history on that node.
  */
 static void
 check_actions(pe_working_set_t * data_set)
 {
     xmlNode *status_section = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
     xmlNode *state_entry = NULL;
 
     for (state_entry = __xml_first_child(status_section); state_entry != NULL;
          state_entry = __xml_next(state_entry)) {
         const char *node_id = NULL;
         node_t *node = NULL;
         xmlNode *history = NULL;
         xmlNode *rsc_entry = NULL;
 
         if (!crm_str_eq((const char *)state_entry->name, XML_CIB_TAG_STATE, TRUE)) {
             continue;
         }
 
         node_id = crm_element_value(state_entry, XML_ATTR_ID);
         history = find_xml_node(state_entry, XML_CIB_TAG_LRM, FALSE);
         history = find_xml_node(history, XML_LRM_TAG_RESOURCES, FALSE);
 
         node = pe_find_node_id(data_set->nodes, node_id);
         if (node == NULL) {
             continue;
         }
 
         /* Still need to check actions for a maintenance node to cancel existing monitor operations */
         if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
             crm_trace("Skipping param check for %s: cant run resources", node->details->uname);
             continue;
         }
 
         crm_trace("Processing node %s", node->details->uname);
         if (!(node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled))) {
             continue;
         }
 
         for (rsc_entry = __xml_first_child(history); rsc_entry != NULL;
              rsc_entry = __xml_next(rsc_entry)) {
             GListPtr matches = NULL;
             GListPtr match_iter = NULL;
             const char *rsc_id = NULL;
 
             if (!crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
                 continue;
             }
             if (!xml_has_children(rsc_entry)) {
                 continue;
             }
 
             rsc_id = ID(rsc_entry);
             CRM_CHECK(rsc_id != NULL, return);
 
             matches = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
             for (match_iter = matches; match_iter != NULL; match_iter = match_iter->next) {
                 resource_t *rsc = (resource_t *) match_iter->data;
 
                 /* Only primitives are checked */
                 if (rsc->variant == pe_native) {
                     check_actions_for(rsc_entry, rsc, node, data_set);
                 }
             }
             g_list_free(matches);
         }
     }
 }
 
 /*
  * Hand every entry of data_set->placement_constraints to the rsc_location
  * method of the resource it refers to.  Always returns TRUE.
  */
 static gboolean
 apply_placement_constraints(pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
 
     crm_trace("Applying constraints...");
 
     item = data_set->placement_constraints;
     while (item != NULL) {
         rsc_to_node_t *constraint = (rsc_to_node_t *) item->data;
 
         constraint->rsc_lh->cmds->rsc_location(constraint->rsc_lh, constraint);
         item = item->next;
     }
 
     return TRUE;
 }
 
 /*
  * Report whether a failcount-clearing action already exists for rsc on node.
  *
  * Two key spellings are looked up: the plain "<rsc id>_<clear op>" form
  * built with crm_concat(), and the form produced by
  * generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0), which is how
  * check_actions_for() names the clear action it creates.
  *
  * Returns TRUE when find_actions_exact() finds an action under either key.
  */
 static gboolean
 failcount_clear_action_exists(node_t * node, resource_t * rsc)
 {
     gboolean rc = FALSE;
     char *key = crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_');
     GListPtr list = find_actions_exact(rsc->actions, key, node);
 
     if (list == NULL) {
         /* Fall back to the op-key spelling used when the action is created
          * through generate_op_key() */
         free(key);
         key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
         list = find_actions_exact(rsc->actions, key, node);
     }
 
     if (list) {
         rc = TRUE;
     }
     g_list_free(list);
     free(key);
 
     return rc;
 }
 
 /*
  * Apply stickiness and the migration threshold for rsc with respect to node.
  *
  * Resources with children only recurse into them.  For a leaf resource:
  *  - if it is managed, has non-zero stickiness and runs on exactly one
  *    node, and that node is 'node', its stickiness is added as a location
  *    preference (unless the cluster is asymmetric and the node is not in
  *    allowed_nodes);
  *  - unless a failcount-clearing action already exists, its failcount is
  *    compared against migration_threshold and the resource is forced away
  *    from the node once the threshold has been reached.
  */
 static void
 common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
 {
     int fail_count = 0;
     resource_t *failed = rsc;
 
     if (rsc->children) {
         GListPtr child_iter = NULL;
 
         for (child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) {
             common_apply_stickiness((resource_t *) child_iter->data, node, data_set);
         }
         return;
     }
 
     if (is_set(rsc->flags, pe_rsc_managed)
         && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) {
         node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
         node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
 
         if (current != NULL) {
             if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
                 resource_location(rsc, node, rsc->stickiness, "stickiness", data_set);
                 pe_rsc_debug(rsc, "Resource %s: preferring current location"
                              " (node=%s, weight=%d)", rsc->id,
                              node->details->uname, rsc->stickiness);
             } else {
                 GHashTableIter allowed_iter;
                 node_t *allowed = NULL;
 
                 pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
                              " and node %s is not explicitly allowed", rsc->id, node->details->uname);
                 g_hash_table_iter_init(&allowed_iter, rsc->allowed_nodes);
                 while (g_hash_table_iter_next(&allowed_iter, NULL, (void **)&allowed)) {
                     crm_err("%s[%s] = %d", rsc->id, allowed->details->uname, allowed->weight);
                 }
             }
         }
     }
 
     /* The failcount is only consulted when no failcount clear action has
      * been placed for this resource on the node: there is no sense in
      * potentially forcing the rsc away if the failcount is being reset
      * anyway. */
     if (failcount_clear_action_exists(node, rsc) == FALSE) {
         fail_count = get_failcount_all(node, rsc, NULL, data_set);
     }
 
     if (fail_count <= 0 || rsc->migration_threshold == 0) {
         return;
     }
 
     /* For non-unique resources the top-level parent is the one reported/moved */
     if (is_not_set(rsc->flags, pe_rsc_unique)) {
         failed = uber_parent(rsc);
     }
 
     if (fail_count >= rsc->migration_threshold) {
         resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
         crm_warn("Forcing %s away from %s after %d failures (max=%d)",
                  failed->id, node->details->uname, fail_count, rsc->migration_threshold);
     } else {
         crm_info("%s can fail %d more times on %s before being forced off",
                  failed->id, rsc->migration_threshold - fail_count, node->details->uname);
     }
 }
 
 /*
  * Point rsc->cmds at the resource_class_alloc_functions entry selected by
  * rsc->variant, then do the same for every child, recursively.
  */
 static void
 complex_set_cmds(resource_t * rsc)
 {
     GListPtr child_iter = NULL;
 
     rsc->cmds = &resource_class_alloc_functions[rsc->variant];
 
     child_iter = rsc->children;
     while (child_iter != NULL) {
         complex_set_cmds((resource_t *) child_iter->data);
         child_iter = child_iter->next;
     }
 }
 
 /*
  * Run complex_set_cmds() on every resource in data_set->resources.
  */
 void
 set_alloc_actions(pe_working_set_t * data_set)
 {
     GListPtr rsc_iter = data_set->resources;
 
     while (rsc_iter != NULL) {
         complex_set_cmds((resource_t *) rsc_iter->data);
         rsc_iter = rsc_iter->next;
     }
 }
 
 /*
  * Hash-table visitor: fold one attribute into a running health total.
  *
  * gKey       attribute name
  * gValue     attribute value
  * user_data  pointer to the int holding the running total
  *
  * Only attributes whose name starts with "#health" contribute; their value
  * is converted with char2score() and combined via merge_weights().
  * The call is a no-op when any argument is NULL.
  */
 static void
 calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
 {
     const char *attr_name = (const char *)gKey;
     const char *attr_value = (const char *)gValue;
     int *total = (int *)user_data;
 
     if (gKey == NULL || gValue == NULL || user_data == NULL) {
         return;
     }
 
     /* Anything that is not a "#health..." attribute is ignored */
     if (strncmp(attr_name, "#health", 7) != 0) {
         return;
     }
 
     *total = merge_weights(char2score(attr_value), *total);
 }
 
 /*
  * Translate node health attributes into location scores according to the
  * "node-health-strategy" option.
  *
  * Returns FALSE for an unrecognised strategy and TRUE otherwise.  For the
  * "none" (or unset) and "custom" strategies nothing is applied here.
  */
 static gboolean
 apply_system_health(pe_working_set_t * data_set)
 {
     GListPtr node_iter = NULL;
     const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
 
     if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
         /* Prevent any accidental health -> score translation */
         node_score_red = 0;
         node_score_yellow = 0;
         node_score_green = 0;
         return TRUE;
     }
 
     if (safe_str_eq(health_strategy, "custom")) {
         /* Requires the admin to configure the rsc_location constraints for
          * processing the stored health scores
          */
         /* TODO: Check for the existence of appropriate node health constraints */
         return TRUE;
     }
 
     if (safe_str_eq(health_strategy, "migrate-on-red")) {
         /* Resources on nodes which have health values of red are
          * weighted away from that node.
          */
         node_score_red = -INFINITY;
         node_score_yellow = 0;
         node_score_green = 0;
 
     } else if (safe_str_eq(health_strategy, "only-green")) {
         /* Resources on nodes which have health values of red or yellow
          * are forced away from that node.
          */
         node_score_red = -INFINITY;
         node_score_yellow = -INFINITY;
         node_score_green = 0;
 
     } else if (!safe_str_eq(health_strategy, "progressive")) {
         /* "progressive" keeps the r/y/g scores provided by the user
          * (defaults come from the pe_prefs table); anything else is unknown
          */
         crm_err("Unknown node health strategy: %s", health_strategy);
         return FALSE;
     }
 
     crm_info("Applying automated node health strategy: %s", health_strategy);
 
     for (node_iter = data_set->nodes; node_iter != NULL; node_iter = node_iter->next) {
         int system_health = 0;
         GListPtr rsc_iter = NULL;
         node_t *node = (node_t *) node_iter->data;
 
         /* Sum up the health entries found among the node's attributes */
         g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
 
         crm_info(" Node %s has an combined system health of %d",
                  node->details->uname, system_health);
 
         /* A zero total needs no constraint */
         if (system_health == 0) {
             continue;
         }
 
         /* Create a new rsc2node per resource so that the weight is added later on */
         for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) {
             rsc2node_new(health_strategy, (resource_t *) rsc_iter->data, system_health, node,
                          data_set);
         }
     }
 
     return TRUE;
 }
 
 /*
  * First scheduling stage: make sure the cluster status has been calculated,
  * install the per-variant allocation methods, apply the node health
  * strategy and unpack the constraints section of the input.
  *
  * Returns FALSE when data_set->input is NULL (nothing is done in that
  * case), TRUE otherwise.
  */
 gboolean
 stage0(pe_working_set_t * data_set)
 {
     xmlNode *cib_constraints = NULL;
 
     if (data_set->input == NULL) {
         return FALSE;
     }
 
     /* Only look the section up once the input is known to exist */
     cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
 
     if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
         crm_trace("Calculating status");
         cluster_status(data_set);
     }
 
     set_alloc_actions(data_set);
     apply_system_health(data_set);
     unpack_constraints(cib_constraints, data_set);
 
     return TRUE;
 }
 
 /*
  * Order 'action' of rsc (or of each of its leaf descendants) after
  * probe_complete.
  *
  * rsc             resource to order; resources with children only recurse
  * action          task name used to build the operation key
  * probe_complete  action to wait for; when NULL nothing is done
  * data_set        working set handed on to custom_action_order()
  */
 static void
 wait_for_probe(resource_t * rsc, const char *action, action_t * probe_complete,
                pe_working_set_t * data_set)
 {
     if (probe_complete == NULL) {
         return;
     }
 
     if (rsc->children) {
         GListPtr child_iter = NULL;
 
         for (child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) {
             wait_for_probe((resource_t *) child_iter->data, action, probe_complete, data_set);
         }
         return;
     }
 
     if (safe_str_eq(action, RSC_STOP) && g_list_length(rsc->running_on) == 1) {
         node_t *node = (node_t *) rsc->running_on->data;
 
         /* Stop actions on nodes that are shutting down do not need to wait for probes to complete
          * Doing so prevents node shutdown in the presence of nodes that are coming up
          * The purpose of waiting is to not stop resources until we know for sure the
          *  intended destination is able to take them
          */
         if (node && node->details->shutdown) {
             crm_debug("Skipping %s before %s_%s_0 due to %s shutdown",
                       probe_complete->uuid, rsc->id, action, node->details->uname);
             return;
         }
     }
 
     custom_action_order(NULL, NULL, probe_complete,
                         rsc, generate_op_key(rsc->id, action, 0), NULL,
                         pe_order_optional, data_set);
 }
 
 /*
  * Check nodes for resources started outside of the LRM
  *
  * For every eligible node a per-node "probes complete" action is created,
  * each resource is asked (via its create_probe method) to create whatever
  * probes it needs on that node, and start actions are ordered after the
  * relevant "probes complete" pseudo action.  Finally, stop actions of all
  * resources are ordered after probing as well.
  *
  * Always returns TRUE.
  */
 gboolean
 probe_resources(pe_working_set_t * data_set)
 {
     action_t *probe_complete = NULL;
     action_t *probe_node_complete = NULL;
     action_t *probe_cluster_nodes_complete = NULL;
 
     GListPtr gIter = NULL;
     GListPtr gIter2 = NULL;
 
     gIter = data_set->nodes;
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
         const char *probed = g_hash_table_lookup(node->details->attrs, CRM_OP_PROBED);
 
         if (node->details->online == FALSE) {
             continue;
 
         } else if (node->details->unclean) {
             continue;
 
         } else if (is_remote_node(node) && node->details->shutdown) {
             /* Don't try and probe a remote node we're shutting down.
              * It causes constraint conflicts to try and run any sort of action
              * other than 'stop' on resources living within a remote-node when
              * it is being shutdown. */
             continue;
 
         } else if (is_container_remote_node(node)) {
             /* TODO enable container node probes once ordered probing is implemented. */
             continue;
 
         } else if (probe_complete == NULL) {
             /* First eligible node: create the pseudo action(s) lazily */
             probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set);
             if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
                 probe_cluster_nodes_complete = get_pseudo_op(CRM_OP_NODES_PROBED, data_set);
             }
         }
 
         /* The attribute is present but not true: schedule a re-probe
          * action for this node and skip creating individual probes */
         if (probed != NULL && crm_is_true(probed) == FALSE) {
             action_t *probe_op = custom_action(NULL, g_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
                                                CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
 
             add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
             continue;
         }
 
         /* Per-node "probes complete" action */
         probe_node_complete = custom_action(NULL, g_strdup_printf("%s-%s", CRM_OP_PROBED, node->details->uname),
                                             CRM_OP_PROBED, node, FALSE, TRUE, data_set);
         /* NOTE(review): presumably update_action_flags() sets the given flag,
          * or clears it when combined with pe_action_clear - confirm.  Note
          * that the "unset"/"set" trace texts below read as the opposite. */
         if (crm_is_true(probed)) {
             crm_trace("unset");
             update_action_flags(probe_node_complete, pe_action_optional);
         } else {
             crm_trace("set");
             update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear);
         }
         crm_trace("%s - %d", node->details->uname, probe_node_complete->flags & pe_action_optional);
         probe_node_complete->priority = INFINITY;
         add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
 
         if (node->details->pending) {
             update_action_flags(probe_node_complete, pe_action_runnable | pe_action_clear);
             crm_info("Action %s on %s is unrunnable (pending)",
                      probe_node_complete->uuid, probe_node_complete->node->details->uname);
         }
 
         /* Remote nodes, and all nodes when there is no separate
          * "cluster nodes probed" action, feed into probe_complete;
          * otherwise the node feeds into probe_cluster_nodes_complete */
         if (is_remote_node(node)) {
             order_actions(probe_node_complete, probe_complete,
                       pe_order_runnable_left /*|pe_order_implies_then */ );
         } else if (probe_cluster_nodes_complete == NULL) {
             order_actions(probe_node_complete, probe_complete,
                       pe_order_runnable_left /*|pe_order_implies_then */ );
         } else {
             order_actions(probe_node_complete, probe_cluster_nodes_complete,
                       pe_order_runnable_left /*|pe_order_implies_then */ );
         }
 
         gIter2 = data_set->resources;
         for (; gIter2 != NULL; gIter2 = gIter2->next) {
             resource_t *rsc = (resource_t *) gIter2->data;
 
             /* A true return from create_probe() updates the completion
              * actions' optional flag and makes the resource's start wait */
             if (rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set)) {
                 update_action_flags(probe_complete, pe_action_optional | pe_action_clear);
                 update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear);
 
                 if (probe_cluster_nodes_complete
                     && (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc))) {
                     update_action_flags(probe_cluster_nodes_complete, pe_action_optional | pe_action_clear);
                     /* allow remote connection resources and resources
                      * containing remote connection resources to run after all
                      * cluster nodes are probed */
                     wait_for_probe(rsc, RSC_START, probe_cluster_nodes_complete, data_set);
                 } else {
                     wait_for_probe(rsc, RSC_START, probe_complete, data_set);
                 }
             }
         }
     }
 
     /* Stops wait for probing too; wait_for_probe() does nothing when the
      * action it is given is NULL */
     gIter = data_set->resources;
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         if (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc)) {
             /* allow remote connection resources and any resources containing
              * remote connection resources to run after cluster nodes are probed.*/
             wait_for_probe(rsc, RSC_STOP, probe_cluster_nodes_complete, data_set);
         } else {
             wait_for_probe(rsc, RSC_STOP, probe_complete, data_set);
         }
     }
 
     return TRUE;
 }
 
 /*
  * Count the nodes that can host resources (data_set->max_valid_nodes),
  * apply the placement constraints, and then apply stickiness for every
  * resource on every node.
  *
  * Always returns TRUE.
  */
 gboolean
 stage2(pe_working_set_t * data_set)
 {
     GListPtr node_iter = NULL;
 
     crm_trace("Applying placement constraints");
 
     for (node_iter = data_set->nodes; node_iter != NULL; node_iter = node_iter->next) {
         node_t *node = (node_t *) node_iter->data;
 
         /* A node counts when its global weight is non-negative, it is
          * online and it is not a ping node; NULL entries are ignored */
         if (node != NULL
             && node->weight >= 0.0
             && node->details->online && node->details->type != node_ping) {
             data_set->max_valid_nodes++;
         }
     }
 
     apply_placement_constraints(data_set);
 
     for (node_iter = data_set->nodes; node_iter != NULL; node_iter = node_iter->next) {
         GListPtr rsc_iter = NULL;
         node_t *node = (node_t *) node_iter->data;
 
         for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) {
             common_apply_stickiness((resource_t *) rsc_iter->data, node, data_set);
         }
     }
 
     return TRUE;
 }
 
 /*
  * Let every resource create its internal constraints (through its
  * internal_constraints method) before allocation.  Always returns TRUE.
  */
 gboolean
 stage3(pe_working_set_t * data_set)
 {
     GListPtr rsc_iter = data_set->resources;
 
     while (rsc_iter != NULL) {
         resource_t *rsc = (resource_t *) rsc_iter->data;
 
         rsc->cmds->internal_constraints(rsc, data_set);
         rsc_iter = rsc_iter->next;
     }
 
     return TRUE;
 }
 
 /*
  * Look for orphaned or redefined actions by running check_actions() on the
  * working set.  Always returns TRUE.
  */
 gboolean
 stage4(pe_working_set_t * data_set)
 {
     check_actions(data_set);
 
     return TRUE;
 }
 
 /*
  * Comparison callback deciding the order in which resources are processed.
  *
  * a, b  the two resources being compared (resource_t pointers)
  * data  list of nodes used for the per-node score comparison, may be NULL
  *
  * Resources are compared, in order, by:
  *   1. priority (higher first),
  *   2. merged score on the node they currently run on (higher first),
  *   3. merged score on each node of 'data', in list order (higher first).
  * A NULL resource sorts after a non-NULL one.
  *
  * Returns a negative value when a should come first, a positive value when
  * b should come first, and 0 when no criterion tells them apart.
  */
 static gint
 sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
 {
     int rc = 0;
     int r1_weight = -INFINITY;
     int r2_weight = -INFINITY;
 
     const char *reason = "existance";
 
     const GListPtr nodes = (GListPtr) data;
     resource_t *resource1 = (resource_t *) convert_const_pointer(a);
     resource_t *resource2 = (resource_t *) convert_const_pointer(b);
 
     node_t *node = NULL;
     GListPtr gIter = NULL;
     GHashTable *r1_nodes = NULL;
     GHashTable *r2_nodes = NULL;
 
     if (a == NULL && b == NULL) {
         /* Equal, and there is nothing that could safely be logged: the
          * trace at 'done' dereferences both resources */
         return 0;
     }
     if (a == NULL) {
         return 1;
     }
     if (b == NULL) {
         return -1;
     }
 
     reason = "priority";
     r1_weight = resource1->priority;
     r2_weight = resource2->priority;
 
     if (r1_weight > r2_weight) {
         rc = -1;
         goto done;
     }
 
     if (r1_weight < r2_weight) {
         rc = 1;
         goto done;
     }
 
     reason = "no node list";
     if (nodes == NULL) {
         goto done;
     }
 
     r1_nodes =
         rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1,
                           pe_weights_forward | pe_weights_init);
     dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes);
     r2_nodes =
         rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1,
                           pe_weights_forward | pe_weights_init);
     dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes);
 
     /* Current location score */
     reason = "current location";
     r1_weight = -INFINITY;
     r2_weight = -INFINITY;
 
     if (resource1->running_on) {
         node_t *current = g_list_nth_data(resource1->running_on, 0);
 
         /* Guard the table like the per-node loop below does */
         node = r1_nodes ? g_hash_table_lookup(r1_nodes, current->details->id) : NULL;
         if (node != NULL) {
             r1_weight = node->weight;
         }
     }
     if (resource2->running_on) {
         node_t *current = g_list_nth_data(resource2->running_on, 0);
 
         node = r2_nodes ? g_hash_table_lookup(r2_nodes, current->details->id) : NULL;
         if (node != NULL) {
             r2_weight = node->weight;
         }
     }
 
     if (r1_weight > r2_weight) {
         rc = -1;
         goto done;
     }
 
     if (r1_weight < r2_weight) {
         rc = 1;
         goto done;
     }
 
     reason = "score";
     for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
         node_t *r1_node = NULL;
         node_t *r2_node = NULL;
 
         node = (node_t *) gIter->data;
 
         r1_weight = -INFINITY;
         if (r1_nodes) {
             r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
         }
         if (r1_node) {
             r1_weight = r1_node->weight;
         }
 
         r2_weight = -INFINITY;
         if (r2_nodes) {
             r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
         }
         if (r2_node) {
             r2_weight = r2_node->weight;
         }
 
         if (r1_weight > r2_weight) {
             rc = -1;
             goto done;
         }
 
         if (r1_weight < r2_weight) {
             rc = 1;
             goto done;
         }
     }
 
   done:
     /* Log before the tables are destroyed: 'node' may still point at an
      * entry looked up from r1_nodes/r2_nodes, which would presumably be
      * released together with its table */
     crm_trace("%s (%d) %c %s (%d) on %s: %s",
               resource1->id, r1_weight, rc < 0 ? '>' : rc > 0 ? '<' : '=',
               resource2->id, r2_weight, node ? node->details->id : "n/a", reason);
 
     if (r1_nodes) {
         g_hash_table_destroy(r1_nodes);
     }
     if (r2_nodes) {
         g_hash_table_destroy(r2_nodes);
     }
 
     return rc;
 }
 
 /*
  * Allocate every resource in data_set->resources through its allocate
  * method.
  *
  * When the working set has remote nodes, remote connection resources are
  * allocated in a first pass and everything else in a second pass.  The two
  * passes test is_remote_node for truthiness, so they are exact complements
  * and no resource can be picked up by both.
  */
 static void
 allocate_resources(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
         /* Force remote connection resources to be allocated first. This
          * also forces any colocation dependencies to be allocated as well */
         for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
             resource_t *rsc = (resource_t *) gIter->data;
 
             if (!rsc->is_remote_node) {
                 continue;
             }
             pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
             rsc->cmds->allocate(rsc, NULL, data_set);
         }
     }
 
     /* now do the rest of the resources */
     for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
 
         if (rsc->is_remote_node) {
             continue;
         }
         pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
         rsc->cmds->allocate(rsc, NULL, data_set);
     }
 }
 
 /*
  * Allocate resources, optionally calculate probes, and create the
  * per-resource actions.
  *
  * With a placement strategy other than "default", the resource list is
  * first re-sorted with sort_rsc_process_order against a weight-sorted copy
  * of the node list.  Node capacity is dumped before and after allocation.
  *
  * Always returns TRUE.
  */
 gboolean
 stage5(pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
 
     if (safe_str_neq(data_set->placement_strategy, "default")) {
         /* Work on a copy so the working set's own node order is untouched */
         GListPtr sorted_nodes = g_list_copy(data_set->nodes);
 
         sorted_nodes = g_list_sort_with_data(sorted_nodes, sort_node_weight, NULL);
         data_set->resources =
             g_list_sort_with_data(data_set->resources, sort_rsc_process_order, sorted_nodes);
         g_list_free(sorted_nodes);
     }
 
     for (item = data_set->nodes; item != NULL; item = item->next) {
         dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original",
                            (node_t *) item->data);
     }
 
     crm_trace("Allocating services");
     /* Take (next) highest resource, assign it and create its actions */
     allocate_resources(data_set);
 
     for (item = data_set->nodes; item != NULL; item = item->next) {
         dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining",
                            (node_t *) item->data);
     }
 
     if (is_set(data_set->flags, pe_flag_startup_probes)) {
         crm_trace("Calculating needed probes");
         /* This code probably needs optimization
          * ptest -x with 100 nodes, 100 clones and clone-max=100:
 
          With probes:
 
          ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
          ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
          ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
          ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
          ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
          ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
          ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
          ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
          ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
          ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
          ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
          36s
          ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
 
          Without probes:
 
          ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
          ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
          ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
          ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
          ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
          ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
          ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
          ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
          */
 
         probe_resources(data_set);
     }
 
     crm_trace("Creating actions");
 
     for (item = data_set->resources; item != NULL; item = item->next) {
         resource_t *rsc = (resource_t *) item->data;
 
         rsc->cmds->create_actions(rsc, data_set);
     }
 
     crm_trace("Creating done");
     return TRUE;
 }
 
 /*
  * Return TRUE when the resource itself, or any resource reachable through
  * its children lists, has the pe_rsc_managed flag set; FALSE otherwise.
  */
 static gboolean
 is_managed(const resource_t * rsc)
 {
     GListPtr child = NULL;
 
     if (is_set(rsc->flags, pe_rsc_managed)) {
         return TRUE;
     }
 
     /* Not managed directly: search the children depth-first */
     for (child = rsc->children; child != NULL; child = child->next) {
         if (is_managed((resource_t *) child->data)) {
             return TRUE;
         }
     }
 
     return FALSE;
 }
 
 /*
  * Return TRUE as soon as one top-level resource of the working set (or one
  * of its descendants, see is_managed()) is managed; FALSE if none is.
  */
 static gboolean
 any_managed_resources(pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     gboolean found = FALSE;
 
     for (item = data_set->resources; item != NULL && found == FALSE; item = item->next) {
         if (is_managed((resource_t *) item->data)) {
             found = TRUE;
         }
     }
     return found;
 }
 
 /*
  * Create dependencies for stonith and shutdown operations
  *
  * Walks every node of the working set and, per node, either creates a
  * fencing operation (unclean node that can be fenced), a shutdown operation
  * (online node flagged for shutdown that is not a remote node), or nothing.
  * The created operations are then ordered relative to each other and to
  * the STONITH_DONE and ALL_STOPPED pseudo operations.
  *
  * Always returns TRUE.
  */
 gboolean
 stage6(pe_working_set_t * data_set)
 {
     action_t *dc_down = NULL;       /* fencing or shutdown op created for the DC, if any */
     action_t *dc_fence = NULL;      /* set only when the DC's op is a fencing op */
     action_t *stonith_op = NULL;
     action_t *last_stonith = NULL;  /* most recent fencing op created for a non-DC node */
     gboolean integrity_lost = FALSE;
     action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
     action_t *done = get_pseudo_op(STONITH_DONE, data_set);
     gboolean need_stonith = TRUE;
     GListPtr gIter = data_set->nodes;
 
     crm_trace("Processing fencing and shutdown cases");
 
     /* With nothing managed, no fencing op is created in the loop below */
     if (any_managed_resources(data_set) == FALSE) {
         crm_notice("Delaying fencing operations until there are resources to manage");
         need_stonith = FALSE;
     }
 
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         /* remote-nodes associated with a container resource (such as a vm) are not fenced */
         if (is_container_remote_node(node)) {
             continue;
         }
 
         stonith_op = NULL;
         if (need_stonith && node->details->unclean && pe_can_fence(data_set, node)) {
             pe_warn("Scheduling Node %s for STONITH", node->details->uname);
 
             stonith_op = pe_fence_op(node, NULL, FALSE, data_set);
 
             stonith_constraints(node, stonith_op, data_set);
 
             if (node->details->is_dc) {
                 dc_down = stonith_op;
                 dc_fence = stonith_op;
 
             } else {
                 /* Chain non-DC fencing ops one after another */
                 if (last_stonith) {
                     order_actions(last_stonith, stonith_op, pe_order_optional);
                 }
                 last_stonith = stonith_op;
             }
 
         } else if (node->details->online && node->details->shutdown &&
                 /* TODO define what a shutdown op means for a baremetal remote node.
                  * For now we do not send shutdown operations for remote nodes, but
                  * if we can come up with a good use for this in the future, we will. */
                     is_remote_node(node) == FALSE) {
 
             action_t *down_op = NULL;
 
             crm_notice("Scheduling Node %s for shutdown", node->details->uname);
 
             down_op = custom_action(NULL, g_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, node->details->uname),
                                     CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set);
 
             shutdown_constraints(node, down_op, data_set);
             add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
 
             if (node->details->is_dc) {
                 dc_down = down_op;
             }
         }
 
         /* An unclean node for which no fencing op was created above */
         if (node->details->unclean && stonith_op == NULL) {
             integrity_lost = TRUE;
             pe_warn("Node %s is unclean!", node->details->uname);
         }
     }
 
     if (integrity_lost) {
         if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
             pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
             pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
 
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
             crm_notice("Cannot fence unclean nodes until quorum is"
                        " attained (or no-quorum-policy is set to ignore)");
         }
     }
 
     if (dc_down != NULL) {
         /* NOTE: this declaration shadows the function-level gIter */
         GListPtr gIter = NULL;
 
         crm_trace("Ordering shutdowns before %s on %s (DC)",
                   dc_down->task, dc_down->node->details->uname);
 
         add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
 
         /* Order the shutdown of every non-DC node before the DC's own op */
         for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
             action_t *node_stop = (action_t *) gIter->data;
 
             if (safe_str_neq(CRM_OP_SHUTDOWN, node_stop->task)) {
                 continue;
             } else if (node_stop->node->details->is_dc) {
                 continue;
             }
 
             crm_debug("Ordering shutdown on %s before %s on %s",
                       node_stop->node->details->uname,
                       dc_down->task, dc_down->node->details->uname);
 
             order_actions(node_stop, dc_down, pe_order_optional);
         }
 
         /* The DC's op also goes after the last non-DC fencing op */
         if (last_stonith && dc_down != last_stonith) {
             order_actions(last_stonith, dc_down, pe_order_optional);
         }
     }
 
     /* STONITH_DONE follows the last non-DC fencing op, or, when only the DC
      * was fenced, the DC's op */
     if (last_stonith) {
         order_actions(last_stonith, done, pe_order_implies_then);
 
     } else if (dc_fence) {
         order_actions(dc_down, done, pe_order_implies_then);
     }
 
     order_actions(done, all_stopped, pe_order_implies_then);
     return TRUE;
 }
 
 /*
  * Determine the sets of independent actions and the correct order for the
  *  actions in each set.
  *
  * Mark dependencies of un-runnable actions un-runnable
  *
  */
 /*
  * Look up actions by operation key.
  *
  * The list is first searched for original_key as given.  If nothing
  * matches, the key is parsed and rebuilt with rsc->id as the resource part
  * (the caller may be searching a child of the resource the key was made
  * for) and the search is repeated with the rebuilt key.
  *
  * Returns whatever find_actions() returned for the key that was used, or
  * NULL when original_key cannot be parsed.
  */
 static GListPtr
 find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key)
 {
     char *rebuilt_key = NULL;
     char *parsed_rsc = NULL;
     char *parsed_task = NULL;
     int parsed_interval = 0;
     GListPtr matches = find_actions(actions, original_key, NULL);
 
     if (matches != NULL) {
         return matches;
     }
 
     /* we're potentially searching a child of the original resource */
     if (!parse_op_key(original_key, &parsed_rsc, &parsed_task, &parsed_interval)) {
         crm_err("search key: %s", original_key);
 
     } else {
         rebuilt_key = generate_op_key(rsc->id, parsed_task, parsed_interval);
         matches = find_actions(actions, rebuilt_key, NULL);
     }
 
     free(rebuilt_key);
     free(parsed_rsc);
     free(parsed_task);
 
     return matches;
 }
 
 /*
  * Apply the right-hand ("then") side of an ordering constraint.
  *
  * lh_action: the "first" action; may be NULL
  * rsc:       resource whose actions form the "then" side; must not be NULL
  * order:     the constraint being applied; must not be NULL
  *
  * The "then" actions are order->rh_action when it is set, otherwise the
  * actions of rsc matching order->rh_action_task.  Each of them is ordered
  * after lh_action using the constraint's type.  Without an lh_action, a
  * constraint carrying pe_order_implies_then gets its "then" actions'
  * runnable flag cleared instead.
  */
 static void
 rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * order)
 {
     GListPtr gIter = NULL;
     GListPtr rh_actions = NULL;
     action_t *rh_action = NULL;
     enum pe_ordering type;
 
     CRM_CHECK(rsc != NULL, return);
     CRM_CHECK(order != NULL, return);
 
     /* Only read from order once it is known to be non-NULL */
     type = order->type;
     rh_action = order->rh_action;
     crm_trace("Processing RH of ordering constraint %d", order->id);
 
     if (rh_action != NULL) {
         rh_actions = g_list_prepend(NULL, rh_action);
 
     } else {
         rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
     }
 
     if (rh_actions == NULL) {
         pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
                      " ignoring", rsc->id, order->rh_action_task);
         if (lh_action) {
             pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
         }
         return;
     }
 
     /* A dangling "first" action on the same resource must not imply "then" */
     if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
         pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
                      order->rh_action_task);
         clear_bit(type, pe_order_implies_then);
     }
 
     gIter = rh_actions;
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *rh_action_iter = (action_t *) gIter->data;
 
         if (lh_action) {
             order_actions(lh_action, rh_action_iter, type);
 
         } else if (type & pe_order_implies_then) {
             update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear);
             crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
         } else {
             crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
         }
     }
 
     /* Only the list cells are released; the actions themselves are not */
     g_list_free(rh_actions);
 }
 
 /*
  * Apply the left-hand ("first") side of an ordering constraint.
  *
  * The "first" actions are order->lh_action when it is set, otherwise the
  * actions of lh_rsc matching order->lh_action_task.  When none exist and
  * the constraint spans two different resources, a new action is created
  * for lh_rsc, unless the resource's state already matches the requested
  * task (stopped for a stop, slave for a demote).
  *
  * Each "first" action is then handed to rsc_order_then(), or ordered
  * directly before order->rh_action when no "then" resource is known.
  */
 static void
 rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     GListPtr first_actions = NULL;
     action_t *first = order->lh_action;
     resource_t *then_rsc = order->rh_rsc;
 
     crm_trace("Processing LH of ordering constraint %d", order->id);
     CRM_ASSERT(lh_rsc != NULL);
 
     if (first == NULL) {
         first_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
 
     } else {
         first_actions = g_list_prepend(NULL, first);
     }
 
     if (first_actions == NULL && lh_rsc != then_rsc) {
         char *new_key = NULL;
         char *parsed_rsc = NULL;
         char *task = NULL;
         int interval = 0;
         gboolean already_there = FALSE;
 
         parse_op_key(order->lh_action_task, &parsed_rsc, &task, &interval);
         new_key = generate_op_key(lh_rsc->id, task, interval);
 
         /* Nothing to create if the resource is already in the state the
          * requested task would produce */
         already_there =
             (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(task, RSC_STOP))
             || (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(task, RSC_DEMOTE));
 
         if (already_there) {
             free(new_key);
             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
 
         } else {
             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
             first = custom_action(lh_rsc, new_key, task, NULL, TRUE, TRUE, data_set);
             first_actions = g_list_prepend(NULL, first);
         }
 
         free(task);
         free(parsed_rsc);
     }
 
     /* Fall back to the resource of the explicit "then" action */
     if (then_rsc == NULL && order->rh_action) {
         then_rsc = order->rh_action->rsc;
     }
 
     for (item = first_actions; item != NULL; item = item->next) {
         action_t *first_iter = (action_t *) item->data;
 
         if (then_rsc) {
             rsc_order_then(first_iter, then_rsc, order);
 
         } else if (order->rh_action) {
             order_actions(first_iter, order->rh_action, order->type);
         }
     }
 
     g_list_free(first_actions);
 }
 
 extern gboolean update_action(action_t * action);
 
 /*
  * Add ordering constraints for resource actions that run on remote nodes.
  *
  * Does nothing unless the working set is flagged as having remote nodes.
  * Every non-pseudo action that has both a resource and a remote node is
  * ordered relative to the start or stop of that node's connection resource
  * (or, for a failed container, the container's stop), depending on the
  * action's task.
  */
 static void
 apply_remote_node_ordering(pe_working_set_t *data_set)
 {
     GListPtr gIter = data_set->actions;
 
     if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
         return;
     }
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
         resource_t *remote_rsc = NULL;
         resource_t *container = NULL;
 
         /* Only real resource actions located on a remote node are relevant */
         if (action->node == NULL ||
             is_remote_node(action->node) == FALSE ||
             action->rsc == NULL ||
             is_set(action->flags, pe_action_pseudo)) {
             continue;
         }
 
         remote_rsc = action->node->details->remote_rsc;
         container = remote_rsc->container;
 
         /* These tasks are ordered after the connection resource's start */
         if (safe_str_eq(action->task, "monitor") ||
             safe_str_eq(action->task, "start") ||
             safe_str_eq(action->task, "promote") ||
             safe_str_eq(action->task, CRM_OP_LRM_REFRESH) ||
             safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT) ||
             safe_str_eq(action->task, "delete")) {
 
             custom_action_order(remote_rsc,
                 generate_op_key(remote_rsc->id, RSC_START, 0),
                 NULL,
                 action->rsc,
                 NULL,
                 action,
                 pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                 data_set);
 
         } else if (safe_str_eq(action->task, "demote")) {
 
             /* If the connection is being torn down, we don't want
              * to build a constraint between a resource's demotion and
              * the connection resource starting... because the connection
              * resource can not start. The connection might already be up,
              * but the START action would not be allowed which in turn would
              * block the demotion of any resources living in the remote-node.
              *
              * In this case, only build the constraint between the demotion and
              * the connection's stop action. This allows the connection and all the
              * resources within the remote-node to be torn down properly. */
             if (remote_rsc->next_role == RSC_ROLE_STOPPED) {
                 custom_action_order(action->rsc,
                     NULL,
                     action,
                     remote_rsc,
                     generate_op_key(remote_rsc->id, RSC_STOP, 0),
                     NULL,
                     pe_order_preserve | pe_order_implies_first,
                     data_set);
             } else {
 
                 custom_action_order(remote_rsc,
                     generate_op_key(remote_rsc->id, RSC_START, 0),
                     NULL,
                     action->rsc,
                     NULL,
                     action,
                     pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                     data_set);
             }
 
         } else if (safe_str_eq(action->task, "stop") &&
                    container &&
                    is_set(container->flags, pe_rsc_failed)) {
 
             /* when the container representing a remote node fails, the stop
              * action for all the resources living in that container is implied
              * by the container stopping.  This is similar to how fencing operations
              * work for cluster nodes. */
             pe_set_action_bit(action, pe_action_pseudo);
             custom_action_order(container,
                 generate_op_key(container->id, RSC_STOP, 0),
                 NULL,
                 action->rsc,
                 NULL,
                 action,
                 pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                 data_set);
-
         } else if (safe_str_eq(action->task, "stop")) {
-            custom_action_order(action->rsc,
-                NULL,
-                action,
-                remote_rsc,
-                generate_op_key(remote_rsc->id, RSC_STOP, 0),
-                NULL,
-                pe_order_preserve | pe_order_implies_first,
-                data_set);
+            gboolean after_start = FALSE;
+
+            /* handle special case with baremetal remote where stop actions need to be
+             * ordered after the connection resource starts somewhere else. */
+            if (is_baremetal_remote_node(action->node)) {
+                node_t *cluster_node = remote_rsc->running_on ? remote_rsc->running_on->data : NULL;
+
+                /* if the current cluster node a baremetal connection resource
+                 * is residing on is unclean, we can't process any operations on that
+                 * remote node until after it starts somewhere else. */
+                if (cluster_node && cluster_node->details->unclean == TRUE) {
+                    after_start = TRUE;
+                }
+            }
+
+            if (after_start) {
+                custom_action_order(remote_rsc,
+                    generate_op_key(remote_rsc->id, RSC_START, 0),
+                    NULL,
+                    action->rsc,
+                    NULL,
+                    action,
+                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
+                    data_set);
+            } else {
+                custom_action_order(action->rsc,
+                    NULL,
+                    action,
+                    remote_rsc,
+                    generate_op_key(remote_rsc->id, RSC_STOP, 0),
+                    NULL,
+                    pe_order_preserve | pe_order_implies_first,
+                    data_set);
+            }
         }
     }
 }
 
 /*
  * Apply all ordering constraints of the working set, then update every
  * action and process reloads for every resource.
  *
  * Always returns TRUE.
  */
 gboolean
 stage7(pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
 
     apply_remote_node_ordering(data_set);
     crm_trace("Applying ordering constraints");
 
     /* The constraints have to be processed in the order in which they were
      * created.
      *
      * g_list_append() has horrendous performance characteristics, so the
      * list is built with g_list_prepend() and reversed here instead.
      */
     data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
 
     for (item = data_set->ordering_constraints; item != NULL; item = item->next) {
         order_constraint_t *order = (order_constraint_t *) item->data;
 
         crm_trace("Applying ordering constraint: %d", order->id);
 
         if (order->lh_rsc != NULL) {
             crm_trace("rsc_action-to-*");
             rsc_order_first(order->lh_rsc, order, data_set);
 
         } else if (order->rh_rsc != NULL) {
             crm_trace("action-to-rsc_action");
             rsc_order_then(order->lh_action, order->rh_rsc, order);
 
         } else {
             crm_trace("action-to-action");
             order_actions(order->lh_action, order->rh_action, order->type);
         }
     }
 
     crm_trace("Updating %d actions", g_list_length(data_set->actions));
 
     for (item = data_set->actions; item != NULL; item = item->next) {
         update_action((action_t *) item->data);
     }
 
     crm_trace("Processing reloads");
 
     for (item = data_set->resources; item != NULL; item = item->next) {
         resource_t *rsc = (resource_t *) item->data;
 
         rsc_reload(rsc, data_set);
         LogActions(rsc, data_set, FALSE);
     }
     return TRUE;
 }
 
 /*
  * Comparison callback for notify_entry_t pointers.
  *
  * Entries are compared by resource id first and by node id second.  At
  * each level (entry, resource, node) a missing value sorts after a present
  * one, and two missing values compare equal.
  */
 static gint
 sort_notify_entries(gconstpointer a, gconstpointer b)
 {
     const notify_entry_t *lhs = a;
     const notify_entry_t *rhs = b;
     int by_rsc = 0;
 
     if (lhs == NULL || rhs == NULL) {
         return (lhs == rhs) ? 0 : ((lhs == NULL) ? 1 : -1);
     }
 
     if (lhs->rsc == NULL || rhs->rsc == NULL) {
         return (lhs->rsc == rhs->rsc) ? 0 : ((lhs->rsc == NULL) ? 1 : -1);
     }
 
     by_rsc = strcmp(lhs->rsc->id, rhs->rsc->id);
     if (by_rsc != 0) {
         return by_rsc;
     }
 
     /* Same resource id: fall back to the node */
     if (lhs->node == NULL || rhs->node == NULL) {
         return (lhs->node == rhs->node) ? 0 : ((lhs->node == NULL) ? 1 : -1);
     }
 
     return strcmp(lhs->node->details->id, rhs->node->details->id);
 }
 
 /*
  * Build space-separated strings of resource ids and node names from a list
  * of notify_entry_t.
  *
  * list:      entries to expand; may be NULL
  * rsc_list:  if non-NULL, receives a newly allocated "id id ... " string
  * node_list: if non-NULL, receives a newly allocated "uname uname ... " string
  *
  * For a NULL list each requested output is set to a copy of " ".  Entries
  * without a resource are skipped, as are entries without a node when a
  * node list was requested.  An entry whose resource id equals that of the
  * previously accepted entry is skipped as a duplicate.  If every entry is
  * skipped, the outputs are left as NULL.
  *
  * The caller owns the returned strings.
  */
 static void
 expand_list(GListPtr list, char **rsc_list, char **node_list)
 {
     GListPtr gIter = NULL;
     const char *uname = NULL;
     const char *rsc_id = NULL;
     const char *last_rsc_id = NULL;
 
     if (rsc_list) {
         *rsc_list = NULL;
     }
 
     if (list == NULL) {
         if (rsc_list) {
             *rsc_list = strdup(" ");
         }
         if (node_list) {
             *node_list = strdup(" ");
         }
         return;
     }
 
     if (node_list) {
         *node_list = NULL;
     }
 
     for (gIter = list; gIter != NULL; gIter = gIter->next) {
         notify_entry_t *entry = (notify_entry_t *) gIter->data;
 
         CRM_LOG_ASSERT(entry != NULL);
         CRM_LOG_ASSERT(entry && entry->rsc != NULL);
 
         if(entry == NULL || entry->rsc == NULL) {
             continue;
         }
 
         /* When a node list is requested, entries without a node are
          * logged and left out of both lists */
         CRM_LOG_ASSERT(node_list == NULL || entry->node != NULL);
         if(node_list != NULL && entry->node == NULL) {
             continue;
         }
 
         uname = NULL;
         rsc_id = entry->rsc->id;
         CRM_ASSERT(rsc_id != NULL);
 
         /* filter dups */
         if (safe_str_eq(rsc_id, last_rsc_id)) {
             continue;
         }
         last_rsc_id = rsc_id;
 
         if (rsc_list != NULL) {
             char *grown = NULL;
             int existing_len = 0;
             int len = 2 + strlen(rsc_id);       /* +1 space, +1 EOS */
 
             if (*rsc_list) {
                 existing_len = strlen(*rsc_list);
             }
 
             crm_trace("Adding %s (%dc) at offset %d", rsc_id, len - 2, existing_len);
 
             /* Grow through a temporary so a failed realloc() is caught
              * before the result is written through */
             grown = realloc(*rsc_list, len + existing_len);
             CRM_ASSERT(grown != NULL);
             *rsc_list = grown;
             sprintf(*rsc_list + existing_len, "%s ", rsc_id);
         }
 
         if (entry->node != NULL) {
             uname = entry->node->details->uname;
         }
 
         if (node_list != NULL && uname) {
             char *grown = NULL;
             int existing_len = 0;
             int len = 2 + strlen(uname);        /* +1 space, +1 EOS */
 
             if (*node_list) {
                 existing_len = strlen(*node_list);
             }
 
             crm_trace("Adding %s (%dc) at offset %d", uname, len - 2, existing_len);
 
             grown = realloc(*node_list, len + existing_len);
             CRM_ASSERT(grown != NULL);
             *node_list = grown;
             sprintf(*node_list + existing_len, "%s ", uname);
         }
     }
 
 }
 
 /*
  * Hash-table iteration callback: pass one key/value pair on to
  * add_hash_param(), with user_data as the destination table.
  */
 static void
 dup_attr(gpointer key, gpointer value, gpointer user_data)
 {
     gpointer destination = user_data;
 
     add_hash_param(destination, key, value);
 }
 
 /*
  * Create a notify action for a resource on a node and order it between a
  * notification pseudo-op and its confirmation.
  *
  * rsc:      resource to notify; must not be NULL
  * node:     node on which the notification runs; must not be NULL
  * op:       action the notification is ordered after
  * confirm:  action the notification is ordered before
  * n_data:   notification data whose keys are copied into the new action
  * data_set: working set
  *
  * Returns the new action, or NULL when op or confirm is NULL, the node is
  * offline, or op is not runnable.
  */
 static action_t *
 pe_notify(resource_t * rsc, node_t * node, action_t * op, action_t * confirm,
           notify_data_t * n_data, pe_working_set_t * data_set)
 {
     char *key = NULL;
     action_t *trigger = NULL;
     const char *value = NULL;
     const char *task = NULL;
 
     if (op == NULL || confirm == NULL) {
         pe_rsc_trace(rsc, "Op=%p confirm=%p", op, confirm);
         return NULL;
     }
 
     CRM_CHECK(rsc != NULL, return NULL);
     CRM_CHECK(node != NULL, return NULL);
 
     if (node->details->online == FALSE) {
         pe_rsc_trace(rsc, "Skipping notification for %s: node offline", rsc->id);
         return NULL;
     } else if (is_set(op->flags, pe_action_runnable) == FALSE) {
         pe_rsc_trace(rsc, "Skipping notification for %s: not runnable", op->uuid);
         return NULL;
     }
 
     value = g_hash_table_lookup(op->meta, "notify_type");
     task = g_hash_table_lookup(op->meta, "notify_operation");
 
     pe_rsc_trace(rsc, "Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task);
 
     key = generate_notify_key(rsc->id, value, task);
     trigger = custom_action(rsc, key, op->task, node,
                             is_set(op->flags, pe_action_optional), TRUE, data_set);
 
     /* The new action gets op's meta attributes plus the notification keys */
     g_hash_table_foreach(op->meta, dup_attr, trigger->meta);
     g_hash_table_foreach(n_data->keys, dup_attr, trigger->meta);
 
     /* pseudo_notify before notify; ids are printed in the same order as the
      * names so that the arrow matches the ordering being created */
     pe_rsc_trace(rsc, "Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, op->id,
                  trigger->id);
 
     order_actions(op, trigger, pe_order_optional);
     order_actions(trigger, confirm, pe_order_optional);
     return trigger;
 }
 
 /*
  * Create the post-notification for a resource on a node.
  *
  * Does nothing when n_data has no post pseudo-op.  A notify action that
  * gets created is given INFINITY priority.  When a post_done pseudo-op
  * exists, every action of the resource that has a non-zero interval and is
  * not a cancel is ordered after it.
  */
 static void
 pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     action_t *post_notify = NULL;
 
     CRM_CHECK(rsc != NULL, return);
 
     if (n_data->post == NULL) {
         return;                 /* Nothing to do */
     }
 
     post_notify = pe_notify(rsc, node, n_data->post, n_data->post_done, n_data, data_set);
     if (post_notify != NULL) {
         post_notify->priority = INFINITY;
     }
 
     if (n_data->post_done == NULL) {
         return;
     }
 
     for (item = rsc->actions; item != NULL; item = item->next) {
         action_t *mon = (action_t *) item->data;
         const char *interval = g_hash_table_lookup(mon->meta, "interval");
 
         if (interval == NULL || safe_str_eq(interval, "0")) {
             pe_rsc_trace(rsc, "Skipping %s: interval", mon->uuid);
 
         } else if (safe_str_eq(mon->task, RSC_CANCEL)) {
             pe_rsc_trace(rsc, "Skipping %s: cancel", mon->uuid);
 
         } else {
             order_actions(n_data->post_done, mon, pe_order_optional);
         }
     }
 }
 
 notify_data_t *
 create_notification_boundaries(resource_t * rsc, const char *action, action_t * start,
                                action_t * end, pe_working_set_t * data_set)
 {
     /* Create the pseudo ops that preceed and follow the actual notifications */
 
     /*
      * Creates two sequences (conditional on start and end being supplied):
      *   pre_notify -> pre_notify_complete -> start, and
      *   end -> post_notify -> post_notify_complete
      *
      * 'start' and 'end' may be the same event or ${X} and ${X}ed as per clones
      */
     char *key = NULL;
     notify_data_t *n_data = NULL;
 
     if (is_not_set(rsc->flags, pe_rsc_notify)) {
         return NULL;
     }
 
     n_data = calloc(1, sizeof(notify_data_t));
     n_data->action = action;
     n_data->keys =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
 
     if (start) {
         /* create pre-event notification wrappers */
         key = generate_notify_key(rsc->id, "pre", start->task);
         n_data->pre =
             custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(start->flags, pe_action_optional),
                           TRUE, data_set);
 
         update_action_flags(n_data->pre, pe_action_pseudo);
         update_action_flags(n_data->pre, pe_action_runnable);
 
         add_hash_param(n_data->pre->meta, "notify_type", "pre");
         add_hash_param(n_data->pre->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->pre->meta, "notify_key_type", "pre");
         add_hash_param(n_data->pre->meta, "notify_key_operation", start->task);
 
         /* create pre_notify_complete */
         key = generate_notify_key(rsc->id, "confirmed-pre", start->task);
         n_data->pre_done =
             custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(start->flags, pe_action_optional),
                           TRUE, data_set);
 
         update_action_flags(n_data->pre_done, pe_action_pseudo);
         update_action_flags(n_data->pre_done, pe_action_runnable);
 
         add_hash_param(n_data->pre_done->meta, "notify_type", "pre");
         add_hash_param(n_data->pre_done->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->pre_done->meta, "notify_key_type", "confirmed-pre");
         add_hash_param(n_data->pre_done->meta, "notify_key_operation", start->task);
 
         order_actions(n_data->pre_done, start, pe_order_optional);
         order_actions(n_data->pre, n_data->pre_done, pe_order_optional);
     }
 
     if (end) {
         /* create post-event notification wrappers */
         key = generate_notify_key(rsc->id, "post", end->task);
         n_data->post =
             custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(end->flags, pe_action_optional), TRUE,
                           data_set);
 
         n_data->post->priority = INFINITY;
         update_action_flags(n_data->post, pe_action_pseudo);
         if (is_set(end->flags, pe_action_runnable)) {
             update_action_flags(n_data->post, pe_action_runnable);
         } else {
             update_action_flags(n_data->post, pe_action_runnable | pe_action_clear);
         }
 
         add_hash_param(n_data->post->meta, "notify_type", "post");
         add_hash_param(n_data->post->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->post->meta, "notify_key_type", "post");
         add_hash_param(n_data->post->meta, "notify_key_operation", end->task);
 
         /* create post_notify_complete */
         key = generate_notify_key(rsc->id, "confirmed-post", end->task);
         n_data->post_done =
             custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(end->flags, pe_action_optional),
                           TRUE, data_set);
 
         n_data->post_done->priority = INFINITY;
         update_action_flags(n_data->post_done, pe_action_pseudo);
         if (is_set(end->flags, pe_action_runnable)) {
             update_action_flags(n_data->post_done, pe_action_runnable);
         } else {
             update_action_flags(n_data->post_done, pe_action_runnable | pe_action_clear);
         }
 
         add_hash_param(n_data->post_done->meta, "notify_type", "post");
         add_hash_param(n_data->post_done->meta, "notify_operation", n_data->action);
 
         add_hash_param(n_data->post_done->meta, "notify_key_type", "confirmed-post");
         add_hash_param(n_data->post_done->meta, "notify_key_operation", end->task);
 
         order_actions(end, n_data->post, pe_order_implies_then);
         order_actions(n_data->post, n_data->post_done, pe_order_implies_then);
     }
 
     if (start && end) {
         order_actions(n_data->pre_done, n_data->post, pe_order_optional);
     }
 
     if (safe_str_eq(action, RSC_STOP)) {
         action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
 
         order_actions(n_data->post_done, all_stopped, pe_order_optional);
     }
 
     return n_data;
 }
 
 /*
  * Recursively gather notification entries for a resource into n_data.
  *
  * A resource that has children is never recorded itself; the call simply
  * descends into each child with the same arguments.  For a childless
  * resource:
  *   - if 'state' is set, one entry is filed under the list matching the
  *     resource's current role (inactive / active / slave / master), using
  *     the first node of running_on when there is one;
  *   - if 'activity' is set, one entry is filed for every non-optional action
  *     that has a node, under the start / stop / promote / demote list that
  *     matches the action's task.  Actions with any other task are ignored.
  */
 void
 collect_notification_data(resource_t * rsc, gboolean state, gboolean activity,
                           notify_data_t * n_data)
 {
     GListPtr item = NULL;
 
     if (rsc->children) {
         for (item = rsc->children; item != NULL; item = item->next) {
             collect_notification_data((resource_t *) item->data, state, activity, n_data);
         }
         return;
     }
 
     if (state) {
         GListPtr *bucket = NULL;
         notify_entry_t *current = calloc(1, sizeof(notify_entry_t));
 
         current->rsc = rsc;
         if (rsc->running_on) {
             /* only the first location is reported */
             current->node = rsc->running_on->data;
         }
 
         pe_rsc_trace(rsc, "%s state: %s", rsc->id, role2text(rsc->role));
 
         /* pick the list that corresponds to the current role */
         switch (rsc->role) {
             case RSC_ROLE_STOPPED:
                 bucket = &n_data->inactive;
                 break;
             case RSC_ROLE_STARTED:
                 bucket = &n_data->active;
                 break;
             case RSC_ROLE_SLAVE:
                 bucket = &n_data->slave;
                 break;
             case RSC_ROLE_MASTER:
                 bucket = &n_data->master;
                 break;
             default:
                 break;
         }
 
         if (bucket != NULL) {
             *bucket = g_list_prepend(*bucket, current);
         } else {
             crm_err("Unsupported notify role");
             free(current);
         }
     }
 
     if (activity) {
         for (item = rsc->actions; item != NULL; item = item->next) {
             action_t *op = (action_t *) item->data;
             GListPtr *bucket = NULL;
             notify_entry_t *current = NULL;
 
             /* only mandatory actions that are bound to a node are reported */
             if (is_set(op->flags, pe_action_optional) || op->node == NULL) {
                 continue;
             }
 
             current = calloc(1, sizeof(notify_entry_t));
             current->node = op->node;
             current->rsc = rsc;
 
             switch (text2task(op->task)) {
                 case start_rsc:
                     bucket = &n_data->start;
                     break;
                 case stop_rsc:
                     bucket = &n_data->stop;
                     break;
                 case action_promote:
                     bucket = &n_data->promote;
                     break;
                 case action_demote:
                     bucket = &n_data->demote;
                     break;
                 default:
                     break;
             }
 
             if (bucket != NULL) {
                 *bucket = g_list_prepend(*bucket, current);
             } else {
                 /* not a task that notifications care about */
                 free(current);
             }
         }
     }
 }
 
 /*
  * Expand the collected notification entries into the key=value hash table
  * n_data->keys.  This hash table is later used in action2xml().
  *
  * Each non-empty entry list is sorted with sort_notify_entries and then
  * rendered by expand_list() into a resource string and a node-name string,
  * which are stored under "notify_<kind>_resource" / "notify_<kind>_uname".
  * The inactive list only produces a resource string.  The hash table takes
  * ownership of every string inserted here.
  *
  * Returns TRUE when the list that corresponds to n_data->action (stop,
  * start, demote or promote) has something in it.  In that case the pre and
  * post notification actions, when present, have their optional flag cleared.
  */
 gboolean
 expand_notification_data(notify_data_t * n_data)
 {
     gboolean required = FALSE;
     char *rsc_list = NULL;
     char *node_list = NULL;
 
     if (n_data->stop) {
         n_data->stop = g_list_sort(n_data->stop, sort_notify_entries);
     }
     expand_list(n_data->stop, &rsc_list, &node_list);
     /* a stop notification is only needed if the rendered list is non-blank */
     if (rsc_list != NULL && safe_str_neq(" ", rsc_list)
         && safe_str_eq(n_data->action, RSC_STOP)) {
         required = TRUE;
     }
     g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), node_list);
 
     if (n_data->start) {
         n_data->start = g_list_sort(n_data->start, sort_notify_entries);
         /* A non-empty start list is sufficient on its own, exactly as for
          * demote and promote below.  rsc_list must not be consulted here:
          * it still points at the stop string, which now belongs to
          * n_data->keys and says nothing about what is being started.
          */
         if (safe_str_eq(n_data->action, RSC_START)) {
             required = TRUE;
         }
     }
     expand_list(n_data->start, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_start_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_start_uname"), node_list);
 
     if (n_data->demote) {
         n_data->demote = g_list_sort(n_data->demote, sort_notify_entries);
         if (safe_str_eq(n_data->action, RSC_DEMOTE)) {
             required = TRUE;
         }
     }
 
     expand_list(n_data->demote, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_demote_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_demote_uname"), node_list);
 
     if (n_data->promote) {
         n_data->promote = g_list_sort(n_data->promote, sort_notify_entries);
         if (safe_str_eq(n_data->action, RSC_PROMOTE)) {
             required = TRUE;
         }
     }
     expand_list(n_data->promote, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_promote_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_promote_uname"), node_list);
 
     /* The remaining lists describe current state only; they never make the
      * notification required.
      */
     if (n_data->active) {
         n_data->active = g_list_sort(n_data->active, sort_notify_entries);
     }
     expand_list(n_data->active, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_active_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_active_uname"), node_list);
 
     if (n_data->slave) {
         n_data->slave = g_list_sort(n_data->slave, sort_notify_entries);
     }
     expand_list(n_data->slave, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_slave_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_slave_uname"), node_list);
 
     if (n_data->master) {
         n_data->master = g_list_sort(n_data->master, sort_notify_entries);
     }
     expand_list(n_data->master, &rsc_list, &node_list);
     g_hash_table_insert(n_data->keys, strdup("notify_master_resource"), rsc_list);
     g_hash_table_insert(n_data->keys, strdup("notify_master_uname"), node_list);
 
     if (n_data->inactive) {
         n_data->inactive = g_list_sort(n_data->inactive, sort_notify_entries);
     }
     /* inactive resources have no node, so only the resource list is built */
     expand_list(n_data->inactive, &rsc_list, NULL);
     g_hash_table_insert(n_data->keys, strdup("notify_inactive_resource"), rsc_list);
 
     /* a required notification makes its wrapper actions mandatory */
     if (required && n_data->pre) {
         update_action_flags(n_data->pre, pe_action_optional | pe_action_clear);
         update_action_flags(n_data->pre_done, pe_action_optional | pe_action_clear);
     }
 
     if (required && n_data->post) {
         update_action_flags(n_data->post, pe_action_optional | pe_action_clear);
         update_action_flags(n_data->post_done, pe_action_optional | pe_action_clear);
     }
     return required;
 }
 
 /*
  * Create the per-node notify actions for a resource.
  *
  * A resource with children only forwards the call to each child.  For a
  * childless resource the expanded notification keys are first copied into
  * the meta attributes of every mandatory start/stop/promote/demote action
  * that has a node.  Then, depending on the operation named by
  * n_data->action:
  *   - stop / demote: a pre notification is created on every node the
  *     resource is running on, and a post notification as well unless a
  *     mandatory stop is being notified for;
  *   - start / promote: a post notification is created on the allocated
  *     node, and a pre notification too unless a mandatory start is being
  *     notified for.
  */
 void
 create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     action_t *stop_op = NULL;
     action_t *start_op = NULL;
     enum action_tasks task = text2task(n_data->action);
 
     if (rsc->children) {
         for (item = rsc->children; item != NULL; item = item->next) {
             create_notifications((resource_t *) item->data, n_data, data_set);
         }
         return;
     }
 
     /* Copy notification details into standard ops */
     for (item = rsc->actions; item != NULL; item = item->next) {
         action_t *op = (action_t *) item->data;
 
         if (is_set(op->flags, pe_action_optional) || op->node == NULL) {
             continue;
         }
 
         switch (text2task(op->task)) {
             case start_rsc:
             case stop_rsc:
             case action_promote:
             case action_demote:
                 g_hash_table_foreach(n_data->keys, dup_attr, op->meta);
                 break;
             default:
                 break;
         }
     }
 
     pe_rsc_trace(rsc, "Creating notificaitons for: %s.%s (%s->%s)",
                  n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role));
 
     stop_op = find_first_action(rsc->actions, NULL, RSC_STOP, NULL);
     start_op = find_first_action(rsc->actions, NULL, RSC_START, NULL);
 
     /* stop / demote */
     if (rsc->role != RSC_ROLE_STOPPED && (task == stop_rsc || task == action_demote)) {
         for (item = rsc->running_on; item != NULL; item = item->next) {
             node_t *current_node = (node_t *) item->data;
 
             pe_notify(rsc, current_node, n_data->pre, n_data->pre_done, n_data, data_set);
             if (task == action_demote || stop_op == NULL
                 || is_set(stop_op->flags, pe_action_optional)) {
                 pe_post_notify(rsc, current_node, n_data, data_set);
             }
         }
     }
 
     /* start / promote */
     if (rsc->next_role == RSC_ROLE_STOPPED) {
         return;
     }
 
     if (rsc->allocated_to == NULL) {
         pe_proc_err("Next role '%s' but %s is not allocated", role2text(rsc->next_role),
                     rsc->id);
         return;
     }
 
     if (task == start_rsc || task == action_promote) {
         if (task != start_rsc || start_op == NULL
             || is_set(start_op->flags, pe_action_optional)) {
             pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set);
         }
         pe_post_notify(rsc, rsc->allocated_to, n_data, data_set);
     }
 }
 
 /*
  * Release a notify_data_t: every entry list together with its entries, the
  * key/value hash table, and finally the structure itself.
  * Passing NULL is allowed and does nothing.
  */
 void
 free_notification_data(notify_data_t * n_data)
 {
     if (n_data != NULL) {
         /* pending operations */
         g_list_free_full(n_data->stop, free);
         g_list_free_full(n_data->start, free);
         g_list_free_full(n_data->demote, free);
         g_list_free_full(n_data->promote, free);
 
         /* current state */
         g_list_free_full(n_data->master, free);
         g_list_free_full(n_data->slave, free);
         g_list_free_full(n_data->active, free);
         g_list_free_full(n_data->inactive, free);
 
         g_hash_table_destroy(n_data->keys);
         free(n_data);
     }
 }
 
 /* Identifier of the most recently created transition graph.  stage8()
  * increments it before building each graph, so starting at -1 gives the
  * first graph the id 0.
  */
 int transition_id = -1;
 
 /*
  * Create a dependency graph to send to the transitioner (via the CRMd)
  *
  * A fresh XML graph node is stored in data_set->graph and stamped with the
  * relevant cluster preferences and the next transition id.  Every resource
  * then expands its own actions into the graph, after which each entry of
  * data_set->actions is passed to graph_element_from_action().
  * Always returns TRUE.
  */
 gboolean
 stage8(pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     const char *value = NULL;
 
     transition_id++;
     crm_trace("Creating transition graph %d.", transition_id);
 
     data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
 
     /* graph-wide settings; the order of the attributes is kept as is */
     crm_xml_add(data_set->graph, "cluster-delay",
                 pe_pref(data_set->config_hash, "cluster-delay"));
     crm_xml_add(data_set->graph, "stonith-timeout",
                 pe_pref(data_set->config_hash, "stonith-timeout"));
     crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
     crm_xml_add(data_set->graph, "failed-start-offset",
                 is_set(data_set->flags, pe_flag_start_failure_fatal) ? "INFINITY" : "1");
     crm_xml_add(data_set->graph, "batch-limit",
                 pe_pref(data_set->config_hash, "batch-limit"));
     crm_xml_add_int(data_set->graph, "transition_id", transition_id);
 
     /* the migration limit is only recorded when it is a positive number */
     value = pe_pref(data_set->config_hash, "migration-limit");
     if (crm_int_helper(value, NULL) > 0) {
         crm_xml_add(data_set->graph, "migration-limit", value);
     }
 
 /* errors...
    slist_iter(action, action_t, action_list, lpc,
    if(action->optional == FALSE && action->runnable == FALSE) {
    print_action("Ignoring", action, TRUE);
    }
    );
 */
 
     for (item = data_set->resources; item != NULL; item = item->next) {
         resource_t *rsc = (resource_t *) item->data;
 
         pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
         rsc->cmds->expand(rsc, data_set);
     }
 
     crm_log_xml_trace(data_set->graph, "created resource-driven action list");
 
     /* catch any non-resource specific actions */
     crm_trace("processing non-resource actions");
 
     for (item = data_set->actions; item != NULL; item = item->next) {
         action_t *action = (action_t *) item->data;
         gboolean stuck_stop = FALSE;
 
         /* a mandatory but unrunnable stop of a resource (not in maintenance)
          * on a node that is shutting down
          */
         if (action->rsc && action->node && action->node->details->shutdown) {
             stuck_stop = is_not_set(action->rsc->flags, pe_rsc_maintenance)
                 && is_not_set(action->flags, pe_action_optional)
                 && is_not_set(action->flags, pe_action_runnable)
                 && crm_str_eq(action->task, RSC_STOP, TRUE);
         }
 
         /* Eventually we should just ignore the 'fence' case
          * But for now its the best way to detect (in CTS) when
          * CIB resource updates are being lost
          */
         if (stuck_stop
             && (is_set(data_set->flags, pe_flag_have_quorum)
                 || data_set->no_quorum_policy == no_quorum_ignore)) {
             crm_crit("Cannot %s node '%s' because of %s:%s%s",
                      action->node->details->unclean ? "fence" : "shut down",
                      action->node->details->uname, action->rsc->id,
                      is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
                      is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "");
         }
 
         graph_element_from_action(action, data_set);
     }
 
     crm_log_xml_trace(data_set->graph, "created generic action list");
     crm_trace("Created transition graph %d.", transition_id);
 
     return TRUE;
 }
 
 /*
  * Free the ordering, placement, colocation and ticket constraint lists held
  * by the working set, reset each list pointer to NULL, and then hand the
  * working set to cleanup_calculations().
  * Passing NULL is allowed and does nothing.
  */
 void
 cleanup_alloc_calculations(pe_working_set_t * data_set)
 {
     if (data_set != NULL) {
         /* ordering constraints */
         crm_trace("deleting %d order cons: %p",
                   g_list_length(data_set->ordering_constraints),
                   data_set->ordering_constraints);
         pe_free_ordering(data_set->ordering_constraints);
         data_set->ordering_constraints = NULL;
 
         /* resource-to-node (placement) constraints */
         crm_trace("deleting %d node cons: %p",
                   g_list_length(data_set->placement_constraints),
                   data_set->placement_constraints);
         pe_free_rsc_to_node(data_set->placement_constraints);
         data_set->placement_constraints = NULL;
 
         /* colocation constraints */
         crm_trace("deleting %d inter-resource cons: %p",
                   g_list_length(data_set->colocation_constraints),
                   data_set->colocation_constraints);
         g_list_free_full(data_set->colocation_constraints, free);
         data_set->colocation_constraints = NULL;
 
         /* ticket constraints */
         crm_trace("deleting %d ticket deps: %p",
                   g_list_length(data_set->ticket_constraints),
                   data_set->ticket_constraints);
         g_list_free_full(data_set->ticket_constraints, free);
         data_set->ticket_constraints = NULL;
 
         cleanup_calculations(data_set);
     }
 }
diff --git a/pengine/regression.sh b/pengine/regression.sh
index 5f98215664..bdc7d3a90f 100755
--- a/pengine/regression.sh
+++ b/pengine/regression.sh
@@ -1,770 +1,772 @@
 #!/bin/bash
 
  # Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  #
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public
  # License as published by the Free Software Foundation; either
  # version 2 of the License, or (at your option) any later version.
  #
  # This software is distributed in the hope that it will be useful,
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  # General Public License for more details.
  #
  # You should have received a copy of the GNU General Public
  # License along with this library; if not, write to the Free Software
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  #
 
 core=`dirname $0`
 . $core/regression.core.sh || exit 1
 
 create_mode="true"
 info Generating test outputs for these tests...
 # do_test file description
 
 info Done.
 echo ""
 
 info Performing the following tests from $io_dir
 create_mode="false"
 
 echo ""
 do_test simple1 "Offline     "
 do_test simple2 "Start       "
 do_test simple3 "Start 2     "
 do_test simple4 "Start Failed"
 do_test simple6 "Stop Start  "
 do_test simple7 "Shutdown    "
 #do_test simple8 "Stonith	"
 #do_test simple9 "Lower version"
 #do_test simple10 "Higher version"
 do_test simple11 "Priority (ne)"
 do_test simple12 "Priority (eq)"
 do_test simple8 "Stickiness"
 
 echo ""
 do_test group1 "Group		"
 do_test group2 "Group + Native	"
 do_test group3 "Group + Group	"
 do_test group4 "Group + Native (nothing)"
 do_test group5 "Group + Native (move)   "
 do_test group6 "Group + Group (move)    "
 do_test group7 "Group colocation"
 do_test group13 "Group colocation (cant run)"
 do_test group8 "Group anti-colocation"
 do_test group9 "Group recovery"
 do_test group10 "Group partial recovery"
 do_test group11 "Group target_role"
 do_test group14 "Group stop (graph terminated)"
 do_test group15 "-ve group colocation"
 do_test bug-1573 "Partial stop of a group with two children"
 do_test bug-1718 "Mandatory group ordering - Stop group_FUN"
 do_test bug-lf-2613 "Move group on failure"
 do_test bug-lf-2619 "Move group on clone failure"
 do_test group-fail "Ensure stop order is preserved for partially active groups"
 do_test group-unmanaged "No need to restart r115 because r114 is unmanaged"
 do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails"
 do_test group-dependants "Account for the location preferences of things colocated with a group"
 
 echo ""
 do_test rsc_dep1 "Must not     "
 do_test rsc_dep3 "Must         "
 do_test rsc_dep5 "Must not 3   "
 do_test rsc_dep7 "Must 3       "
 do_test rsc_dep10 "Must (but cant)"
 do_test rsc_dep2  "Must (running) "
 do_test rsc_dep8  "Must (running : alt) "
 do_test rsc_dep4  "Must (running + move)"
 do_test asymmetric "Asymmetric - require explicit location constraints"
 
 echo ""
 do_test orphan-0 "Orphan ignore"
 do_test orphan-1 "Orphan stop"
 do_test orphan-2 "Orphan stop, remove failcount"
 
 echo ""
 do_test params-0 "Params: No change"
 do_test params-1 "Params: Changed"
 do_test params-2 "Params: Resource definition"
 do_test params-4 "Params: Reload"
 do_test params-5 "Params: Restart based on probe digest"
 do_test novell-251689 "Resource definition change + target_role=stopped"
 do_test bug-lf-2106 "Restart all anonymous clone instances after config change"
 do_test params-6 "Params: Detect reload in previously migrated resource"
 do_test nvpair-id-ref "Support id-ref in nvpair with optional name"
 
 echo ""
 do_test target-0 "Target Role : baseline"
 do_test target-1 "Target Role : master"
 do_test target-2 "Target Role : invalid"
 
 echo ""
 do_test base-score "Set a node's default score for all nodes"
 
 echo ""
 do_test date-1 "Dates" -t "2005-020"
 do_test date-2 "Date Spec - Pass" -t "2005-020T12:30"
 do_test date-3 "Date Spec - Fail" -t "2005-020T11:30"
 do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00" 
 do_test probe-0 "Probe (anon clone)"
 do_test probe-1 "Pending Probe"
 do_test probe-2 "Correctly re-probe cloned groups"
 do_test probe-3 "Probe (pending node)"
 do_test probe-4 "Probe (pending node + stopped resource)" --rc 4
 do_test standby "Standby"
 do_test comments "Comments"
 
 echo ""
 do_test one-or-more-0 "Everything starts"
 do_test one-or-more-1 "Nothing starts because of A"
 do_test one-or-more-2 "D can start because of C"
 do_test one-or-more-3 "D cannot start because of B and C"
 do_test one-or-more-4 "D cannot start because of target-role"
 do_test one-or-more-5 "Start A and F even though C and D are stopped"
 do_test one-or-more-6 "Leave A running even though B is stopped"
 do_test one-or-more-7 "Leave A running even though C is stopped"
 do_test bug-5140-require-all-false "Allow basegrp:0 to stop"
 
 echo ""
 do_test order1 "Order start 1     "
 do_test order2 "Order start 2     "
 do_test order3 "Order stop	  "
 do_test order4 "Order (multiple)  "
 do_test order5 "Order (move)  "
 do_test order6 "Order (move w/ restart)  "
 do_test order7 "Order (manditory)  "
 do_test order-optional "Order (score=0)  "
 do_test order-required "Order (score=INFINITY)  "
 do_test bug-lf-2171 "Prevent group start when clone is stopped"
 do_test order-clone "Clone ordering should be able to prevent startup of dependant clones"
 do_test order-sets "Ordering for resource sets"
 do_test order-serialize "Serialize resources without inhibiting migration"
 do_test order-serialize-set "Serialize a set of resources without inhibiting migration"
 do_test clone-order-primitive "Order clone start after a primitive"
 do_test clone-order-16instances "Verify ordering of 16 cloned resources"
 do_test order-optional-keyword "Order (optional keyword)"
 do_test order-mandatory "Order (mandatory keyword)"
 do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones"
 do_test ordered-set-basic-startup "Constraint set with default order settings."
 do_test ordered-set-natural "Allow natural set ordering"
 do_test order-wrong-kind "Order (error)"
 
 echo ""
 do_test coloc-loop "Colocation - loop"
 do_test coloc-many-one "Colocation - many-to-one"
 do_test coloc-list "Colocation - many-to-one with list"
 do_test coloc-group "Colocation - groups"
 do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation"
 do_test coloc-attr "Colocation based on node attributes"
 do_test coloc-negative-group "Negative colocation with a group"
 do_test coloc-intra-set "Intra-set colocation"
 do_test bug-lf-2435 "Colocation sets with a negative score"
 do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependant must stop"
 do_test coloc_fp_logic "Verify floating point calculations in colocation are working"
 do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc."
 do_test colo_slave_w_native  "cl#5070 - Verify promotion order is affected when colocating slave to native rsc."
 do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node"
 
 echo ""
 do_test rsc-sets-seq-true "Resource Sets - sequential=false"
 do_test rsc-sets-seq-false "Resource Sets - sequential=true"
 do_test rsc-sets-clone "Resource Sets - Clone"
 do_test rsc-sets-master "Resource Sets - Master"
 do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)"
 
 #echo ""
 #do_test agent1 "version: lt (empty)"
 #do_test agent2 "version: eq	"
 #do_test agent3 "version: gt	"
 
 echo ""
 do_test attrs1 "string: eq (and)     "
 do_test attrs2 "string: lt / gt (and)"
 do_test attrs3 "string: ne (or)      "
 do_test attrs4 "string: exists       "
 do_test attrs5 "string: not_exists   "
 do_test attrs6 "is_dc: true          "
 do_test attrs7 "is_dc: false         "
 do_test attrs8 "score_attribute      "
 do_test per-node-attrs "Per node resource parameters"
 
 echo ""
 do_test mon-rsc-1 "Schedule Monitor - start"
 do_test mon-rsc-2 "Schedule Monitor - move "
 do_test mon-rsc-3 "Schedule Monitor - pending start     "
 do_test mon-rsc-4 "Schedule Monitor - move/pending start"
 
 echo ""
 do_test rec-rsc-0 "Resource Recover - no start     "
 do_test rec-rsc-1 "Resource Recover - start        "
 do_test rec-rsc-2 "Resource Recover - monitor      "
 do_test rec-rsc-3 "Resource Recover - stop - ignore"
 do_test rec-rsc-4 "Resource Recover - stop - block "
 do_test rec-rsc-5 "Resource Recover - stop - fence "
 do_test rec-rsc-6 "Resource Recover - multiple - restart"
 do_test rec-rsc-7 "Resource Recover - multiple - stop   "
 do_test rec-rsc-8 "Resource Recover - multiple - block  "
 do_test rec-rsc-9 "Resource Recover - group/group"
 do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor"
 do_test stop-failure-no-quorum "Stop failure without quorum"
 do_test stop-failure-no-fencing "Stop failure without fencing available"
 do_test stop-failure-with-fencing "Stop failure with fencing available"
 
 echo ""
 do_test quorum-1 "No quorum - ignore"
 do_test quorum-2 "No quorum - freeze"
 do_test quorum-3 "No quorum - stop  "
 do_test quorum-4 "No quorum - start anyway"
 do_test quorum-5 "No quorum - start anyway (group)"
 do_test quorum-6 "No quorum - start anyway (clone)"
 do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze"
 
 echo ""
 do_test rec-node-1 "Node Recover - Startup   - no fence"
 do_test rec-node-2 "Node Recover - Startup   - fence   "
 do_test rec-node-3 "Node Recover - HA down   - no fence"
 do_test rec-node-4 "Node Recover - HA down   - fence   "
 do_test rec-node-5 "Node Recover - CRM down  - no fence"
 do_test rec-node-6 "Node Recover - CRM down  - fence   "
 do_test rec-node-7 "Node Recover - no quorum - ignore  "
 do_test rec-node-8 "Node Recover - no quorum - freeze  "
 do_test rec-node-9 "Node Recover - no quorum - stop    "
 do_test rec-node-10 "Node Recover - no quorum - stop w/fence"
 do_test rec-node-11 "Node Recover - CRM down w/ group - fence   "
 do_test rec-node-12 "Node Recover - nothing active - fence   "
 do_test rec-node-13 "Node Recover - failed resource + shutdown - fence   "
 do_test rec-node-15 "Node Recover - unknown lrm section"
 do_test rec-node-14 "Serialize all stonith's"
 
 echo ""
 do_test multi1 "Multiple Active (stop/start)"
 
 echo ""
 do_test migrate-begin     "Normal migration"
 do_test migrate-success   "Completed migration"
 do_test migrate-partial-1 "Completed migration, missing stop on source"
 do_test migrate-partial-2 "Successful migrate_to only"
 do_test migrate-partial-3 "Successful migrate_to only, target down"
 do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from"
 do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership"
 
 do_test migrate-fail-2 "Failed migrate_from"
 do_test migrate-fail-3 "Failed migrate_from + stop on source"
 do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target"
 do_test migrate-fail-5 "Failed migrate_from + stop on source and target"
 
 do_test migrate-fail-6 "Failed migrate_to"
 do_test migrate-fail-7 "Failed migrate_to + stop on source"
 do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target"
 do_test migrate-fail-9 "Failed migrate_to + stop on source and target"
 
 do_test migrate-stop "Migration in a stopping stack"
 do_test migrate-start "Migration in a starting stack"
 do_test migrate-stop_start "Migration in a restarting stack"
 do_test migrate-stop-complex "Migration in a complex stopping stack"
 do_test migrate-start-complex "Migration in a complex starting stack"
 do_test migrate-stop-start-complex "Migration in a complex moving stack"
 do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown"
 
 do_test migrate-1 "Migrate (migrate)"
 do_test migrate-2 "Migrate (stable)"
 do_test migrate-3 "Migrate (failed migrate_to)"
 do_test migrate-4 "Migrate (failed migrate_from)"
 do_test novell-252693 "Migration in a stopping stack"
 do_test novell-252693-2 "Migration in a starting stack"
 do_test novell-252693-3 "Non-Migration in a starting and stopping stack"
 do_test bug-1820 "Migration in a group"
 do_test bug-1820-1 "Non-migration in a group"
 do_test migrate-5 "Primitive migration with a clone"
 do_test migrate-fencing "Migration after Fencing"
 do_test migrate-both-vms "Migrate two VMs that have no colocation"
 
 do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B."
 do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B"
 do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both"
 do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable"
 do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable"
 do_test 6-migrate-group "Advanced migrate logic, migrate a group"
 do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false"
 do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping"
 do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping"
 do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A"
 do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping"
 
 #echo ""
 #do_test complex1 "Complex	"
 
 do_test bug-lf-2422 "Dependancy on partially active group - stop ocfs:*"
 
 echo ""
 do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node"
 do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones"
 do_test clone-anon-failcount "Merge failcounts for anonymous clones"
 do_test inc0 "Incarnation start"
 do_test inc1 "Incarnation start order"
 do_test inc2 "Incarnation silent restart, stop, move"
 do_test inc3 "Inter-incarnation ordering, silent restart, stop, move"
 do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)"
 do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)"
 do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)"
 do_test inc7 "Clone colocation"
 do_test inc8 "Clone anti-colocation"
 do_test inc9 "Non-unique clone"
 do_test inc10 "Non-unique clone (stop)"
 do_test inc11 "Primitive colocation with clones"
 do_test inc12 "Clone shutdown"
 do_test cloned-group "Make sure only the correct number of cloned groups are started"
 do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder"
 do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved"
 do_test clone-max-zero "Orphan processing with clone-max=0"
 do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node"
 do_test bug-lf-2160 "Dont shuffle clones due to colocation"
 do_test bug-lf-2213 "clone-node-max enforcement for cloned groups"
 do_test bug-lf-2153 "Clone ordering constraints"
 do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable"
 do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone"
 do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)"
 do_test clone-colocate-instance-2 "Colocation with a specific clone instance"
 do_test clone-order-instance "Ordering with specific clone instances"
 do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation"
 do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups"
 do_test bug-lf-2544 "Balanced clone placement"
 do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0"
 do_test bug-lf-2574 "Avoid clone shuffle"
 do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start"
 do_test bug-cl-5168 "Don't shuffle clones"
 do_test bug-cl-5170 "Prevent clone from starting with on-fail=block"
 do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block"
 do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)"
 do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)"
 do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)"
 
 echo ""
 do_test unfence-startup "Clean unfencing"
 do_test unfence-definition "Unfencing when the agent changes"
 do_test unfence-parameters "Unfencing when the agent parameters changes"
 
 echo ""
 do_test master-0 "Stopped -> Slave"
 do_test master-1 "Stopped -> Promote"
 do_test master-2 "Stopped -> Promote : notify"
 do_test master-3 "Stopped -> Promote : master location"
 do_test master-4 "Started -> Promote : master location"
 do_test master-5 "Promoted -> Promoted"
 do_test master-6 "Promoted -> Promoted (2)"
 do_test master-7 "Promoted -> Fenced"
 do_test master-8 "Promoted -> Fenced -> Moved"
 do_test master-9 "Stopped + Promotable + No quorum"
 do_test master-10 "Stopped -> Promotable : notify with monitor"
 do_test master-11 "Stopped -> Promote : colocation"
 do_test novell-239082 "Demote/Promote ordering"
 do_test novell-239087 "Stable master placement"
 do_test master-12 "Promotion based solely on rsc_location constraints"
 do_test master-13 "Include preferences of colocated resources when placing master"
 do_test master-demote "Ordering when actions depends on demoting a slave resource"
 do_test master-ordering "Prevent resources from starting that need a master"
 do_test bug-1765 "Master-Master Colocation (dont stop the slaves)"
 do_test master-group "Promotion of cloned groups"
 do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily"
 do_test master-failed-demote "Dont retry failed demote actions"
 do_test master-failed-demote-2 "Dont retry failed demote actions (notify=false)"
 do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does"
 do_test master-reattach "Re-attach to a running master"
 do_test master-allow-start "Don't include master score if it would prevent allocation"
 do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints"
 do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly"
 do_test master-role "Prevent target-role from promoting more than master-max instances"
 do_test bug-lf-2358 "Master-Master anti-colocation"
 do_test master-promotion-constraint "Mandatory master colocation constraints"
 do_test unmanaged-master "Ensure role is preserved for unmanaged resources"
 do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters"
 do_test master-demote-2 "Demote does not clear past failure"
 do_test master-move "Move master based on failure of colocated group"
 do_test master-probed-score "Observe the promotion score of probed resources"
 do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint"
 do_test colocation_constraint_stops_slave  "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint"
 do_test order_constraint_stops_master      "cl#5054 - Ensure master is demoted when stopped by order constraint"
 do_test order_constraint_stops_slave       "cl#5054 - Ensure slave is not demoted when stopped by order constraint"
 do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion."
 do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive"
 do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score"
 do_test master-demote-block "Block promotion if demote fails with on-fail=block"
 do_test master-dependant-ban "Don't stop instances from being active because a dependant is banned from that host"
 do_test master-stop "Stop instances due to location constraint with role=Started"
 do_test master-partially-demoted-group "Allow partially demoted group to finish demoting"
 do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced"
 do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted"
 
 echo ""
 do_test history-1 "Correctly parse stateful-1 resource state"
 
 echo ""
 do_test managed-0 "Managed (reference)"
 do_test managed-1 "Not managed - down "
 do_test managed-2 "Not managed - up   "
 do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource"
 do_test bug-5028-detach "Ensure detach still works"
 do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack"
 do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged "
 do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged "
 do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged "
 do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged "
 do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged"
 
 echo ""
 do_test interleave-0 "Interleave (reference)"
 do_test interleave-1 "coloc - not interleaved"
 do_test interleave-2 "coloc - interleaved   "
 do_test interleave-3 "coloc - interleaved (2)"
 do_test interleave-pseudo-stop "Interleaved clone during stonith"
 do_test interleave-stop "Interleaved clone during stop"
 do_test interleave-restart "Interleaved clone during dependancy restart"
 
 echo ""
 do_test notify-0 "Notify reference"
 do_test notify-1 "Notify simple"
 do_test notify-2 "Notify simple, confirm"
 do_test notify-3 "Notify move, confirm"
 do_test novell-239079 "Notification priority"
 #do_test notify-2 "Notify - 764"
 
 echo ""
 do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition"
 do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1"
 do_test 696 "OSDL #696 - CRM starts stonith RA without monitor"
 do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop"
 do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3"
 do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1"
 do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id"
 do_test 829 "OSDL #829"
 do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted"
 do_test 994-2 "OSDL #994 - with a dependant resource"
 do_test 1360 "OSDL #1360 - Clone stickiness"
 do_test 1484 "OSDL #1484 - on_fail=stop"
 do_test 1494 "OSDL #1494 - Clone stability"
 do_test unrunnable-1 "Unrunnable"
 do_test stonith-0 "Stonith loop - 1"
 do_test stonith-1 "Stonith loop - 2"
 do_test stonith-2 "Stonith loop - 3"
 do_test stonith-3 "Stonith startup"
 do_test stonith-4 "Stonith node state" --rc 4
 do_test bug-1572-1 "Recovery of groups depending on master/slave"
 do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted"
 do_test bug-1685 "Depends-on-master ordering"
 do_test bug-1822 "Dont promote partially active groups"
 do_test bug-pm-11 "New resource added to a m/s group"
 do_test bug-pm-12 "Recover only the failed portion of a cloned group"
 do_test bug-n-387749 "Don't shuffle clone instances"
 do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped"
 do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node"
 do_test bug-lf-1920 "Correctly handle probes that find active resources"
 do_test bnc-515172 "Location constraint with multiple expressions"
 do_test colocate-primitive-with-clone "Optional colocation with a clone"
 do_test use-after-free-merge "Use-after-free in native_merge_weights"
 do_test bug-lf-2551 "STONITH ordering for stop"
 do_test bug-lf-2606 "Stonith implies demote"
 do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults"
 do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering"
 do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false"
 do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false"
 do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false"
 do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts."
 do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false"
 do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false."
 do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false."
 do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false."
 do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false."
 do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false"
 do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true"
 do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources."
 do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases"
 do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload"
 do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change."
 do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart"
 do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed."
 do_test failcount "Ensure failcounts are correctly expired"
 do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present"
 do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart"
 do_test monitor-onfail-stop    "bug-5058 - Monitor failure wiht on-fail set to stop"
 do_test bug-5059 "No need to restart p_stateful1:*"
 do_test bug-5069-op-enabled  "Test on-fail=ignore with failure when monitor is enabled."
 do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled."
 do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections"
 do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block"
 
 do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc."
 do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith."
 do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group"
 do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group"
 do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)."
 do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)"
 do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group."
 do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs."
 do_test probe-timeout "cl#5099 - Default probe timeout"
 
 echo ""
 do_test systemhealth1  "System Health ()               #1"
 do_test systemhealth2  "System Health ()               #2"
 do_test systemhealth3  "System Health ()               #3"
 do_test systemhealthn1 "System Health (None)           #1"
 do_test systemhealthn2 "System Health (None)           #2"
 do_test systemhealthn3 "System Health (None)           #3"
 do_test systemhealthm1 "System Health (Migrate On Red) #1"
 do_test systemhealthm2 "System Health (Migrate On Red) #2"
 do_test systemhealthm3 "System Health (Migrate On Red) #3"
 do_test systemhealtho1 "System Health (Only Green)     #1"
 do_test systemhealtho2 "System Health (Only Green)     #2"
 do_test systemhealtho3 "System Health (Only Green)     #3"
 do_test systemhealthp1 "System Health (Progessive)     #1"
 do_test systemhealthp2 "System Health (Progessive)     #2"
 do_test systemhealthp3 "System Health (Progessive)     #3"
 
 echo ""
 do_test utilization "Placement Strategy - utilization"
 do_test minimal     "Placement Strategy - minimal"
 do_test balanced    "Placement Strategy - balanced"
 
 echo ""
 do_test placement-stickiness "Optimized Placement Strategy - stickiness"
 do_test placement-priority   "Optimized Placement Strategy - priority"
 do_test placement-location   "Optimized Placement Strategy - location"
 do_test placement-capacity   "Optimized Placement Strategy - capacity"
 
 echo ""
 do_test utilization-order1 "Utilization Order - Simple"
 do_test utilization-order2 "Utilization Order - Complex"
 do_test utilization-order3 "Utilization Order - Migrate"
 do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)"
 do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3"
 do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)"
 
 echo ""
 do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources"
 do_test node-maintenance-1 "cl#5128 - Node maintenance"
 do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)"
 
 do_test rsc-maintenance "Per-resource maintenance"
 
 echo ""
 do_test not-installed-agent "The resource agent is missing"
 do_test not-installed-tools "Something the resource agent needs is missing"
 
 echo ""
 do_test stopped-monitor-00	"Stopped Monitor - initial start"
 do_test stopped-monitor-01	"Stopped Monitor - failed started"
 do_test stopped-monitor-02	"Stopped Monitor - started multi-up"
 do_test stopped-monitor-03	"Stopped Monitor - stop started"
 do_test stopped-monitor-04	"Stopped Monitor - failed stop"
 do_test stopped-monitor-05	"Stopped Monitor - start unmanaged"
 do_test stopped-monitor-06	"Stopped Monitor - unmanaged multi-up"
 do_test stopped-monitor-07	"Stopped Monitor - start unmanaged multi-up"
 do_test stopped-monitor-08	"Stopped Monitor - migrate"
 do_test stopped-monitor-09	"Stopped Monitor - unmanage started"
 do_test stopped-monitor-10	"Stopped Monitor - unmanaged started multi-up"
 do_test stopped-monitor-11	"Stopped Monitor - stop unmanaged started"
 do_test stopped-monitor-12	"Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")"
 do_test stopped-monitor-20	"Stopped Monitor - initial stop"
 do_test stopped-monitor-21	"Stopped Monitor - stopped single-up"
 do_test stopped-monitor-22	"Stopped Monitor - stopped multi-up"
 do_test stopped-monitor-23	"Stopped Monitor - start stopped"
 do_test stopped-monitor-24	"Stopped Monitor - unmanage stopped"
 do_test stopped-monitor-25	"Stopped Monitor - unmanaged stopped multi-up"
 do_test stopped-monitor-26	"Stopped Monitor - start unmanaged stopped"
 do_test stopped-monitor-27	"Stopped Monitor - unmanaged stopped multi-up (target-role="Started")"
 do_test stopped-monitor-30	"Stopped Monitor - new node started"
 do_test stopped-monitor-31	"Stopped Monitor - new node stopped"
 
 echo""
 do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)"
 do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)"
 do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)"
 do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)"
 do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)"
 do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)"
 do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)"
 do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)"
 do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)"
 do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)"
 do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)"
 do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)"
 
 do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)"
 do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)"
 do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)"
 do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)"
 do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)"
 do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)"
 do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)"
 do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)"
 do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)"
 do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)"
 do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)"
 do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)"
 
 echo""
 do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)"
 do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)"
 do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)"
 do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)"
 do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)"
 do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)"
 do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)"
 do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)"
 do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)"
 do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)"
 do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)"
 do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)"
 
 do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)"
 do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)"
 do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)"
 do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)"
 do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)"
 do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)"
 do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)"
 do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)"
 do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)"
 do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)"
 do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)"
 do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)"
 
 echo""
 do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)"
 do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)"
 do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)"
 do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)"
 do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)"
 do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)"
 do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)"
 do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)"
 do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)"
 do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)"
 do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)"
 do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)"
 
 do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)"
 do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)"
 do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)"
 do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)"
 do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)"
 do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)"
 do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)"
 do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)"
 do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)"
 do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)"
 do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)"
 do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)"
 
 echo""
 do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)"
 do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)"
 do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)"
 do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)"
 do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)"
 do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)"
 do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)"
 do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)"
 do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)"
 do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)"
 do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)"
 do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)"
 
 do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)"
 do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)"
 do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)"
 do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)"
 do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)"
 do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)"
 do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)"
 do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)"
 do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)"
 do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)"
 do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)"
 do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)"
 
 echo ""
 do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)"
 do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)"
 do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)"
 do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)"
 do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)"
 do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)"
 do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)"
 
 do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)"
 do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)"
 do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)"
 do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)"
 do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)"
 do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)"
 do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)"
 
 do_test cluster-specific-params "Cluster-specific instance attributes based on rules"
 do_test site-specific-params "Site-specific instance attributes based on rules"
 
 echo ""
 do_test template-1 "Template - 1"
 do_test template-2 "Template - 2"
 do_test template-3 "Template - 3 (merge operations)"
 
 do_test template-coloc-1 "Template - Colocation 1"
 do_test template-coloc-2 "Template - Colocation 2"
 do_test template-coloc-3 "Template - Colocation 3"
 do_test template-order-1 "Template - Order 1"
 do_test template-order-2 "Template - Order 2"
 do_test template-order-3 "Template - Order 3"
 do_test template-ticket  "Template - Ticket"
 
 do_test template-rsc-sets-1  "Template - Resource Sets 1"
 do_test template-rsc-sets-2  "Template - Resource Sets 2"
 do_test template-rsc-sets-3  "Template - Resource Sets 3"
 do_test template-rsc-sets-4  "Template - Resource Sets 4"
 
 do_test template-clone-primitive "Cloned primitive from template"
 do_test template-clone-group     "Cloned group from template"
 
 do_test location-sets-templates "Resource sets and templates - Location"
 
 do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)"
 do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)"
 do_test tags-location      "Tags - Location"
 do_test tags-ticket        "Tags - Ticket"
 
 echo ""
 do_test container-1 "Container - initial"
 do_test container-2 "Container - monitor failed"
 do_test container-3 "Container - stop failed"
 do_test container-4 "Container - reached migration-threshold"
 do_test container-group-1 "Container in group - initial"
 do_test container-group-2 "Container in group - monitor failed"
 do_test container-group-3 "Container in group - stop failed"
 do_test container-group-4 "Container in group - reached migration-threshold"
 do_test container-is-remote-node "Place resource within container when container is remote-node"
 do_test bug-rh-1097457 "Kill user defined container/contents ordering"
 
 echo ""
 do_test whitebox-fail1 "Fail whitebox container rsc."
 do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection."
 do_test whitebox-fail3 "Failed containers should not run nested on remote nodes."
 do_test whitebox-start "Start whitebox container with resources assigned to it"
 do_test whitebox-stop "Stop whitebox container with resources assigned to it"
 do_test whitebox-move "Move whitebox container with resources assigned to it"
 do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource"
 do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established"
 do_test whitebox-orphaned    "Properly shutdown orphaned whitebox container"
 do_test whitebox-orphan-ms   "Properly tear down orphan ms resources on remote-nodes"
 do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start."
 do_test whitebox-migrate1 "Migrate both container and connection resource"
 
 echo ""
 do_test remote-startup-probes  "Baremetal remote-node startup probes"
 do_test remote-startup         "Startup a newly discovered remote-nodes with no status."
 do_test remote-fence-unclean   "Fence unclean baremetal remote-node"
+do_test remote-fence-unclean2  "Fence baremetal remote-node after cluster node fails and connection can not be recovered"
 do_test remote-move            "Move remote-node connection resource"
 do_test remote-disable         "Disable a baremetal remote-node"
 do_test remote-orphaned        "Properly shutdown orphaned connection resource"
+do_test remote-recover         "Recover connection resource after cluster-node fails."
 do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section"
 echo ""
 test_results
diff --git a/pengine/test10/remote-fence-unclean2.dot b/pengine/test10/remote-fence-unclean2.dot
new file mode 100644
index 0000000000..6cff564ec7
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.dot
@@ -0,0 +1,10 @@
+digraph "g" {
+"all_stopped" [ style=bold color="green" fontcolor="orange"]
+"fake_stop_0 rhel7-alt4" -> "all_stopped" [ style = bold]
+"fake_stop_0 rhel7-alt4" [ style=bold color="green" fontcolor="orange"]
+"stonith 'reboot' rhel7-alt4" -> "fake_stop_0 rhel7-alt4" [ style = bold]
+"stonith 'reboot' rhel7-alt4" -> "stonith_complete" [ style = bold]
+"stonith 'reboot' rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"stonith_complete" -> "all_stopped" [ style = bold]
+"stonith_complete" [ style=bold color="green" fontcolor="orange"]
+}
diff --git a/pengine/test10/remote-fence-unclean2.exp b/pengine/test10/remote-fence-unclean2.exp
new file mode 100644
index 0000000000..e58b617d91
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.exp
@@ -0,0 +1,49 @@
+<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
+  <synapse id="0">
+    <action_set>
+      <pseudo_event id="6" operation="stop" operation_key="fake_stop_0">
+        <attributes CRM_meta_name="stop" CRM_meta_timeout="20000" />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="1">
+    <action_set>
+      <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4">
+        <attributes CRM_meta_last_failure_fake="1411503989" CRM_meta_on_node="rhel7-alt4" CRM_meta_on_node_uuid="rhel7-alt4" CRM_meta_probe_complete="true" CRM_meta_stonith_action="reboot" />
+      </crm_event>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="2">
+    <action_set>
+      <pseudo_event id="7" operation="stonith_complete" operation_key="stonith_complete">
+        <attributes />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="3">
+    <action_set>
+      <pseudo_event id="1" operation="all_stopped" operation_key="all_stopped">
+        <attributes />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <pseudo_event id="6" operation="stop" operation_key="fake_stop_0"/>
+      </trigger>
+      <trigger>
+        <pseudo_event id="7" operation="stonith_complete" operation_key="stonith_complete"/>
+      </trigger>
+    </inputs>
+  </synapse>
+</transition_graph>
diff --git a/pengine/test10/remote-fence-unclean2.scores b/pengine/test10/remote-fence-unclean2.scores
new file mode 100644
index 0000000000..10fc7fd7fa
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.scores
@@ -0,0 +1,13 @@
+Allocation scores:
+native_color: fake allocation score on rhel7-alt1: 0
+native_color: fake allocation score on rhel7-alt2: 0
+native_color: fake allocation score on rhel7-alt3: 0
+native_color: fake allocation score on rhel7-alt4: INFINITY
+native_color: rhel7-alt4 allocation score on rhel7-alt1: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt2: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt3: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt4: -INFINITY
+native_color: shooter allocation score on rhel7-alt1: 0
+native_color: shooter allocation score on rhel7-alt2: 0
+native_color: shooter allocation score on rhel7-alt3: 0
+native_color: shooter allocation score on rhel7-alt4: -INFINITY
diff --git a/pengine/test10/remote-fence-unclean2.summary b/pengine/test10/remote-fence-unclean2.summary
new file mode 100644
index 0000000000..bfaf77b5dd
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.summary
@@ -0,0 +1,30 @@
+
+Current cluster status:
+Node rhel7-alt1 (1): standby
+Node rhel7-alt2 (2): standby
+RemoteNode rhel7-alt4: UNCLEAN (offline)
+OFFLINE: [ rhel7-alt3 ]
+
+ shooter	(stonith:fence_xvm):	Stopped 
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
+
+Transition Summary:
+ * Stop    fake	(rhel7-alt4)
+
+Executing cluster transition:
+ * Fencing rhel7-alt4 (reboot)
+ * Pseudo action:   stonith_complete
+ * Pseudo action:   fake_stop_0
+ * Pseudo action:   all_stopped
+
+Revised cluster status:
+Node rhel7-alt1 (1): standby
+Node rhel7-alt2 (2): standby
+OFFLINE: [ rhel7-alt3 ]
+RemoteOFFLINE: [ rhel7-alt4 ]
+
+ shooter	(stonith:fence_xvm):	Stopped 
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
+ fake	(ocf::heartbeat:Dummy):	Stopped 
+
diff --git a/pengine/test10/remote-fence-unclean2.xml b/pengine/test10/remote-fence-unclean2.xml
new file mode 100644
index 0000000000..78fc4f1f94
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.xml
@@ -0,0 +1,115 @@
+<cib crm_feature_set="3.0.9" validate-with="pacemaker-2.1" epoch="13" num_updates="8" admin_epoch="0" cib-last-written="Tue Sep 23 16:28:22 2014" have-quorum="1" dc-uuid="2">
+  <configuration>
+    <crm_config>
+      <cluster_property_set id="cib-bootstrap-options">
+        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.12-6da3f72"/>
+        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
+        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="phd"/>
+        <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1411504087"/>
+      </cluster_property_set>
+    </crm_config>
+    <nodes>
+      <node id="1" uname="rhel7-alt1">
+        <instance_attributes id="nodes-1">
+          <nvpair id="nodes-1-standby" name="standby" value="on"/>
+        </instance_attributes>
+      </node>
+      <node id="2" uname="rhel7-alt2">
+        <instance_attributes id="nodes-2">
+          <nvpair id="nodes-2-standby" name="standby" value="on"/>
+        </instance_attributes>
+      </node>
+      <node id="3" uname="rhel7-alt3"/>
+    </nodes>
+    <resources>
+      <primitive class="stonith" id="shooter" type="fence_xvm">
+        <instance_attributes id="shooter-instance_attributes"/>
+        <operations>
+          <op id="shooter-monitor-interval-60s" interval="60s" name="monitor"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="rhel7-alt4" provider="pacemaker" type="remote">
+        <instance_attributes id="rhel7-alt4-instance_attributes"/>
+        <operations>
+          <op id="rhel7-alt4-start-timeout-15" interval="0s" name="start" timeout="15"/>
+          <op id="rhel7-alt4-stop-timeout-15" interval="0s" name="stop" timeout="15"/>
+          <op id="rhel7-alt4-monitor-timeout-15" interval="60s" name="monitor" timeout="15"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="fake" provider="heartbeat" type="Dummy">
+        <instance_attributes id="fake-instance_attributes"/>
+        <operations>
+          <op id="fake-start-timeout-20" interval="0s" name="start" timeout="20"/>
+          <op id="fake-stop-timeout-20" interval="0s" name="stop" timeout="20"/>
+          <op id="fake-monitor-interval-10" interval="10" name="monitor" timeout="20"/>
+        </operations>
+      </primitive>
+    </resources>
+    <constraints>
+      <rsc_location id="location-fake-rhel7-alt4-INFINITY" node="rhel7-alt4" rsc="fake" score="INFINITY"/>
+    </constraints>
+  </configuration>
+  <status>
+    <node_state id="2" uname="rhel7-alt2" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <transient_attributes id="2">
+        <instance_attributes id="status-2">
+          <nvpair id="status-2-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-2-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+      <lrm id="2">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="14" rc-code="0" op-status="0" interval="0" last-run="1411503701" last-rc-change="1411503701" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="7" op-status="0" interval="0" last-run="1411504086" last-rc-change="1411504086" exec-time="34" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+    </node_state>
+    <node_state id="1" uname="rhel7-alt1" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <lrm id="1">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="0" op-status="0" interval="0" last-run="1411504102" last-rc-change="1411504102" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+            <lrm_rsc_op id="shooter_monitor_60000" operation_key="shooter_monitor_60000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="12" rc-code="0" op-status="0" interval="60000" last-rc-change="1411504079" exec-time="10" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="1">
+        <instance_attributes id="status-1">
+          <nvpair id="status-1-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-1-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+    <node_state id="3" uname="rhel7-alt3" in_ccm="false" crmd="offline" crm-debug-origin="send_stonith_update" join="down" expected="down"/>
+    <node_state id="rhel7-alt4" remote_node="true" uname="rhel7-alt4" crm-debug-origin="post_cache_update">
+      <lrm id="rhel7-alt4">
+        <lrm_resources>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_failure_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt3" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_monitor_10000" operation_key="fake_monitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="35" rc-code="0" op-status="0" interval="10000" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt3"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="rhel7-alt4">
+        <instance_attributes id="status-rhel7-alt4">
+          <nvpair id="status-rhel7-alt4-probe_complete" name="probe_complete" value="true"/>
+          <nvpair id="status-rhel7-alt4-last-failure-fake" name="last-failure-fake" value="1411503989"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+  </status>
+</cib>
diff --git a/pengine/test10/remote-recover.dot b/pengine/test10/remote-recover.dot
new file mode 100644
index 0000000000..1da6a7b1a6
--- /dev/null
+++ b/pengine/test10/remote-recover.dot
@@ -0,0 +1,17 @@
+ digraph "g" {
+"all_stopped" [ style=bold color="green" fontcolor="orange"]
+"fake_monitor_10000 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"fake_start_0 rhel7-alt4" -> "fake_monitor_10000 rhel7-alt4" [ style = bold]
+"fake_start_0 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"fake_stop_0 rhel7-alt4" -> "all_stopped" [ style = bold]
+"fake_stop_0 rhel7-alt4" -> "fake_start_0 rhel7-alt4" [ style = bold]
+"fake_stop_0 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"rhel7-alt4_monitor_60000 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+"rhel7-alt4_start_0 rhel7-alt1" -> "fake_monitor_10000 rhel7-alt4" [ style = bold]
+"rhel7-alt4_start_0 rhel7-alt1" -> "fake_start_0 rhel7-alt4" [ style = bold]
+"rhel7-alt4_start_0 rhel7-alt1" -> "rhel7-alt4_monitor_60000 rhel7-alt1" [ style = bold]
+"rhel7-alt4_start_0 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+"shooter_monitor_60000 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+"shooter_start_0 rhel7-alt1" -> "shooter_monitor_60000 rhel7-alt1" [ style = bold]
+"shooter_start_0 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+}
diff --git a/pengine/test10/remote-recover.exp b/pengine/test10/remote-recover.exp
new file mode 100644
index 0000000000..37e4f7156d
--- /dev/null
+++ b/pengine/test10/remote-recover.exp
@@ -0,0 +1,99 @@
+<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
+  <synapse id="0">
+    <action_set>
+      <rsc_op id="8" operation="monitor" operation_key="shooter_monitor_60000" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="shooter" class="stonith" type="fence_xvm"/>
+        <attributes CRM_meta_interval="60000" CRM_meta_name="monitor" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="7" operation="start" operation_key="shooter_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="1">
+    <action_set>
+      <rsc_op id="7" operation="start" operation_key="shooter_start_0" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="shooter" class="stonith" type="fence_xvm"/>
+        <attributes CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="2">
+    <action_set>
+      <rsc_op id="10" operation="monitor" operation_key="rhel7-alt4_monitor_60000" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="rhel7-alt4" class="ocf" provider="pacemaker" type="remote"/>
+        <attributes CRM_meta_interval="60000" CRM_meta_name="monitor" CRM_meta_timeout="15000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="3">
+    <action_set>
+      <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="rhel7-alt4" class="ocf" provider="pacemaker" type="remote"/>
+        <attributes CRM_meta_name="start" CRM_meta_timeout="15000" />
+      </rsc_op>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="4">
+    <action_set>
+      <rsc_op id="13" operation="monitor" operation_key="fake_monitor_10000" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
+        <attributes CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+      <trigger>
+        <rsc_op id="12" operation="start" operation_key="fake_start_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="5">
+    <action_set>
+      <rsc_op id="12" operation="start" operation_key="fake_start_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
+        <attributes CRM_meta_name="start" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+      <trigger>
+        <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="6">
+    <action_set>
+      <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
+        <attributes CRM_meta_name="stop" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="7">
+    <action_set>
+      <pseudo_event id="1" operation="all_stopped" operation_key="all_stopped">
+        <attributes />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+</transition_graph>
diff --git a/pengine/test10/remote-recover.scores b/pengine/test10/remote-recover.scores
new file mode 100644
index 0000000000..10fc7fd7fa
--- /dev/null
+++ b/pengine/test10/remote-recover.scores
@@ -0,0 +1,13 @@
+Allocation scores:
+native_color: fake allocation score on rhel7-alt1: 0
+native_color: fake allocation score on rhel7-alt2: 0
+native_color: fake allocation score on rhel7-alt3: 0
+native_color: fake allocation score on rhel7-alt4: INFINITY
+native_color: rhel7-alt4 allocation score on rhel7-alt1: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt2: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt3: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt4: -INFINITY
+native_color: shooter allocation score on rhel7-alt1: 0
+native_color: shooter allocation score on rhel7-alt2: 0
+native_color: shooter allocation score on rhel7-alt3: 0
+native_color: shooter allocation score on rhel7-alt4: -INFINITY
diff --git a/pengine/test10/remote-recover.summary b/pengine/test10/remote-recover.summary
new file mode 100644
index 0000000000..8fd74806b2
--- /dev/null
+++ b/pengine/test10/remote-recover.summary
@@ -0,0 +1,36 @@
+
+Current cluster status:
+Node rhel7-alt2 (2): standby
+RemoteNode rhel7-alt4: UNCLEAN (offline)
+Online: [ rhel7-alt1 ]
+OFFLINE: [ rhel7-alt3 ]
+
+ shooter	(stonith:fence_xvm):	Stopped 
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
+
+Transition Summary:
+ * Start   shooter	(rhel7-alt1)
+ * Start   rhel7-alt4	(rhel7-alt1)
+ * Restart fake	(Started rhel7-alt4)
+
+Executing cluster transition:
+ * Resource action: shooter         start on rhel7-alt1
+ * Resource action: rhel7-alt4      start on rhel7-alt1
+ * Resource action: fake            stop on rhel7-alt4
+ * Pseudo action:   all_stopped
+ * Resource action: shooter         monitor=60000 on rhel7-alt1
+ * Resource action: rhel7-alt4      monitor=60000 on rhel7-alt1
+ * Resource action: fake            start on rhel7-alt4
+ * Resource action: fake            monitor=10000 on rhel7-alt4
+
+Revised cluster status:
+Node rhel7-alt2 (2): standby
+Online: [ rhel7-alt1 ]
+OFFLINE: [ rhel7-alt3 ]
+RemoteOnline: [ rhel7-alt4 ]
+
+ shooter	(stonith:fence_xvm):	Started rhel7-alt1 
+ rhel7-alt4	(ocf::pacemaker:remote):	Started rhel7-alt1 
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
+
diff --git a/pengine/test10/remote-recover.xml b/pengine/test10/remote-recover.xml
new file mode 100644
index 0000000000..1a83dd985b
--- /dev/null
+++ b/pengine/test10/remote-recover.xml
@@ -0,0 +1,114 @@
+<cib crm_feature_set="3.0.9" validate-with="pacemaker-2.1" epoch="13" num_updates="8" admin_epoch="0" cib-last-written="Tue Sep 23 16:28:22 2014" have-quorum="1" dc-uuid="2">
+  <configuration>
+    <crm_config>
+      <cluster_property_set id="cib-bootstrap-options">
+        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.12-6da3f72"/>
+        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
+        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="phd"/>
+        <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1411504087"/>
+      </cluster_property_set>
+    </crm_config>
+    <nodes>
+      <node id="1" uname="rhel7-alt1">
+        <instance_attributes id="nodes-1">
+        </instance_attributes>
+      </node>
+      <node id="2" uname="rhel7-alt2">
+        <instance_attributes id="nodes-2">
+          <nvpair id="nodes-2-standby" name="standby" value="on"/>
+        </instance_attributes>
+      </node>
+      <node id="3" uname="rhel7-alt3"/>
+    </nodes>
+    <resources>
+      <primitive class="stonith" id="shooter" type="fence_xvm">
+        <instance_attributes id="shooter-instance_attributes"/>
+        <operations>
+          <op id="shooter-monitor-interval-60s" interval="60s" name="monitor"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="rhel7-alt4" provider="pacemaker" type="remote">
+        <instance_attributes id="rhel7-alt4-instance_attributes"/>
+        <operations>
+          <op id="rhel7-alt4-start-timeout-15" interval="0s" name="start" timeout="15"/>
+          <op id="rhel7-alt4-stop-timeout-15" interval="0s" name="stop" timeout="15"/>
+          <op id="rhel7-alt4-monitor-timeout-15" interval="60s" name="monitor" timeout="15"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="fake" provider="heartbeat" type="Dummy">
+        <instance_attributes id="fake-instance_attributes"/>
+        <operations>
+          <op id="fake-start-timeout-20" interval="0s" name="start" timeout="20"/>
+          <op id="fake-stop-timeout-20" interval="0s" name="stop" timeout="20"/>
+          <op id="fake-monitor-interval-10" interval="10" name="monitor" timeout="20"/>
+        </operations>
+      </primitive>
+    </resources>
+    <constraints>
+      <rsc_location id="location-fake-rhel7-alt4-INFINITY" node="rhel7-alt4" rsc="fake" score="INFINITY"/>
+    </constraints>
+  </configuration>
+  <status>
+    <node_state id="2" uname="rhel7-alt2" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <transient_attributes id="2">
+        <instance_attributes id="status-2">
+          <nvpair id="status-2-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-2-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+      <lrm id="2">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="14" rc-code="0" op-status="0" interval="0" last-run="1411503701" last-rc-change="1411503701" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="7" op-status="0" interval="0" last-run="1411504086" last-rc-change="1411504086" exec-time="34" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+    </node_state>
+    <node_state id="1" uname="rhel7-alt1" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <lrm id="1">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="0" op-status="0" interval="0" last-run="1411504102" last-rc-change="1411504102" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+            <lrm_rsc_op id="shooter_monitor_60000" operation_key="shooter_monitor_60000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="12" rc-code="0" op-status="0" interval="60000" last-rc-change="1411504079" exec-time="10" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="1">
+        <instance_attributes id="status-1">
+          <nvpair id="status-1-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-1-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+    <node_state id="3" uname="rhel7-alt3" in_ccm="false" crmd="offline" crm-debug-origin="send_stonith_update" join="down" expected="down"/>
+    <node_state id="rhel7-alt4" remote_node="true" uname="rhel7-alt4" crm-debug-origin="post_cache_update">
+      <lrm id="rhel7-alt4">
+        <lrm_resources>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_failure_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt3" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_monitor_10000" operation_key="fake_monitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="35" rc-code="0" op-status="0" interval="10000" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt3"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="rhel7-alt4">
+        <instance_attributes id="status-rhel7-alt4">
+          <nvpair id="status-rhel7-alt4-probe_complete" name="probe_complete" value="true"/>
+          <nvpair id="status-rhel7-alt4-last-failure-fake" name="last-failure-fake" value="1411503989"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+  </status>
+</cib>
diff --git a/tools/crm_mon.sysconfig b/tools/crm_mon.sysconfig
new file mode 100644
index 0000000000..bd4e5bd860
--- /dev/null
+++ b/tools/crm_mon.sysconfig
@@ -0,0 +1,7 @@
+#
+# Example for SNMP : OPTIONS="-d -S <snmp manager address> -W -p <pid file>"
+#    OPTIONS="-d -S 192.168.40.2 -W -p /tmp/ClusterMon-upstart.pid"
+#
+# Please refer to the crm_mon help output and manual page for details on each option.
+#
+#OPTIONS="-d"
diff --git a/tools/crm_mon.upstart.in b/tools/crm_mon.upstart.in
new file mode 100644
index 0000000000..ef0fe7a7f2
--- /dev/null
+++ b/tools/crm_mon.upstart.in
@@ -0,0 +1,39 @@
+# crm_mon - Upstart job for the Pacemaker cluster monitor daemon
+#
+#
+
+kill timeout 3600
+respawn
+respawn limit 10 3600
+
+expect fork
+
+env prog=crm_mon
+env rpm_sysconf=@sysconfdir@/sysconfig/crm_mon
+env rpm_lockfile=@localstatedir@/lock/subsys/crm_mon
+env deb_sysconf=@sysconfdir@/default/crm_mon
+env deb_lockfile=@localstatedir@/lock/crm_mon
+
+
+script
+    [ -f "$rpm_sysconf" ] && . "$rpm_sysconf"    # settings file is optional; source it only if present
+    [ -f "$deb_sysconf" ] && . "$deb_sysconf"    # same for the Debian-style location
+    exec $prog $OPTIONS    # OPTIONS stays unquoted so it word-splits into separate arguments
+end script
+
+post-start script
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
+    touch $LOCK_FILE
+end script
+
+post-stop script
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
+    rm -f $LOCK_FILE
+end script
+