diff --git a/configure.ac b/configure.ac
index c4ac9de1fe..ce82581bf5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,1475 +1,1476 @@
dnl
dnl autoconf for Pacemaker
dnl
dnl License: GNU General Public License (GPL)

dnl ===============================================
dnl Bootstrap
dnl ===============================================
AC_PREREQ(2.53)

dnl Suggested structure:
dnl     information on the package
dnl     checks for programs
dnl     checks for libraries
dnl     checks for header files
dnl     checks for types
dnl     checks for structures
dnl     checks for compiler characteristics
dnl     checks for library functions
dnl     checks for system services

-AC_INIT(pacemaker, 1.0.7, pacemaker@oss.clusterlabs.org)
+AC_INIT(pacemaker, 1.1.0, pacemaker@oss.clusterlabs.org)
CRM_DTD_VERSION="1.0"

PKG_FEATURES=""
HB_PKG=heartbeat

AC_CONFIG_AUX_DIR(.)
AC_CANONICAL_HOST

dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below)
dnl
dnl Internal header: include/config.h
dnl   - Contains ALL defines
dnl   - include/config.h.in is generated automatically by autoheader
dnl   - NOT to be included in any header files except lha_internal.h
dnl     (which is also not to be included in any other header files)
dnl
dnl External header: include/crm_config.h
dnl   - Contains a subset of defines checked here
dnl   - Manually edit include/crm_config.h.in to have configure include
dnl     new defines
dnl   - Should not include HAVE_* defines
dnl   - Safe to include anywhere
AM_CONFIG_HEADER(include/config.h include/crm_config.h)
ALL_LINGUAS="en fr"

dnl Packagers may override the version and name that AC_INIT recorded.
AC_ARG_WITH(version,
    [ --with-version=version Override package version (if you're a packager needing to pretend) ],
    [ PACKAGE_VERSION="$withval" ])

AC_ARG_WITH(pkg-name,
    [ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ],
    [ PACKAGE_NAME="$withval" ])

AM_INIT_AUTOMAKE($PACKAGE_NAME, $PACKAGE_VERSION)
AC_DEFINE_UNQUOTED(PACEMAKER_VERSION, "$PACKAGE_VERSION", Current pacemaker version)

dnl automake >= 1.11 offers --enable-silent-rules for suppressing the output from
dnl normal compilation.  When a failure occurs, it will then display the full
dnl command line
dnl Wrap in m4_ifdef to avoid breaking on older platforms
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES])

CC_IN_CONFIGURE=yes
export CC_IN_CONFIGURE

LDD=ldd

dnl ========================================================================
dnl Compiler characteristics
dnl ========================================================================

AC_PROG_CC dnl Can force other with environment variable "CC".
AM_PROG_CC_C_O
AC_PROG_CC_STDC

AC_LIBTOOL_DLOPEN dnl Enable dlopen support...
AC_LIBLTDL_CONVENIENCE dnl make libltdl a convenience lib
AC_PROG_LIBTOOL

AC_C_STRINGIZE
AC_TYPE_SIZE_T
AC_CHECK_SIZEOF(char)
AC_CHECK_SIZEOF(short)
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(long long)
AC_STRUCT_TIMEZONE

dnl ===============================================
dnl Helpers
dnl ===============================================

dnl cc_supports_flag FLAGS...
dnl   Compiles a trivial program with CFLAGS temporarily replaced by the
dnl   given flags.  Returns 0 if the compile succeeds and 1 otherwise.
cc_supports_flag() {
    local CFLAGS="$@"
    AC_MSG_CHECKING(whether $CC supports "$@")
    AC_COMPILE_IFELSE([int main(){return 0;}] ,[RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
    return $RC
}

dnl extract_header_define HEADER MACRO
dnl   Generates, compiles and runs a small C program that prints the string
dnl   value of MACRO as defined by HEADER, then writes that value to stdout
dnl   so that callers can capture it with backticks.
extract_header_define() {
    AC_MSG_CHECKING(for $2 in $1)
    Cfile=/tmp/extract_define.$2.${$}
    dnl The generated program calls printf(), so it must include <stdio.h>;
    dnl a bare "#include" directive would make the compile fail.
    printf "#include <stdio.h>\n" > ${Cfile}.c
    printf "#include <%s>\n" $1 >> ${Cfile}.c
    printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c
    $CC $CFLAGS ${Cfile}.c -o ${Cfile}
    value=`${Cfile}`
    AC_MSG_RESULT($value)
    dnl Emit the value as data, never as a format string, so that values
    dnl containing % or whitespace are passed through unchanged.
    printf '%s' "$value"
    rm -f ${Cfile}.c ${Cfile}
}

dnl ===============================================
dnl Configure Options
dnl ===============================================

dnl Some systems, like Solaris require a custom package name
AC_ARG_WITH(pkgname,
    [ --with-pkgname=name name for pkg (typically for Solaris) ],
    [ PKGNAME="$withval" ],
    [ PKGNAME="LXHAhb" ],
  )
AC_SUBST(PKGNAME)

AC_ARG_ENABLE([ansi],
    [ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers. [default=yes]])

AC_ARG_ENABLE([fatal-warnings],
    [ --enable-fatal-warnings very pedantic and fatal warnings for gcc [default=yes]])

AC_ARG_ENABLE([pretty],
    [ --enable-pretty Pretty-print compiler output unless there is an error [default=no]])

AC_ARG_ENABLE([quiet],
    [ --enable-quiet Supress make output unless there is an error [default=no]])

AC_ARG_ENABLE([thread-safe],
    [ --enable-thread-safe Enable some client libraries to be thread safe. [default=no]])

AC_ARG_ENABLE([bundled-ltdl],
    [ --enable-bundled-ltdl Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]])
LTDL_LIBS=""

dnl Optional components default to "try": use them when found, carry on when not.
AC_ARG_WITH(ais,
    [ --with-ais Support the OpenAIS messaging and membership layer ],
    [ SUPPORT_AIS=$withval ],
    [ SUPPORT_AIS=try ],
  )

AC_ARG_WITH(heartbeat,
    [ --with-heartbeat Support the Heartbeat messaging and membership layer ],
    [ SUPPORT_HEARTBEAT=$withval ],
    [ SUPPORT_HEARTBEAT=try ],
  )

AC_ARG_WITH(snmp,
    [ --with-snmp Support the SNMP protocol ],
    [ SUPPORT_SNMP=$withval ],
    [ SUPPORT_SNMP=try ],
  )

AC_ARG_WITH(esmtp,
    [ --with-esmtp Support the sending mail notifications with the esmtp library ],
    [ SUPPORT_ESMTP=$withval ],
    [ SUPPORT_ESMTP=try ],
  )

AISPREFIX=""
AC_ARG_WITH(ais-prefix,
    [ --with-ais-prefix=DIR Prefix used when OpenAIS was installed [$prefix]],
    [ AISPREFIX=$withval ],
    [ AISPREFIX=$prefix ])

LCRSODIR=""
AC_ARG_WITH(lcrso-dir,
    [ --with-lcrso-dir=DIR OpenAIS lcrso files. ],
    [ LCRSODIR="$withval" ])

INITDIR=""
AC_ARG_WITH(initdir,
    [ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]],
    [ INITDIR="$withval" ])

dnl ===============================================
dnl General Processing
dnl ===============================================

AC_SUBST(HB_PKG)

INIT_EXT=""
echo Our Host OS: $host_os/$host

dnl An unset prefix (NONE) is mapped to /usr.
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
  NONE) prefix=/usr;;
esac

AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
  dnl For consistency with Heartbeat, map NONE->$prefix
  NONE)   exec_prefix=$prefix;;
  prefix) exec_prefix=$prefix;;
esac

AC_MSG_NOTICE(Sanitizing ais_prefix: ${AISPREFIX})
case $AISPREFIX in
  dnl For consistency with Heartbeat, map NONE->$prefix
  NONE)   AISPREFIX=$prefix;;
  prefix) AISPREFIX=$prefix;;
esac

dnl When no init directory was given, use the first candidate that exists.
AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR})
case $INITDIR in
  prefix) INITDIR=$prefix;;
  "")
    AC_MSG_CHECKING(which init (rc) directory to use)
    for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \
        /usr/local/etc/rc.d /etc/rc.d
    do
      if
        test -d $initdir
      then
        INITDIR=$initdir
        break
      fi
    done
    AC_MSG_RESULT($INITDIR);;
esac
AC_SUBST(INITDIR)

dnl Prefer ${exec_prefix}/lib64 over ${exec_prefix}/lib when both exist.
AC_MSG_NOTICE(Sanitizing libdir: ${libdir})
case $libdir in
  dnl For consistency with Heartbeat, map NONE->$prefix
  *prefix*|NONE)
    AC_MSG_CHECKING(which lib directory to use)
    for aDir in lib64 lib
    do
      trydir="${exec_prefix}/${aDir}"
      if
        test -d ${trydir}
      then
        libdir=${trydir}
        break
      fi
    done
    AC_MSG_RESULT($libdir);
    ;;
esac

dnl Expand autoconf variables so that we don't end up with '${prefix}'
dnl in #defines and python scripts
dnl NOTE: Autoconf deliberately leaves them unexpanded to allow
dnl    make exec_prefix=/foo install
dnl No longer being able to do this seems like no great loss to me...
dnl Each line below re-evaluates a variable's current value through the shell
dnl (inner "eval echo") and stores the result back (outer "eval"), so that
dnl embedded references such as ${prefix} are replaced by their values.
dnl prefix and exec_prefix are handled first; the later variables presumably
dnl refer to them in their autoconf defaults -- TODO confirm.
eval prefix="`eval echo ${prefix}`"
eval exec_prefix="`eval echo ${exec_prefix}`"
eval bindir="`eval echo ${bindir}`"
eval sbindir="`eval echo ${sbindir}`"
eval libexecdir="`eval echo ${libexecdir}`"
eval datadir="`eval echo ${datadir}`"
eval sysconfdir="`eval echo ${sysconfdir}`"
eval sharedstatedir="`eval echo ${sharedstatedir}`"
eval localstatedir="`eval echo ${localstatedir}`"
eval libdir="`eval echo ${libdir}`"
eval includedir="`eval echo ${includedir}`"
eval oldincludedir="`eval echo ${oldincludedir}`"
eval infodir="`eval echo ${infodir}`"
eval mandir="`eval echo ${mandir}`"

dnl Home-grown variables
eval INITDIR="${INITDIR}"
eval docdir="`eval echo ${docdir}`"
dnl Fall back to a versioned directory under datadir when docdir is empty.
if test x"${docdir}" = x""; then
    docdir=${datadir}/doc/${PACKAGE}-${VERSION}
    #docdir=${datadir}/doc/packages/${PACKAGE}
fi
AC_SUBST(docdir)

dnl Warn (without failing) about every expanded directory that does not
dnl exist on the build host.
for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \
    sharedstatedir localstatedir libdir includedir oldincludedir infodir \
    mandir INITDIR docdir
do
    dnl Indirect lookup: fetch the value of the variable whose name is in $j.
    dirname=`eval echo '${'${j}'}'`
    if
        test ! -d "$dirname"
    then
        AC_MSG_WARN([$j directory ($dirname) does not exist!])
    fi
done

dnl This OS-based decision-making is poor autotools practice;
dnl feature-based mechanisms are strongly preferred.
dnl
dnl So keep this section to a bare minimum; regard as a "necessary evil".
dnl Per-OS adjustments to the search paths and the init script extension.
case "$host_os" in
*bsd*)
    dnl Append rather than overwrite so that LIBS supplied by the user or set
    dnl by earlier checks is kept, as the darwin branch already does.
    LIBS="$LIBS -L/usr/local/lib"
    CPPFLAGS="$CPPFLAGS -I/usr/local/include"
    INIT_EXT=".sh"
    ;;
*solaris*)
    ;;
*linux*)
    AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
    CFLAGS="$CFLAGS -I${prefix}/include"
    ;;
darwin*)
    AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
    LIBS="$LIBS -L${prefix}/lib"
    CFLAGS="$CFLAGS -I${prefix}/include"
    ;;
esac

dnl Eventually remove this
CFLAGS="$CFLAGS -I${prefix}/include/heartbeat"

AC_SUBST(INIT_EXT)
AC_DEFINE_UNQUOTED(HA_LOG_FACILITY, LOG_DAEMON, Default logging facility)

AC_MSG_NOTICE(Host CPU: $host_cpu)

dnl On 64-bit PowerPC, add -m64 for GCC unless CFLAGS already mentions
dnl powerpc64.
case "$host_cpu" in
  ppc64|powerpc64)
    case $CFLAGS in
      *powerpc64*)
        ;;
      *)
        if test "$GCC" = yes; then
          CFLAGS="$CFLAGS -m64"
        fi
        ;;
    esac
esac

dnl Pick the printf conversion used for uint64_t from the CPU name:
dnl s390x and any CPU whose name contains "64" get %lu, everything else %llu.
AC_MSG_CHECKING(which format is needed to print uint64_t)
case "$host_cpu" in
  s390x)U64T="%lu";;
  *64*) U64T="%lu";;
  *) U64T="%llu";;
esac
AC_MSG_RESULT($U64T)
AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)

dnl Prefer glue_config.h and fall back to hb_config.h; one of them must be
dnl present because the extract_header_define calls below read from it.
AC_CHECK_HEADERS(hb_config.h)
AC_CHECK_HEADERS(glue_config.h)
GLUE_HEADER=none
if test "$ac_cv_header_glue_config_h" = "yes"; then
    GLUE_HEADER=glue_config.h
elif test "$ac_cv_header_hb_config_h" = "yes"; then
    GLUE_HEADER=hb_config.h
else
    AC_MSG_FAILURE(Core development headers were not found)
fi

dnl Variables needed for substitution

CRM_DTD_DIRECTORY="${datadir}/pacemaker"
AC_DEFINE_UNQUOTED(CRM_DTD_DIRECTORY,"$CRM_DTD_DIRECTORY", Location for the Pacemaker Relax-NG Schema)
AC_SUBST(CRM_DTD_DIRECTORY)

AC_DEFINE_UNQUOTED(CRM_DTD_VERSION,"$CRM_DTD_VERSION", Current version of the Pacemaker Relax-NG Schema)
AC_SUBST(CRM_DTD_VERSION)

CRM_DAEMON_USER=`extract_header_define $GLUE_HEADER HA_CCMUSER`
AC_DEFINE_UNQUOTED(CRM_DAEMON_USER,"$CRM_DAEMON_USER", User to run Pacemaker daemons as)
AC_SUBST(CRM_DAEMON_USER)

CRM_DAEMON_GROUP=`extract_header_define $GLUE_HEADER HA_APIGROUP`
AC_DEFINE_UNQUOTED(CRM_DAEMON_GROUP,"$CRM_DAEMON_GROUP", Group to run Pacemaker daemons as)
AC_SUBST(CRM_DAEMON_GROUP)

CRM_STATE_DIR=${localstatedir}/run/crm
dnl Publish the directory locations both as C #defines (AC_DEFINE_UNQUOTED)
dnl and as substitution variables (AC_SUBST).
AC_DEFINE_UNQUOTED(CRM_STATE_DIR,"$CRM_STATE_DIR", Where to keep state files and sockets)
AC_SUBST(CRM_STATE_DIR)

PE_STATE_DIR="${localstatedir}/lib/pengine"
AC_DEFINE_UNQUOTED(PE_STATE_DIR,"$PE_STATE_DIR", Where to keep PEngine outputs)
AC_SUBST(PE_STATE_DIR)

dnl Eventually move out of the heartbeat dir tree and create compatibility code
CRM_CONFIG_DIR="${localstatedir}/lib/heartbeat/crm"
AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep CIB configuration files)
AC_SUBST(CRM_CONFIG_DIR)

dnl Eventually move out of the heartbeat dir tree and create symlinks when needed
CRM_DAEMON_DIR=`extract_header_define $GLUE_HEADER HA_LIBHBDIR`
AC_DEFINE_UNQUOTED(CRM_DAEMON_DIR,"$CRM_DAEMON_DIR", Location for Pacemaker daemons)
AC_SUBST(CRM_DAEMON_DIR)

dnl Needed so that the AIS plugin can clear out the directory as Heartbeat does
HA_STATE_DIR=`extract_header_define $GLUE_HEADER HA_VARRUNDIR`
AC_DEFINE_UNQUOTED(HA_STATE_DIR,"$HA_STATE_DIR", Where Heartbeat keeps state files and sockets)
AC_SUBST(HA_STATE_DIR)

dnl Needed for the location of hostcache in CTS.py
dnl (substituted only; no #define is emitted for this one)
HA_VARLIBHBDIR=`extract_header_define $GLUE_HEADER HA_VARLIBHBDIR`
AC_SUBST(HA_VARLIBHBDIR)

AC_DEFINE_UNQUOTED(UUID_FILE,"$localstatedir/lib/heartbeat/hb_uuid", Location of Heartbeat's UUID file)

dnl The OCF root is mandatory: configure aborts when the glue header does
dnl not yield a value for it.
OCF_ROOT_DIR=`extract_header_define $GLUE_HEADER OCF_ROOT_DIR`
if test "X$OCF_ROOT_DIR" = X; then
    AC_MSG_ERROR(Could not locate OCF directory)
fi
AC_SUBST(OCF_ROOT_DIR)

OCF_RA_DIR=`extract_header_define $GLUE_HEADER OCF_RA_DIR`
AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs)
AC_SUBST(OCF_RA_DIR)

dnl Extract this value from glue_config.h once we no longer support anything else
STONITH_PLUGIN_DIR="$libdir/stonith/plugins/stonith/"
AC_DEFINE_UNQUOTED(STONITH_PLUGIN_DIR,"$STONITH_PLUGIN_DIR", Location for Stonith plugins)
AC_SUBST(STONITH_PLUGIN_DIR)

dnl Lines added by this diff: directory and file-name prefix for Red Hat
dnl Stonith agents (only #defined here, not AC_SUBSTed).
+RH_STONITH_DIR="$sbindir"
+AC_DEFINE_UNQUOTED(RH_STONITH_DIR,"$RH_STONITH_DIR", Location for Red Hat Stonith agents)
+
+RH_STONITH_PREFIX="fence_"
+AC_DEFINE_UNQUOTED(RH_STONITH_PREFIX,"$RH_STONITH_PREFIX", Prefix for Red Hat Stonith agents)
+
dnl Determine the build version: prefer the node id recorded in
dnl .hg_archival.txt, otherwise ask hg when it is executable and a .hg
dnl directory is present, otherwise report "unknown".
AC_PATH_PROGS(HG, hg false)
AC_MSG_CHECKING(build version)
BUILD_VERSION=unknown
if test -f $srcdir/.hg_archival.txt; then
    BUILD_VERSION=`cat $srcdir/.hg_archival.txt | awk '/node:/ { print $2 }'`
dnl Quote $HG so that an empty value cannot turn the test into a syntax error.
elif test -x "$HG" && test -d .hg; then
    BUILD_VERSION=`$HG id -itb`
    if test $? != 0; then
        BUILD_VERSION=unknown
    fi
fi

AC_DEFINE_UNQUOTED(BUILD_VERSION, "$BUILD_VERSION", Build version)
AC_MSG_RESULT($BUILD_VERSION)
AC_SUBST(BUILD_VERSION)

dnl ===============================================
dnl Program Paths
dnl ===============================================

PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
export PATH

dnl Replacing AC_PROG_LIBTOOL with AC_CHECK_PROG because LIBTOOL
dnl was NOT being expanded all the time thus causing things to fail.
AC_CHECK_PROGS(LIBTOOL, glibtool libtool libtool15 libtool13)

AM_PATH_PYTHON
AC_CHECK_PROGS(MAKE, gmake make)
AC_PATH_PROGS(HTML2TXT, lynx w3m)
AC_PATH_PROGS(HELP2MAN, help2man)
AC_PATH_PROGS(POD2MAN, pod2man, pod2man)
AC_PATH_PROGS(ASCIIDOC, asciidoc)
AC_PATH_PROGS(PUBLICAN, publican)
AC_PATH_PROGS(FOP, fop)
AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh)
AC_PATH_PROGS(SCP, scp, /usr/bin/scp)
AC_PATH_PROGS(HG, hg, /bin/false)
AC_PATH_PROGS(TAR, tar)
AC_PATH_PROGS(MD5, md5)
AC_PATH_PROGS(TEST, test)
AC_PATH_PROGS(PKGCONFIG, pkg-config)
AC_PATH_PROGS(XML2CONFIG, xml2-config)
AC_PATH_PROGS(VALGRIND_BIN, valgrind, /usr/bin/valgrind)
AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command)

dnl libtool and make are hard requirements.
if test x"${LIBTOOL}" = x""; then
    AC_MSG_ERROR(You need (g)libtool installed in order to build ${PACKAGE})
fi
if test x"${MAKE}" = x""; then
    AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE})
fi

dnl Documentation tools are optional; each one found enables a conditional
dnl and is recorded in PKG_FEATURES.
AM_CONDITIONAL(BUILD_HELP, test x"${HELP2MAN}" != x"")
if test x"${HELP2MAN}" != x""; then
    PKG_FEATURES="$PKG_FEATURES manpages"
fi

AM_CONDITIONAL(BUILD_ASCIIDOC, test x"${ASCIIDOC}" != x"")
if test x"${ASCIIDOC}" != x""; then
    PKG_FEATURES="$PKG_FEATURES asciidoc"
fi

dnl Use the same x"..." comparison as the two checks above.  The unquoted,
dnl unprefixed form collapses to "test != x" when publican is not installed,
dnl which is not a valid test expression.
AM_CONDITIONAL(BUILD_DOCBOOK, test x"${PUBLICAN}" != x"")
if test x"${PUBLICAN}" != x""; then
    PKG_FEATURES="$PKG_FEATURES publican"
fi

dnl ===============================================
dnl Libraries
dnl ===============================================
AC_CHECK_LIB(socket, socket) dnl -lsocket
AC_CHECK_LIB(c, dlopen) dnl if dlopen is in libc...
AC_CHECK_LIB(dl, dlopen) dnl -ldl (for Linux)
AC_CHECK_LIB(rt, sched_getscheduler) dnl -lrt (for Tru64)
AC_CHECK_LIB(gnugetopt, getopt_long) dnl -lgnugetopt ( if available )
AC_CHECK_LIB(pam, pam_start) dnl -lpam (if available)
AC_CHECK_LIB(uuid, uuid_parse) dnl e2fsprogs
AC_CHECK_LIB(uuid, uuid_create) dnl ossp

if test x"${PKGCONFIG}" = x""; then
    AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE})
fi

dnl
dnl On many systems libcrypto is needed when linking against libsnmp.
dnl Check to see if it exists, and if so use it.
dnl
AC_CHECK_LIB(crypto, CRYPTO_free, CRYPTOLIB="-lcrypto",)
AC_SUBST(CRYPTOLIB)

dnl --enable-thread-safe selects the gthread pkg-config module instead of
dnl plain glib.
if test "x${enable_thread_safe}" = "xyes"; then
    GPKGNAME="gthread-2.0"
else
    GPKGNAME="glib-2.0"
fi

if
    $PKGCONFIG --exists $GPKGNAME
then
    GLIBCONFIG="$PKGCONFIG $GPKGNAME"
else
    dnl Trace the failing pkg-config invocations to help diagnose the
    dnl missing package before aborting.
    set -x
    echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH
    $PKGCONFIG --exists $GPKGNAME; echo $?
    $PKGCONFIG --cflags $GPKGNAME; echo $?
    $PKGCONFIG $GPKGNAME; echo $?
    set +x

    AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE})
fi
AC_MSG_RESULT(using $GLIBCONFIG)

#
# Where is dlopen?
#
if test "$ac_cv_lib_c_dlopen" = yes; then
    LIBADD_DL=""
elif test "$ac_cv_lib_dl_dlopen" = yes; then
    LIBADD_DL=-ldl
else
    LIBADD_DL=${lt_cv_dlopen_libs}
fi

dnl
dnl Check for location of gettext
dnl
dnl On at least Solaris 2.x, where it is in libc, specifying lintl causes
dnl grief. Ensure minimal result, not the sum of all possibilities.
dnl And do libc first.
dnl Known examples:
dnl    c:    Linux, Solaris 2.6+
dnl    intl: BSD, AIX
AC_CHECK_LIB(c, gettext)
dnl libintl is probed only when libc lacks gettext; missing it in both
dnl places is fatal.
if test x$ac_cv_lib_c_gettext != xyes; then
    AC_CHECK_LIB(intl, gettext)
    if test x$ac_cv_lib_intl_gettext != xyes; then
        AC_MSG_ERROR(You need gettext installed in order to build ${PACKAGE})
    fi
fi

dnl Fold the compile and link flags reported for glib into CPPFLAGS and LIBS.
if test -n "$GLIBCONFIG"; then
    AC_MSG_CHECKING(for special glib includes: )
    GLIBHEAD=`$GLIBCONFIG --cflags`
    AC_MSG_RESULT($GLIBHEAD)
    CPPFLAGS="$CPPFLAGS $GLIBHEAD"

    AC_MSG_CHECKING(for glib library flags)
    GLIBLIB=`$GLIBCONFIG --libs`
    AC_MSG_RESULT($GLIBLIB)
    LIBS="$LIBS $GLIBLIB"
fi

dnl ========================================================================
dnl Headers
dnl ========================================================================

AC_HEADER_STDC
dnl One HAVE_<HEADER>_H define is produced per header found.
AC_CHECK_HEADERS([arpa/inet.h asm/types.h assert.h auth-client.h ctype.h \
    dirent.h errno.h fcntl.h getopt.h glib.h grp.h limits.h \
    linux/errqueue.h malloc.h netdb.h netinet/in.h netinet/ip.h \
    pam/pam_appl.h pthread.h pwd.h security/pam_appl.h sgtty.h signal.h \
    stdarg.h stddef.h stdio.h stdlib.h string.h strings.h sys/dir.h \
    sys/ioctl.h sys/param.h sys/poll.h sys/resource.h sys/select.h \
    sys/socket.h sys/sockio.h sys/stat.h sys/time.h sys/timeb.h \
    sys/types.h sys/uio.h])
AC_CHECK_HEADERS(sys/un.h)
AC_CHECK_HEADERS(sys/utsname.h)
AC_CHECK_HEADERS(sys/wait.h)
AC_CHECK_HEADERS(time.h)
AC_CHECK_HEADERS(unistd.h)
AC_CHECK_HEADERS(winsock.h)

dnl These headers need prerequisites before the tests will pass
dnl AC_CHECK_HEADERS(net/if.h)
dnl AC_CHECK_HEADERS(netinet/icmp6.h)
dnl AC_CHECK_HEADERS(netinet/ip6.h)
dnl AC_CHECK_HEADERS(netinet/ip_icmp.h)

dnl libxml2 and libxslt are mandatory: xml2-config supplies the include
dnl flags, and both sets of development headers must be usable.
AC_MSG_CHECKING(for special libxml2 includes)
if test "x$XML2CONFIG" = "x"; then
    AC_MSG_ERROR(libxml2 config not found)
else
    XML2HEAD="`$XML2CONFIG --cflags`"
    AC_MSG_RESULT($XML2HEAD)
    AC_CHECK_LIB(xml2, xmlReadMemory)
    AC_CHECK_LIB(xslt, xsltApplyStylesheet)
fi

CPPFLAGS="$CPPFLAGS $XML2HEAD"

AC_CHECK_HEADERS(libxml/xpath.h)
AC_CHECK_HEADERS(libxslt/xslt.h)
if test "$ac_cv_header_libxml_xpath_h" != "yes"; then
    AC_MSG_ERROR(The libxml developement headers were not found)
fi
if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
    AC_MSG_ERROR(The libxslt developement headers were not found)
fi

dnl ========================================================================
dnl Structures
dnl ========================================================================

dnl struct tm is declared in <time.h>.  The include must name the header,
dnl because a bare "#include" makes the probe fail to compile and the member
dnl would then never be detected.
AC_CHECK_MEMBERS([struct tm.tm_gmtoff],,,[[#include <time.h>]])

dnl ========================================================================
dnl Functions
dnl ========================================================================

AC_CHECK_FUNCS(g_log_set_default_handler)
AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function]))

dnl ========================================================================
dnl ltdl
dnl ========================================================================

dnl Use the installed libltdl when it provides lt_dlopen, unless
dnl --enable-bundled-ltdl forces the bundled copy.
AC_CHECK_LIB(ltdl, lt_dlopen, [LTDL_foo=1])
if test "x${enable_bundled_ltdl}" = "xyes"; then
    if test $ac_cv_lib_ltdl_lt_dlopen = yes; then
        AC_MSG_NOTICE([Disabling usage of installed ltdl])
    fi
    dnl Overriding the cache variable steers the branch below to the bundled copy.
    ac_cv_lib_ltdl_lt_dlopen=no
fi

LIBLTDL_DIR=""
if test $ac_cv_lib_ltdl_lt_dlopen != yes ; then
    AC_MSG_NOTICE([Installing local ltdl])
    LIBLTDL_DIR=libltdl
    dnl Unpack the bundled sources in a subshell so that the working
    dnl directory of configure itself is unchanged.
    ( cd $srcdir ; $TAR -xvf libltdl.tar )
    if test "$?" -ne 0; then
        AC_MSG_ERROR([$TAR of libltdl.tar in $srcdir failed])
    fi
    AC_CONFIG_SUBDIRS(libltdl)
else
    LIBS="$LIBS -lltdl"
    AC_MSG_NOTICE([Using installed ltdl])
    INCLTDL=""
    LIBLTDL=""
fi

AC_SUBST(INCLTDL)
AC_SUBST(LIBLTDL)
AC_SUBST(LIBLTDL_DIR)

dnl ========================================================================
dnl bzip2
dnl ========================================================================
AC_CHECK_HEADERS(bzlib.h)
AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress)

if test x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress != xyes ; then
    AC_MSG_ERROR(BZ2 libraries not found)
fi

if test x$ac_cv_header_bzlib_h != xyes; then
    AC_MSG_ERROR(BZ2 Development headers not found)
fi

dnl ========================================================================
dnl ncurses
dnl ========================================================================
dnl
dnl A few OSes (e.g. Linux) deliver a default "ncurses" alongside "curses".
dnl Many non-Linux deliver "curses"; sites may add "ncurses".
dnl
dnl However, the source-code recommendation for both is to #include "curses.h"
dnl (i.e. "ncurses" still wants the include to be simple, no-'n', "curses.h").
dnl
dnl ncurses takes precedence.
dnl
AC_CHECK_HEADERS(curses.h)
AC_CHECK_HEADERS(curses/curses.h)
AC_CHECK_HEADERS(ncurses.h)
AC_CHECK_HEADERS(ncurses/ncurses.h)

dnl Although n-library is preferred, only look for it if the n-header was found.
CURSESLIBS=''
if test "$ac_cv_header_ncurses_h" = "yes"; then
    AC_CHECK_LIB(ncurses, printw,
        [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
    )
fi

if test "$ac_cv_header_ncurses_ncurses_h" = "yes"; then
    AC_CHECK_LIB(ncurses, printw,
        [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
    )
fi

dnl Only look for non-n-library if there was no n-library.
if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_h" = "yes"; then
    AC_CHECK_LIB(curses, printw,
        [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
    )
fi

dnl Only look for non-n-library if there was no n-library.
if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_curses_h" = "yes"; then
    AC_CHECK_LIB(curses, printw,
        [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
    )
fi

if test "x$CURSESLIBS" != "x"; then
    PKG_FEATURES="$PKG_FEATURES ncurses"
fi

dnl Check for printw() prototype compatibility
dnl The probe links a call that passes a "const char *" to printw() with
dnl -Wcast-qual -Werror, and runs only when a curses library was found and
dnl the compiler accepts both flags.
if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then
    AC_MSG_CHECKING(whether printw() requires argument of "const char *")
    ac_save_LIBS=$LIBS
    LIBS="$CURSESLIBS $LIBS"
    ac_save_CFLAGS=$CFLAGS
    CFLAGS="-Wcast-qual -Werror"

    dnl Each include must name its header: with bare "# include" lines the
    dnl probe cannot compile, and every curses library would be reported as
    dnl incompatible.
    AC_LINK_IFELSE(
      [AC_LANG_PROGRAM(
          [
#if defined(HAVE_CURSES_H)
#  include <curses.h>
#elif defined(HAVE_NCURSES_H)
#  include <ncurses.h>
#endif
          ],
          [printw((const char *)"Test");]
      )],
      [ac_cv_compatible_printw=yes],
      [ac_cv_compatible_printw=no]
    )

    LIBS=$ac_save_LIBS
    CFLAGS=$ac_save_CFLAGS

    AC_MSG_RESULT([$ac_cv_compatible_printw])

    if test "$ac_cv_compatible_printw" = no; then
        AC_MSG_WARN([The printw() function of your ncurses or curses library is old, we will disable usage of the library. If you want to use this library anyway, please update to newer version of the library, ncurses 5.4 or later is recommended. You can get the library from http://www.gnu.org/software/ncurses/.])
        AC_MSG_NOTICE([Disabling curses])
        AC_DEFINE(HAVE_INCOMPATIBLE_PRINTW, 1, [Do we have incompatible printw() in curses library?])
    fi
fi

AC_SUBST(CURSESLIBS)

dnl ========================================================================
dnl Cluster infrastructure - Heartbeat
dnl ========================================================================

dnl On Debian, AC_CHECK_LIBS fail if a library has any unresolved symbols
dnl So check for all the dependencies (so they're added to LIBS) before checking for -lplumb
AC_CHECK_LIB(pils, PILLoadPlugin)
AC_CHECK_LIB(plumb, G_main_add_IPC_Channel)

if test x"$ac_cv_lib_plumb_G_main_add_IPC_Channel" != x"yes"; then
    AC_MSG_FAILURE(Core Heartbeat utility libraries not found: $ac_cv_lib_plumb_G_main_add_IPC_Channel)
fi

dnl Compatibility checks
AC_CHECK_FUNCS(msgfromIPC_timeout)
dnl NOTE(review): the header name was missing from this check, which made
dnl the probe uncompilable.  lrm/lrm_api.h is where cluster-glue is expected
dnl to declare struct lrm_ops -- confirm against the installed headers.
AC_CHECK_MEMBERS([struct lrm_ops.fail_rsc],,,[[#include <lrm/lrm_api.h>]])

dnl ========================================================================
dnl Cluster stack - Heartbeat
dnl ========================================================================

dnl An explicit request makes a missing client library fatal; "try" simply
dnl disables Heartbeat support; any other value disables it without probing.
case $SUPPORT_HEARTBEAT in
    1|yes|true)
        AC_CHECK_LIB(hbclient, ll_cluster_new,
            [SUPPORT_HEARTBEAT=1],
            [AC_MSG_FAILURE(Unable to support Heartbeat: client libraries not found)]);;
    try)
        AC_CHECK_LIB(hbclient, ll_cluster_new,
            [SUPPORT_HEARTBEAT=1],
            [SUPPORT_HEARTBEAT=0]);;
    *)
        SUPPORT_HEARTBEAT=0;;
esac

AM_CONDITIONAL(BUILD_HEARTBEAT_SUPPORT, test $SUPPORT_HEARTBEAT = 1)
AC_DEFINE_UNQUOTED(SUPPORT_HEARTBEAT, $SUPPORT_HEARTBEAT, Support the Heartbeat messaging and membership layer)

dnl ========================================================================
dnl Cluster stack - OpenAIS
dnl ========================================================================

AISLIB=""

dnl Normalize the values
case $SUPPORT_AIS in
    1|yes|true) missingisfatal=1;;
    try)        missingisfatal=0;;
    *)          SUPPORT_AIS=no;;
esac

AC_MSG_CHECKING(for native AIS)

AISMSGLIB=""
AIS_VERSION="none"
COROSYNC_PKG="$PKGCONFIG libcoroipcc"

dnl Decide which OpenAIS branch is available: "corosync" when pkg-config
dnl knows libcoroipcc, "whitetank" when only openais/saAis.h is present,
dnl otherwise "none" with the reason kept in $aisreason.
if test $SUPPORT_AIS = no; then
    AC_MSG_RESULT(no... not requested.)
else
    dnl The argument is quoted because it contains a comma; unquoted, m4
    dnl would split it and drop everything after the comma.
    AC_MSG_RESULT([$SUPPORT_AIS, with '$AISPREFIX'])
    AC_CHECK_HEADERS(openais/saAis.h)
    AC_CHECK_HEADERS(corosync/coroipcc.h)

    $COROSYNC_PKG --exists
    if test $? = 0; then
        AIS_VERSION="corosync"
    elif test "$ac_cv_header_openais_saAis_h" = "yes"; then
        AIS_VERSION="whitetank"
    else
        aisreason="Whitetank headers not found"
    fi
fi

if test $AIS_VERSION != "none"; then
    AC_MSG_CHECKING(for OpenAIS branch)
    AC_MSG_RESULT($AIS_VERSION)
fi

dnl Corosync needs both its IPC header and the coroipcc library.
if test $AIS_VERSION = "corosync"; then
    if test "$ac_cv_header_corosync_coroipcc_h" != "yes"; then
        AIS_VERSION="none"
        aisreason="Corosync headers not found"
    fi

    dnl Probe with the pkg-config library path, then restore LIBS.
    saveLIBS="$LIBS"
    LIBS="$LIBS `$COROSYNC_PKG --libs-only-L`"
    AC_CHECK_LIB(coroipcc, coroipcc_msg_send_reply_receive, [])
    LIBS="$saveLIBS"

    if test $ac_cv_lib_coroipcc_coroipcc_msg_send_reply_receive != yes; then
        AC_MSG_RESULT(Cannot locate AIS messaging library)
        aisreason="requred Corosync libraries not found"
        AIS_VERSION="none"
    fi
fi

dnl continue?
if test $AIS_VERSION = "whitetank"; then
    dnl Find it in lib, lib64, or wherever it wants to live...
    AC_MSG_CHECKING(location of OpenAIS libraries)

    dnl CoroSync location
    alib=`ls ${AISPREFIX}/*/libcpg.so 2>/dev/null | head -n 1`
    if test -z "$alib"; then
        dnl Whitetank location
        alib=`ls ${AISPREFIX}/*/*/libcpg.so 2>/dev/null | head -n 1`
    fi

    dnl Call dirname only when a library was found; invoking it without an
    dnl operand is an error.  An empty AISLIB is reported just below.
    if test -n "$alib"; then
        AISLIB=`dirname $alib`
    else
        AISLIB=""
    fi

    AC_MSG_RESULT($AISLIB)
    if test "x$AISLIB" = "x"; then
        AC_MSG_WARN(Use --with-ais-prefix to specify the prefix OpenAIS was installed with)
        aisreason="library directory not found"
        AIS_VERSION="none"
    elif test ! -d "$AISLIB"; then
        AC_MSG_WARN(Use --with-ais-prefix to specify the prefix OpenAIS was installed with)
        aisreason="specified library directory does not exist"
        AIS_VERSION="none"
    fi
fi

dnl continue?
if test $AIS_VERSION = "whitetank"; then
    AC_MSG_CHECKING(location of OpenAIS plugins)
    if test -z "$LCRSODIR"; then
        dnl Default to $libexecdir/lcrso and override it only when
        dnl objdb.lcrso is actually found under the AIS prefix.  Overwriting
        dnl unconditionally would discard the default and call dirname with
        dnl no operand.
        LCRSODIR="$libexecdir/lcrso"
        alib=`ls ${AISPREFIX}/*/lcrso/objdb.lcrso 2>/dev/null | head -n 1`
        if test -n "$alib"; then
            LCRSODIR=`dirname $alib`
        fi
    fi
    AC_MSG_RESULT($LCRSODIR)
    if test "x$LCRSODIR" = "x"; then
        AC_MSG_RESULT(Invalid. Please specify the correct location with --with-lcrso-dir)
        aisreason="plugin directory not found"
        AIS_VERSION="none"
    elif test ! -d "$LCRSODIR"; then
        AC_MSG_RESULT(Invalid. Please specify the correct location with --with-lcrso-dir)
        aisreason="specified plugin directory does not exist"
        AIS_VERSION="none"
    fi
fi

dnl continue?
if test $AIS_VERSION = "whitetank"; then
    dnl Don't add the messaging library to LIBS since most daemons don't need/use it
    saveLIBS="$LIBS"
    LIBS="$LIBS -L${AISLIB} -R${AISLIB}"
    AC_CHECK_LIB(SaMsg, saSendReceiveReply, [])
    AC_CHECK_LIB(SaMsg, openais_msg_send_reply_receive, [])

    dnl The newer entry point wins; the older one selects TRADITIONAL_AIS_IPC.
    if test $ac_cv_lib_SaMsg_openais_msg_send_reply_receive = yes; then
        : OpenAIS
    elif test $ac_cv_lib_SaMsg_saSendReceiveReply = yes; then
        : OpenAIS
        AC_DEFINE_UNQUOTED(TRADITIONAL_AIS_IPC, 1, "Use the 'old' AIS IPC interface")
    else
        AC_MSG_RESULT(Cannot locate AIS messaging library)
        aisreason="requred libraries not found"
        AIS_VERSION="none"
    fi
    LIBS="$saveLIBS"
fi

dnl Translate the detected branch into defines and compile/link flags.
SUPPORT_AIS=1
case $AIS_VERSION in
    corosync)
        AC_DEFINE_UNQUOTED(AIS_COROSYNC, 1, "AIS target is the corosync series")
        LCRSODIR=`$PKGCONFIG corosync --variable=lcrsodir`
        CFLAGS="$CFLAGS `$COROSYNC_PKG --cflags`"
        AISMSGLIB=`$COROSYNC_PKG --libs`
        ;;
    whitetank)
        AC_DEFINE_UNQUOTED(AIS_WHITETANK, 1, "AIS target is the whitetank series")
        CFLAGS="$CFLAGS -I$AISPREFIX/include/openais"
        AISMSGLIB="-L${AISLIB} -R${AISLIB} -lSaMsg"
        ;;
    none)
        SUPPORT_AIS=0
        dnl A recorded reason means support was wanted but is unavailable:
        dnl warn for "try", fail for an explicit request.
        if test "x$aisreason" != x; then
            if test $missingisfatal = 0; then
                AC_MSG_WARN(Unable to support OpenAIS: $aisreason)
            else
                AC_MSG_FAILURE(Unable to support OpenAIS: $aisreason)
            fi
        fi
        ;;
    *)
        AC_MSG_FAILURE(Unknown OpenAIS branch: $AIS_VERSION);;
esac

AC_DEFINE_UNQUOTED(SUPPORT_AIS, $SUPPORT_AIS, Support the OpenAIS messaging and membership layer)
AM_CONDITIONAL(BUILD_AIS_SUPPORT, test $SUPPORT_AIS = 1)

dnl
dnl Cluster stack - Sanity
dnl

dnl Collect the enabled stacks and their client libraries; at least one
dnl stack must be available.
STACKS=""
CLUSTERLIBS=""
if test $SUPPORT_HEARTBEAT = 1; then
    STACKS="$STACKS heartbeat"
    CLUSTERLIBS="$CLUSTERLIBS -lhbclient -lccmclient"
fi

if test $SUPPORT_AIS = 1; then
    STACKS="$STACKS $AIS_VERSION"
    CLUSTERLIBS="$CLUSTERLIBS ${AISMSGLIB}"
else
    AISPREFIX=""
    LCRSODIR="$libdir"
fi

PKG_FEATURES="$PKG_FEATURES$STACKS"

AC_MSG_CHECKING(for supported stacks)
if test x"$STACKS" = x; then
    AC_MSG_FAILURE(You must choose at least one cluster stack to support)
fi
AC_MSG_RESULT($STACKS)
AC_SUBST(CLUSTERLIBS)
AC_SUBST(LCRSODIR)

dnl ========================================================================
dnl SNMP
dnl ========================================================================

case $SUPPORT_SNMP in
    1|yes|true) missingisfatal=1;;
    try)        missingisfatal=0;;
    *)          SUPPORT_SNMP=no;;
esac

dnl SUPPORT_SNMP is set to "no" by the first failing step and finally
dnl normalised to 0 or 1.
SNMPLIB=""
AC_MSG_CHECKING(for snmp support)
if test $SUPPORT_SNMP = no; then
    AC_MSG_RESULT(no... not requested.)
    SUPPORT_SNMP=0
else
    SNMPCONFIG=""
    AC_MSG_RESULT($SUPPORT_SNMP)
    AC_CHECK_HEADERS(net-snmp/net-snmp-config.h)

    if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
        SUPPORT_SNMP="no"
    fi

    if test $SUPPORT_SNMP != no; then
        AC_PATH_PROGS(SNMPCONFIG, net-snmp-config)
        if test "X${SNMPCONFIG}" = "X"; then
            AC_MSG_RESULT(You need the net_snmp development package to continue.)
            SUPPORT_SNMP=no
        fi
    fi

    if test $SUPPORT_SNMP != no; then
        AC_MSG_CHECKING(for special snmp libraries)
        SNMPLIBS=`$SNMPCONFIG --agent-libs`
        AC_MSG_RESULT($SNMPLIBS)
    fi

    if test $SUPPORT_SNMP != no; then
        dnl Link against the agent libraries for this one probe only.
        savedLibs=$LIBS
        LIBS="$LIBS $SNMPLIBS"
        AC_CHECK_FUNCS(netsnmp_transport_open_client)
        if test $ac_cv_func_netsnmp_transport_open_client != yes; then
            SUPPORT_SNMP=no
        fi
        LIBS=$savedLibs
    fi

    if test $SUPPORT_SNMP = no; then
        SUPPORT_SNMP=0
        if test $missingisfatal = 0; then
            AC_MSG_WARN(Unable to support SNMP)
        else
            AC_MSG_FAILURE(Unable to support SNMP)
        fi
    else
        SUPPORT_SNMP=1
    fi
fi

if test $SUPPORT_SNMP = 1; then
    PKG_FEATURES="$PKG_FEATURES snmp"
fi

AC_SUBST(SNMPLIBS)
AM_CONDITIONAL(ENABLE_SNMP, test "$SUPPORT_SNMP" = "1")
AC_DEFINE_UNQUOTED(ENABLE_SNMP, $SUPPORT_SNMP, Build in support for sending SNMP traps)

dnl ========================================================================
dnl ESMTP
dnl ========================================================================

case $SUPPORT_ESMTP in
    1|yes|true) missingisfatal=1;;
    try)        missingisfatal=0;;
    *)          SUPPORT_ESMTP=no;;
esac

ESMTPLIB=""
AC_MSG_CHECKING(for esmtp support)
if test $SUPPORT_ESMTP = no; then
    AC_MSG_RESULT(no... not requested.)
    SUPPORT_ESMTP=0
else
    ESMTPCONFIG=""
    AC_MSG_RESULT($SUPPORT_ESMTP)
    AC_CHECK_HEADERS(libesmtp.h)

    dnl A missing header must clear SUPPORT_ESMTP, the variable every later
    dnl step tests (as the SNMP section does with SUPPORT_SNMP).  Setting
    dnl ENABLE_ESMTP here had no effect on the outcome.
    if test "x${ac_cv_header_libesmtp_h}" != "xyes"; then
        SUPPORT_ESMTP="no"
    fi

    if test $SUPPORT_ESMTP != no; then
        AC_PATH_PROGS(ESMTPCONFIG, libesmtp-config)
        if test "X${ESMTPCONFIG}" = "X"; then
            AC_MSG_RESULT(You need the libesmtp development package to continue.)
            SUPPORT_ESMTP=no
        fi
    fi

    if test $SUPPORT_ESMTP != no; then
        AC_MSG_CHECKING(for special esmtp libraries)
        dnl Newlines in the libesmtp-config output are turned into spaces.
        ESMTPLIBS=`$ESMTPCONFIG --libs | tr '\n' ' '`
        AC_MSG_RESULT($ESMTPLIBS)
    fi

    if test $SUPPORT_ESMTP = no; then
        SUPPORT_ESMTP=0
        if test $missingisfatal = 0; then
            AC_MSG_WARN(Unable to support ESMTP)
        else
            AC_MSG_FAILURE(Unable to support ESMTP)
        fi
    else
        SUPPORT_ESMTP=1
    fi
fi

if test $SUPPORT_ESMTP = 1; then
    PKG_FEATURES="$PKG_FEATURES libesmtp"
fi

AC_SUBST(ESMTPLIBS)
AM_CONDITIONAL(ENABLE_ESMTP, test "$SUPPORT_ESMTP" = "1")
AC_DEFINE_UNQUOTED(ENABLE_ESMTP, $SUPPORT_ESMTP, Build in support for sending mail notifications with ESMTP)

dnl ========================================================================
dnl GnuTLS
dnl ========================================================================

AC_CHECK_HEADERS(gnutls/gnutls.h)
AC_CHECK_HEADERS(security/pam_appl.h pam/pam_appl.h)

dnl GNUTLS library: Attempt to determine by 'libgnutls-config' program.
dnl If no 'libgnutls-config', try traditional autoconf means.
AC_PATH_PROGS(LIBGNUTLS_CONFIG, libgnutls-config)

if test -n "$LIBGNUTLS_CONFIG"; then
    AC_MSG_CHECKING(for gnutls header flags)
    GNUTLSHEAD="`$LIBGNUTLS_CONFIG --cflags`";
    AC_MSG_RESULT($GNUTLSHEAD)
    AC_MSG_CHECKING(for gnutls library flags)
    GNUTLSLIBS="`$LIBGNUTLS_CONFIG --libs`";
    AC_MSG_RESULT($GNUTLSLIBS)
else
    AC_CHECK_LIB(gnutls, gnutls_init)
fi
AC_SUBST(GNUTLSHEAD)
AC_SUBST(GNUTLSLIBS)

dnl ========================================================================
dnl System Health
dnl ========================================================================

dnl Check if servicelog development package is installed
SERVICELOG=servicelog-1
SERVICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG packages)
if
    $PKGCONFIG --exists $SERVICELOG
then
    SERVICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($SERVICELOG_EXISTS)
AM_CONDITIONAL(BUILD_SERVICELOG, test "$SERVICELOG_EXISTS" = "yes")

dnl Check if OpenIPMI packages and servicelog are installed
dnl The default is stored under the same (misspelled) name that the result
dnl message and the conditional below read, so a missing package now reports
dnl "no" instead of an empty string.  The spelling matches the existing
dnl BUILD_OPENIPMI_SERICELOG conditional, which is left unchanged.
OPENIPMI="OpenIPMI OpenIPMIposix"
OPENIPMI_SERICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG $OPENIPMI packages)
if
    $PKGCONFIG --exists $OPENIPMI $SERVICELOG
then
    OPENIPMI_SERICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($OPENIPMI_SERICELOG_EXISTS)
AM_CONDITIONAL(BUILD_OPENIPMI_SERICELOG, test "$OPENIPMI_SERICELOG_EXISTS" = "yes")

dnl ========================================================================
dnl checks for library functions to replace them
dnl
dnl     NoSuchFunctionName:
dnl               is a dummy function which no system supplies.  It is here to make
dnl               the system compile semi-correctly on OpenBSD which doesn't know
dnl               how to create an empty archive
dnl
dnl     scandir:  Only on BSD.
dnl               System-V systems may have it, but hidden and/or deprecated.
dnl               A replacement function is supplied for it.
dnl
dnl     setenv:   is some bsdish function that should also be avoided (use
dnl               putenv instead)
dnl               On the other hand, putenv doesn't provide the right API for the
dnl               code and has memory leaks designed in (sigh...)
dnl               Fortunately, a replacement function is supplied for it.
dnl
dnl     strerror: returns a string that corresponds to an errno.
dnl               A replacement function is supplied for it.
dnl
dnl     unsetenv: is some bsdish function that should also be avoided (No
dnl               replacement)
dnl               A replacement function is supplied for it.
dnl
dnl     strnlen:  is a gnu function similar to strlen, but safer.
dnl               We wrote a tolerably-fast replacement function for it.
dnl
dnl     strndup:  is a gnu function similar to strdup, but safer.
dnl               We wrote a tolerably-fast replacement function for it.
dnl
dnl     daemon:   is a GNU function.  The daemon() function is for programs wishing to
dnl               detach themselves from the controlling terminal and run in the
dnl               background as system daemon
dnl               A replacement function is supplied for it.

AC_REPLACE_FUNCS(alphasort inet_pton NoSuchFunctionName scandir setenv strerror unsetenv strnlen strndup daemon strlcpy strlcat)

dnl ========================================================================
dnl Compiler flags
dnl ========================================================================

dnl Make sure that CFLAGS is not exported. If the user did
dnl not have CFLAGS in their environment then this should have
dnl no effect. However if CFLAGS was exported from the user's
dnl environment, then the new CFLAGS will also be exported
dnl to sub processes.
CC_ERRORS=""
CC_EXTRAS=""
dnl Stays empty for non-GCC compilers, which makes the GCC_4 conditional false
GCC_MAJOR=""

if export | fgrep " CFLAGS=" > /dev/null; then
    export -n CFLAGS || true # We don't want to bomb out if this fails
fi

if test "$GCC" != yes; then
    CFLAGS="$CFLAGS -g"
    enable_fatal_warnings=no
else
    CFLAGS="$CFLAGS -ggdb3 -O0"

    # We had to eliminate -Wnested-externs because of libtool changes
    # NOTE(review): -Wnested-externs is nevertheless still present in the list below
    EXTRA_FLAGS="-fgnu89-inline
        -fstack-protector-all
        -Wall
        -Waggregate-return
        -Wbad-function-cast
        -Wcast-qual
        -Wcast-align
        -Wdeclaration-after-statement
        -Wendif-labels
        -Wfloat-equal
        -Wformat=2
        -Wformat-security
        -Wformat-nonliteral
        -Winline
        -Wmissing-prototypes
        -Wmissing-declarations
        -Wnested-externs
        -Wno-long-long
        -Wno-strict-aliasing
        -Wpointer-arith
        -Wstrict-prototypes
        -Wunsigned-char
        -Wwrite-strings"

    # Additional warnings it might be nice to enable one day
    #       -Wshadow
    #       -Wunreachable-code

    dnl Keep only the flags that the compiler in use accepts
    for j in $EXTRA_FLAGS
    do
        if
            cc_supports_flag $j
        then
            CC_EXTRAS="$CC_EXTRAS $j"
        fi
    done

dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x
dnl The major version is taken from the configured compiler, not from
dnl whichever gcc happens to be first in PATH.  The sed expression uses an
dnl escaped dot because m4 strips square brackets at this quoting level,
dnl which would turn a bracketed dot into a match-anything pattern.

    GCC_MAJOR=`$CC -v 2>&1 | awk 'END{print $3}' | sed 's/\..*//'`

dnl System specific options

    case "$host_os" in
    *linux*|*bsd*)
        if test "${enable_fatal_warnings}" = "unknown"; then
            enable_fatal_warnings=yes
        fi
        ;;
    esac

    if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then
        enable_fatal_warnings=yes
    else
        enable_fatal_warnings=no
    fi

    if test "x${enable_ansi}" != xno && cc_supports_flag -std=iso9899:199409 ; then
        AC_MSG_NOTICE(Enabling ANSI Compatibility)
        CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
    fi

    AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
fi

dnl AM_CONDITIONAL has to be invoked on every configure run, so it is placed
dnl outside of the GCC-only branch above.
AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4)

CFLAGS="$CFLAGS $CC_EXTRAS"

NON_FATAL_CFLAGS="$CFLAGS"
AC_SUBST(NON_FATAL_CFLAGS)

dnl
dnl We reset CFLAGS to include our warnings *after* all function
dnl checking goes on, so that our warning flags don't keep the
dnl AC_*FUNCS() calls above from working.  In particular, -Werror will
dnl *always* cause us troubles if we set it before here.
dnl dnl if test "x${enable_fatal_warnings}" = xyes ; then AC_MSG_NOTICE(Enabling Fatal Warnings) CFLAGS="$CFLAGS -Werror" fi AC_SUBST(CFLAGS) dnl This is useful for use in Makefiles that need to remove one specific flag CFLAGS_COPY="$CFLAGS" AC_SUBST(CFLAGS_COPY) AC_SUBST(LIBADD_DL) dnl extra flags for dynamic linking libraries AC_SUBST(LIBADD_INTL) dnl extra flags for GNU gettext stuff... AC_SUBST(LOCALE) dnl Options for cleaning up the compiler output QUIET_LIBTOOL_OPTS="" QUIET_MAKE_OPTS="" if test "x${enable_quiet}" = "xyes"; then QUIET_LIBTOOL_OPTS="--quiet" QUIET_MAKE_OPTS="--quiet" fi AC_MSG_RESULT(Supress make details: ${enable_quiet}) dnl Put the above variables to use LIBTOOL="${LIBTOOL} --tag=CC \$(QUIET_LIBTOOL_OPTS)" MAKE="${MAKE} \$(QUIET_MAKE_OPTS)" AC_SUBST(CC) AC_SUBST(MAKE) AC_SUBST(LIBTOOL) AC_SUBST(QUIET_MAKE_OPTS) AC_SUBST(QUIET_LIBTOOL_OPTS) dnl The Makefiles and shell scripts we output AC_CONFIG_FILES(Makefile \ -README \ cts/Makefile \ cts/CTSvars.py \ cts/LSBDummy \ cib/Makefile \ crmd/Makefile \ pengine/Makefile \ debian/Makefile \ doc/Makefile \ doc/cibadmin.8 \ doc/crm_resource.8 \ include/Makefile \ include/crm/Makefile \ include/crm/common/Makefile \ include/crm/pengine/Makefile \ - include/fencing/Makefile \ replace/Makefile \ lib/Makefile \ lib/ais/Makefile \ lib/common/Makefile \ lib/cib/Makefile \ lib/pengine/Makefile \ lib/transition/Makefile \ lib/fencing/Makefile \ lib/plugins/Makefile \ lib/plugins/lrm/Makefile \ fencing/Makefile \ - fencing/stonithd/Makefile \ - fencing/test/Makefile \ - fencing/test/STONITHDBasicSanityCheck \ extra/Makefile \ extra/resources/Makefile \ tools/Makefile \ tools/haresources2cib.py \ tools/hb2openais.sh \ tools/crm_primitive.py \ tools/crm \ - tools/shell/Makefile \ - tools/shell/templates/Makefile \ - tools/shell/regression/Makefile \ - tools/shell/regression/regression.sh \ - tools/shell/regression/lrmregtest-lsb \ - tools/shell/regression/testcases/Makefile \ + tools/shell/Makefile \ + 
tools/shell/templates/Makefile \ + tools/shell/regression/Makefile \ + tools/shell/regression/regression.sh \ + tools/shell/regression/lrmregtest-lsb \ + tools/shell/regression/testcases/Makefile \ xml/Makefile \ xml/pacemaker.rng \ xml/resources.rng \ xml/constraints.rng \ xml/rule.rng \ xml/nvset.rng \ ) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() dnl ***************** dnl Configure summary dnl ***************** AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = ${VERSION} (Build: $BUILD_VERSION)]) AC_MSG_RESULT([ Features =${PKG_FEATURES}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Prefix = ${prefix}]) AC_MSG_RESULT([ Executables = ${sbindir}]) AC_MSG_RESULT([ Man pages = ${mandir}]) AC_MSG_RESULT([ Libraries = ${libdir}]) AC_MSG_RESULT([ Header files = ${includedir}]) AC_MSG_RESULT([ Arch-independent files = ${datadir}]) AC_MSG_RESULT([ State information = ${localstatedir}]) AC_MSG_RESULT([ System configuration = ${sysconfdir}]) AC_MSG_RESULT([ AIS Plugins = ${LCRSODIR}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Use system LTDL = ${ac_cv_lib_ltdl_lt_dlopen}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ HA group name = ${CRM_DAEMON_GROUP}]) AC_MSG_RESULT([ HA user name = ${CRM_DAEMON_USER}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) AC_MSG_RESULT([ Libraries = ${LIBS}]) AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) diff --git a/crmd/Makefile.am b/crmd/Makefile.am index b5e32ec2fd..f63291b15f 100644 --- a/crmd/Makefile.am +++ b/crmd/Makefile.am @@ -1,68 +1,65 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl halibdir = $(CRM_DAEMON_DIR) -COMMONLIBS = \ - $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/pengine/libpe_rules.la \ - $(top_builddir)/lib/cib/libcib.la \ - $(top_builddir)/lib/fencing/libstonithd.la - ## binary progs halib_PROGRAMS = crmd atest ## SOURCES noinst_HEADERS = crmd.h crmd_fsa.h crmd_messages.h fsa_defines.h \ fsa_matrix.h fsa_proto.h crmd_utils.h crmd_callbacks.h \ crmd_lrm.h te_callbacks.h tengine.h atest_SOURCES = atest.c -atest_LDADD = $(COMMONLIBS) +atest_LDADD = $(top_builddir)/lib/common/libcrmcommon.la crmd_SOURCES = main.c crmd.c \ fsa.c control.c messages.c ccm.c callbacks.c \ election.c join_client.c join_dc.c subsystems.c \ cib.c pengine.c tengine.c lrm.c \ utils.c misc.c te_events.c te_actions.c te_utils.c te_callbacks.c -crmd_LDADD = $(COMMONLIBS) $(CLUSTERLIBS) -llrm \ +crmd_LDADD = $(CLUSTERLIBS) -llrm \ + $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/transition/libtransitioner.la \ - $(top_builddir)/lib/common/libcrmcluster.la - + $(top_builddir)/lib/pengine/libpe_rules.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/common/libcrmcluster.la \ + $(top_builddir)/lib/common/libcrmcommon.la clean-generic: rm -f *.log *.debug *.xml *~ install-exec-local: uninstall-local: graphs: fsa_inputs.png fsa_inputs_by_action.png fsa_actions_by_state.png %.png: %.dot dot -Tpng $< > $@ %.dot : fsa_matrix.h 
make_dot.pl perl $(top_srcdir)/crmd/make_dot.pl $(top_srcdir)/crmd/fsa_matrix.h $(top_builddir)/crmd diff --git a/crmd/control.c b/crmd/control.c index 324f32b185..340955e045 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,984 +1,1001 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include char *ipc_server = NULL; extern void post_cache_update(int seq); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; GHashTable *ipc_clients = NULL; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; /* A_HA_CONNECT */ #if SUPPORT_AIS extern void crmd_ha_msg_filter(xmlNode * msg); static gboolean crm_ais_dispatch(AIS_Message *wrapper, char *data, int sender) { int seq = 0; xmlNode *xml = NULL; const char *seq_s = NULL; xml = string2xml(data); if(xml == NULL) { crm_err("Could not parse message content (%d): %.100s", wrapper->header.id, data); return TRUE; } switch(wrapper->header.id) { case crm_class_members: seq_s = crm_element_value(xml, "id"); seq = crm_int_helper(seq_s, NULL); set_bit_inplace(fsa_input_register, 
R_PEER_DATA); post_cache_update(seq); /* fall through */ case crm_class_quorum: crm_update_quorum(crm_have_quorum, FALSE); if(AM_I_DC) { const char *votes = crm_element_value(xml, "expected"); if(votes == NULL || check_number(votes) == FALSE) { crm_log_xml_err(xml, "Invalid quorum/membership update"); } else { int rc = update_attr( fsa_cib_conn, cib_quorum_override|cib_scope_local|cib_inhibit_notify, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, XML_ATTR_EXPECTED_VOTES, votes, FALSE); if(cib_ok > rc) { crm_err("Quorum update failed: %s", cib_error2string(rc)); } } } break; case crm_class_cluster: crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); crmd_ha_msg_filter(xml); break; case crm_class_rmpeer: /* Ignore */ break; case crm_class_notify: case crm_class_nodeid: crm_err("Unexpected message class (%d): %.100s", wrapper->header.id, data); break; default: crm_err("Invalid message class (%d): %.100s", wrapper->header.id, data); } free_xml(xml); return TRUE; } static void crm_ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } #endif void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(is_openais_cluster()) { crm_peer_destroy(); crm_info("Disconnected from OpenAIS"); #if SUPPORT_HEARTBEAT } else if(fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); crm_info("Disconnected from Heartbeat"); #endif } } if(action & A_HA_CONNECT) { void *dispatch = NULL; void *destroy = NULL; if(is_openais_cluster()) { #if SUPPORT_AIS destroy = crm_ais_destroy; dispatch = crm_ais_dispatch; #endif } else if(is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT dispatch = crmd_ha_msg_callback; destroy = crmd_ha_connection_destroy; #endif } 
crm_set_status_callback(&ais_status_callback); registered = crm_cluster_connect( &fsa_our_uname, &fsa_our_uuid, dispatch, destroy, #if SUPPORT_HEARTBEAT &fsa_cluster_conn #else NULL #endif ); #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { crm_debug_3("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback( fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK){ crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_debug_3("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback( fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if(registered) { crm_debug_3("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status( fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } } #endif if(registered == FALSE) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); if(is_heartbeat_cluster()) { if(is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if(stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* its gone... 
*/ crm_err("Faking %s exit", pe_subsystem->name); clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } } crm_info("All subsystems stopped, continuing"); } + + if(stonith_api) { + /* Prevent it from comming up again */ + clear_bit_inplace(fsa_input_register, R_ST_REQUIRED); + + crm_info("Disconnecting STONITH..."); + stonith_api->cmds->disconnect(stonith_api); + } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { xmlNode *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resources; extern GHashTable *voted; extern GHashTable *reload_hash; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *client = value; crm_err("%s is still connected at exit", client->table_key); } static void free_mem(fsa_data_t *msg_data) { g_main_loop_quit(crmd_mainloop); g_main_loop_unref(crmd_mainloop); #if SUPPORT_HEARTBEAT if(fsa_cluster_conn) { fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif slist_destroy(fsa_data_t, fsa_data, fsa_message_queue, crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); ); delete_fsa_input(msg_data); if(ipc_clients) { crm_debug("Number 
of connected clients: %d", g_hash_table_size(ipc_clients)); /* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */ g_hash_table_destroy(ipc_clients); } empty_uuid_cache(); crm_peer_destroy(); clear_bit_inplace(fsa_input_register, R_CCM_DATA); if(te_subsystem->client && te_subsystem->client->client_source) { crm_debug("Full destroy: TE"); G_main_del_IPC_Channel(te_subsystem->client->client_source); } else { crm_debug("Partial destroy: TE"); crmd_ipc_connection_destroy(te_subsystem->client); } crm_free(te_subsystem); if(pe_subsystem->client && pe_subsystem->client->client_source) { crm_debug("Full destroy: PE"); G_main_del_IPC_Channel(pe_subsystem->client->client_source); } else { crm_debug("Partial destroy: PE"); crmd_ipc_connection_destroy(pe_subsystem->client); } crm_free(pe_subsystem); crm_free(cib_subsystem); if(integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if(finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if(confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if(reload_hash) { g_hash_table_destroy(reload_hash); } if(resources) { g_hash_table_destroy(resources); } if(voted) { g_hash_table_destroy(voted); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->delete(fsa_lrm_conn); } crm_free(integration_timer); crm_free(finalization_timer); crm_free(election_trigger); crm_free(election_timeout); crm_free(shutdown_escalation_timer); crm_free(wait_timer); crm_free(recheck_timer); crm_free(fsa_our_dc_version); crm_free(fsa_our_uname); crm_free(fsa_our_uuid); crm_free(fsa_our_dc); crm_free(ipc_server); crm_free(max_generation_from); free_xml(max_generation_xml); xmlCleanupParser(); } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if(action & A_EXIT_1) { exit_code = 1; 
log_level = LOG_ERR; exit_type = "forcefully"; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if(is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } free_mem(msg_data); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); cl_flush_logs(); exit(exit_code); } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); /* set up the timers */ crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(recheck_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; 
election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); 
crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = CRM_DAEMON_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = CRM_DAEMON_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input 
current_input, fsa_data_t *msg_data)
{
    /*
     * Finishes startup once every prerequisite bit is set in
     * fsa_input_register.  For each missing prerequisite the reason is
     * logged, crmd_fsa_stall() is called and the function returns early.
     */
    if(cur_state != S_STARTING) {
	crm_err("Start cancelled... %s", fsa_state2string(cur_state));
	return;

    } else if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) {
	crm_info("Delaying start, CCM (%.16llx) not connected", R_CCM_DATA);
	crmd_fsa_stall(NULL);
	return;

    } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
	crm_info("Delaying start, LRM (%.16llx) not connected", R_LRM_CONNECTED);
	crmd_fsa_stall(NULL);
	return;

    } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
	crm_info("Delaying start, CIB (%.16llx) not connected", R_CIB_CONNECTED);
	crmd_fsa_stall(NULL);
	return;

    } else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
	crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
	crmd_fsa_stall(NULL);
	return;

    } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
	HA_Message *msg = NULL;

	/* try reading from HA */
	crm_info("Delaying start, Peer data (%.16llx) not recieved", R_PEER_DATA);
	crm_debug_3("Looking for a HA message");
#if SUPPORT_HEARTBEAT
	if(is_heartbeat_cluster()) {
	    msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0);
	}
#endif
	/* Any message read here is only logged and then discarded */
	if(msg != NULL) {
	    crm_debug_3("There was a HA message");
	    crm_msg_del(msg);
	}
	crmd_fsa_stall(NULL);
	return;
    }

    crm_debug("Init server comms");
    if(ipc_server == NULL) {
	ipc_server = crm_strdup(CRM_SYSTEM_CRMD);
    }

    if(init_server_ipc_comms(ipc_server, crmd_client_connect, default_ipc_connection_destroy)) {
	crm_err("Couldn't start IPC server");
	register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }

+    if(stonith_reconnect == NULL) {
+	/* Only the address is handed to the trigger as its user data.
+	 * The trigger is kept in a global and fired after this function
+	 * has returned, so the object needs static storage duration; the
+	 * address of an automatic variable would be left dangling.
+	 */
+	static int dummy;
+	stonith_reconnect = mainloop_add_trigger(
+	    G_PRIORITY_LOW, te_connect_stonith, &dummy);
+    }
+    set_bit_inplace(fsa_input_register, R_ST_REQUIRED);
+    mainloop_set_trigger(stonith_reconnect);
+
    crm_info("The local CRM is operational");
    clear_bit_inplace(fsa_input_register, R_STARTING);
    register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}

/* A_RECOVER */
void
do_recover(long long action, enum
crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "60s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." 
}, { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, }; void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable *options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; ha_time_t *now = new_ha_date(TRUE); if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", cib_error2string(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if(rc == cib_bad_permissions || rc == cib_dtd_validation || rc == cib_bad_digest || rc == cib_bad_config) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit_inplace(fsa_input_register, R_STAYDOWN); } goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); unpack_instance_attributes( output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); 
recheck_timer->period_ms = crm_get_msec(value); crm_info("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_AIS if(is_openais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); send_ais_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_debug_3("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: free_ha_date(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, config_query_callback); crm_debug_2("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { crm_info("Requesting shutdown"); set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); if(shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_info("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* cant rely on this... 
*/ crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } } static void default_cib_update_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", cib_error2string(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #if SUPPORT_HEARTBEAT static void populate_cib_nodes_ha(gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; xmlNode *cib_node_list = NULL; if(fsa_cluster_conn == NULL) { crm_debug("Not connected"); return; } /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; xmlNode *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if(ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type( fsa_cluster_conn, ha_node); if(safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(ha_node); if(ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_debug("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while(ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override, call_id); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, 
default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } #endif static void create_cib_node_definition( gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; xmlNode *cib_nodes = user_data; xmlNode *cib_new_node = NULL; cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname); crm_xml_add(cib_new_node, XML_ATTR_TYPE, NORMALNODE); } void populate_cib_nodes(gboolean with_client_status) { int call_id = 0; xmlNode *cib_node_list = NULL; #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { populate_cib_nodes_ha(with_client_status); return; } #endif cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); g_hash_table_foreach( crm_peer_cache, create_cib_node_definition, cib_node_list); fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override, call_id); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } diff --git a/crmd/fsa_defines.h b/crmd/fsa_defines.h index 4662b6dc49..db82801972 100644 --- a/crmd/fsa_defines.h +++ b/crmd/fsa_defines.h @@ -1,478 +1,479 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FSA_DEFINES__H #define FSA_DEFINES__H /*====================================== * States the DC/CRMd can be in *======================================*/ enum crmd_fsa_state { S_IDLE = 0, /* Nothing happening */ S_ELECTION, /* Take part in the election algorithm as * described below */ S_INTEGRATION, /* Integrate the status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_FINALIZE_JOIN,/* Integrate the status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB * NOTE(review): identical to the S_INTEGRATION text; * looks copy-pasted - confirm the intended meaning */ S_NOT_DC, /* We are in crmd/slave mode */ S_POLICY_ENGINE,/* Determine the next stable state of the cluster */ S_RECOVERY, /* Something bad happened, check everything is ok * before continuing and attempt to recover if * required */ S_RELEASE_DC, /* We were the DC, but now we aren't anymore, * possibly by our own request, and we should * release all unnecessary sub-systems, finish * any pending actions, do general cleanup and * unset anything that makes us think we are * special :) */ S_STARTING, /* We are just starting out */ S_PENDING, /* We are not a full/active member yet */ S_STOPPING, /* We are in the final stages of shutting down */ S_TERMINATE, /* We are going to shut down; this is the equivalent of * "Sending TERM signal to all processes" in Linux * and in worst case scenarios could be considered * a self STONITH */ S_TRANSITION_ENGINE,/* Attempt to make the calculated next stable * state of the cluster a reality */ S_HALT, /* Freeze - don't do anything * Something bad happened that needs the admin to fix * Wait for I_ELECTION */ /* ----------- Last input found in table is above ---------- */ S_ILLEGAL /* This is an illegal FSA state */ /*
(must be last) */ }; /* MAXSTATE is an alias for S_ILLEGAL, the final enumerator */ #define MAXSTATE S_ILLEGAL /* A state diagram can be constructed from the dc_fsa.dot with the following command: dot -Tpng crmd_fsa.dot > crmd_fsa.png Description: Once we start and do some basic sanity checks, we go into the S_NOT_DC state and await instructions from the DC or input from the CCM which indicates the election algorithm needs to run. If the election algorithm is triggered we enter the S_ELECTION state from where we can either go back to the S_NOT_DC state or progress to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC but aren't anymore). The election algorithm has been adapted from http://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR521 Loosely known as the Bully Algorithm, its major points are: - Election is initiated by any node (N) that notices that the coordinator is no longer responding - Concurrent multiple elections are possible - Algorithm + N sends ELECTION messages to all nodes that occur earlier in the CCM's membership list. + If no one responds, N wins and becomes coordinator + N sends out COORDINATOR messages to all other nodes in the partition + If one of the higher-ups answers, it takes over. N is done. Once the election is complete, if we are the DC, we enter the S_INTEGRATION state which is a DC-in-waiting style state. We are the DC, but we shouldn't do anything yet because we may not have an up-to-date picture of the cluster. There may of course be times when this fails, so we should go back to the S_RECOVERY stage and check everything is ok. We may also end up here if a new node came online, since each node is authoritative on itself and we would want to incorporate its information into the CIB. Once we have the latest CIB, we then enter the S_POLICY_ENGINE state where we invoke the Policy Engine. It is possible that between invoking the Policy Engine and receiving an answer, we receive more input. In this case we would discard the original result and invoke it again.
Once we are satisfied with the output from the Policy Engine we enter S_TRANSITION_ENGINE and feed the Policy Engine's output to the Transition Engine, which attempts to make the Policy Engine's calculation a reality. If the transition completes successfully, we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the current unstable state and try again. Of course we may be asked to shut down at any time, however we must progress to S_NOT_DC before doing so. Once we have handed over DC duties to another node, we can then shut down like everyone else, that is, by asking the DC for permission and waiting for it to take all our resources away. The case where we are the DC and the only node in the cluster is a special case and handled as an escalation which takes us to S_SHUTDOWN. Similarly, if any other point in the shutdown fails or stalls, this is escalated and we end up in S_TERMINATE. At any point, the CRMd/DC can relay messages for its sub-systems, but outbound messages (from sub-systems) should probably be blocked until S_INTEGRATION (for the DC case) or the join protocol has completed (for the CRMd case) */ /*====================================== * * Inputs/Events/Stimuli to be given to the finite state machine * * Some of these are true events, and others are synthesised based on * the "register" (see below) and the contents or source of messages.
* * At this point, my plan is to have a loop of some sort that keeps * going until recieving I_NULL * *======================================*/ enum crmd_fsa_input { /* 0 */ I_NULL, /* Nothing happened */ /* 1 */ I_CIB_OP, /* An update to the CIB occurred */ I_CIB_UPDATE, /* An update to the CIB occurred */ I_DC_TIMEOUT, /* We have lost communication with the DC */ I_ELECTION, /* Someone started an election */ I_PE_CALC, /* The Policy Engine needs to be invoked */ I_RELEASE_DC, /* The election completed and we were not * elected, but we were the DC beforehand */ I_ELECTION_DC, /* The election completed and we were (re-)elected * DC */ I_ERROR, /* Something bad happened (more serious than * I_FAIL) and may not have been due to the action * being performed. For example, we may have lost * our connection to the CIB. */ /* 9 */ I_FAIL, /* The action failed to complete successfully */ I_INTEGRATED, I_FINALIZED, I_NODE_JOIN, /* A node has entered the cluster */ I_NOT_DC, /* We are not and were not the DC before or after * the current operation or state */ I_RECOVERED, /* The recovery process completed successfully */ I_RELEASE_FAIL, /* We could not give up DC status for some reason */ I_RELEASE_SUCCESS, /* We are no longer the DC */ I_RESTART, /* The current set of actions needs to be * restarted */ I_TE_SUCCESS, /* Some non-resource, non-ccm action is required * of us, eg.
ping * NOTE(review): this text does not obviously describe * I_TE_SUCCESS - confirm against the FSA table */ /* 19 */ I_ROUTER, /* Do our job as router and forward this to the * right place */ I_SHUTDOWN, /* We are asking to shut down */ I_STOP, /* We have been told to shut down */ I_TERMINATE, /* Actually exit */ I_STARTUP, I_PE_SUCCESS, /* The action completed successfully */ I_JOIN_OFFER, /* The DC is offering membership */ I_JOIN_REQUEST, /* The client is requesting membership */ I_JOIN_RESULT, /* If not the DC: The result of a join request * Else: A client is responding with its local state info */ I_WAIT_FOR_EVENT, /* We may be waiting for an async task to "happen" * and until it does, we can't do anything else */ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */ I_LRM_EVENT, /* 31 */ I_PENDING, I_HALT, /* ------------ Last input found in table is above ----------- */ I_ILLEGAL /* This is an illegal value for an FSA input */ /* (must be last) */ }; /* MAXINPUT is an alias for I_ILLEGAL, the final enumerator */ #define MAXINPUT I_ILLEGAL /* I_MESSAGE is a second name for I_ROUTER */ #define I_MESSAGE I_ROUTER /*====================================== * * actions * * Some of the actions below will always occur together for now, but I can * foresee that this may not always be the case. So I've split them up so * that if they ever do need to be called independently in the future, it * won't be a problem. * * For example, separating A_LRM_CONNECT from A_STARTUP might be useful * if we ever try to recover from a faulty or disconnected LRM. * *======================================*/ /* Don't do anything */ #define A_NOTHING 0x0000000000000000ULL /* -- Startup actions -- */ /* Hook to perform any actions (other than starting the CIB, * connecting to HA or the CCM) that might be needed as part * of the startup. */ #define A_STARTUP 0x0000000000000001ULL /* Hook to perform any actions that might be needed as part * after startup is successful.
*/ #define A_STARTED 0x0000000000000002ULL /* Connect to Heartbeat */ #define A_HA_CONNECT 0x0000000000000004ULL #define A_HA_DISCONNECT 0x0000000000000008ULL #define A_INTEGRATE_TIMER_START 0x0000000000000010ULL #define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL #define A_FINALIZE_TIMER_START 0x0000000000000040ULL #define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL /* -- Election actions -- */ #define A_DC_TIMER_START 0x0000000000000100ULL #define A_DC_TIMER_STOP 0x0000000000000200ULL #define A_ELECTION_COUNT 0x0000000000000400ULL #define A_ELECTION_VOTE 0x0000000000000800ULL #define A_ELECTION_START 0x0000000000001000ULL /* -- Message processing -- */ /* Process the queue of requests */ #define A_MSG_PROCESS 0x0000000000002000ULL /* Send the message to the correct recipient */ #define A_MSG_ROUTE 0x0000000000004000ULL /* Send a welcome message to new node(s) */ #define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL /* -- Server Join protocol actions -- */ /* Send a welcome message to all nodes */ #define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL /* Process the remote node's ack of our join message */ #define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL /* Send out the results of the Join phase */ #define A_DC_JOIN_FINALIZE 0x0000000000040000ULL /* Send out the results of the Join phase * NOTE(review): same text as A_DC_JOIN_FINALIZE; looks copy-pasted - confirm */ #define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL /* -- Client Join protocol actions -- */ #define A_CL_JOIN_QUERY 0x0000000000100000ULL #define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL /* Request membership to the DC list */ #define A_CL_JOIN_REQUEST 0x0000000000400000ULL /* Did the DC accept or reject the request */ #define A_CL_JOIN_RESULT 0x0000000000800000ULL /* -- Recovery, DC start/stop -- */ /* Something bad happened, try to recover */ #define A_RECOVER 0x0000000001000000ULL /* Hook to perform any actions (apart from starting, the TE, PE * and gathering the latest CIB) that might be necessary before * giving up the responsibilities of being the DC.
*/ #define A_DC_RELEASE 0x0000000002000000ULL /* */ #define A_DC_RELEASED 0x0000000004000000ULL /* Hook to perform any actions (apart from starting, the TE, PE * and gathering the latest CIB) that might be necessary before * taking over the responsibilities of being the DC. */ #define A_DC_TAKEOVER 0x0000000008000000ULL /* -- Shutdown actions -- */ #define A_SHUTDOWN 0x0000000010000000ULL #define A_STOP 0x0000000020000000ULL #define A_EXIT_0 0x0000000040000000ULL #define A_EXIT_1 0x0000000080000000ULL #define A_SHUTDOWN_REQ 0x0000000100000000ULL #define A_ELECTION_CHECK 0x0000000200000000ULL #define A_DC_JOIN_FINAL 0x0000000400000000ULL /* -- CCM actions -- */ #define A_CCM_CONNECT 0x0000001000000000ULL #define A_CCM_DISCONNECT 0x0000002000000000ULL /* -- CIB actions -- */ #define A_CIB_START 0x0000020000000000ULL #define A_CIB_STOP 0x0000040000000000ULL /* -- Transition Engine actions -- */ /* Attempt to reach the newly calculated cluster state. This is * only called once per transition (except if it is asked to * stop the transition or start a new one). * Once given a cluster state to reach, the TE will determine * tasks that can be performed in parallel, execute them, wait * for replies and then determine the next set until the new * state is reached or no further tasks can be taken. */ #define A_TE_INVOKE 0x0000100000000000ULL #define A_TE_START 0x0000200000000000ULL #define A_TE_STOP 0x0000400000000000ULL #define A_TE_CANCEL 0x0000800000000000ULL #define A_TE_HALT 0x0001000000000000ULL /* -- Policy Engine actions -- */ /* Calculate the next state for the cluster. This is only * invoked once per needed calculation. */ #define A_PE_INVOKE 0x0002000000000000ULL #define A_PE_START 0x0004000000000000ULL #define A_PE_STOP 0x0008000000000000ULL /* -- Misc actions -- */ /* Add a system generated "block" so that resources aren't moved * to or are actively moved away from the affected node. This * way we can return quickly even if busy with other things.
*/ #define A_NODE_BLOCK 0x0010000000000000ULL /* Update our information in the local CIB */ #define A_UPDATE_NODESTATUS 0x0020000000000000ULL #define A_CIB_BUMPGEN 0x0040000000000000ULL #define A_READCONFIG 0x0080000000000000ULL /* -- LRM Actions -- */ /* Connect to the Local Resource Manager */ #define A_LRM_CONNECT 0x0100000000000000ULL /* Disconnect from the Local Resource Manager */ #define A_LRM_DISCONNECT 0x0200000000000000ULL #define A_LRM_INVOKE 0x0400000000000000ULL #define A_LRM_EVENT 0x0800000000000000ULL /* -- Logging actions -- */ #define A_LOG 0x1000000000000000ULL #define A_ERROR 0x2000000000000000ULL #define A_WARN 0x4000000000000000ULL /* -- Composite masks: bitwise ORs of the A_* bits defined above -- */ #define O_EXIT (A_SHUTDOWN|A_STOP|A_CCM_DISCONNECT|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP) #define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED) #define O_DC_TIMER_RESTART (A_NOTHING) #define O_PE_RESTART (A_PE_START|A_PE_STOP) #define O_TE_RESTART (A_TE_START|A_TE_STOP) #define O_CIB_RESTART (A_CIB_START|A_CIB_STOP) /*====================================== * * "register" contents * * Things we may want to remember regardless of which state we are in. * * These also count as inputs for synthesizing I_* * *======================================*/ #define R_THE_DC 0x00000001ULL /* Are we the DC? */ #define R_STARTING 0x00000002ULL /* Are we starting up? */ #define R_SHUTDOWN 0x00000004ULL /* Are we trying to shut down? */ #define R_STAYDOWN 0x00000008ULL /* Should we stay down rather than restart? */ #define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */ #define R_READ_CONFIG 0x00000040ULL #define R_INVOKE_PE 0x00000080ULL /* Does the PE need to be invoked at the next appropriate point? */ #define R_CIB_CONNECTED 0x00000100ULL /* Is the CIB connected? */ #define R_PE_CONNECTED 0x00000200ULL /* Is the Policy Engine connected? */ #define R_TE_CONNECTED 0x00000400ULL /* Is the Transition Engine connected?
*/ #define R_LRM_CONNECTED 0x00000800ULL /* Is the Local Resource Manager connected? */ #define R_CIB_REQUIRED 0x00001000ULL /* Is the CIB required? */ #define R_PE_REQUIRED 0x00002000ULL /* Is the Policy Engine required? */ #define R_TE_REQUIRED 0x00004000ULL /* Is the Transition Engine required? */ +#define R_ST_REQUIRED 0x00008000ULL /* Is the Stonith daemon required? */ #define R_CIB_DONE 0x00010000ULL /* Have we calculated the CIB? */ #define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */ #define R_CIB_ASKED 0x00040000ULL /* Have we asked for an up-to-date CIB */ #define R_CCM_DATA 0x00100000ULL /* Have we got CCM data yet */ #define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet */ #define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */ #define R_CCM_DISCONNECTED 0x00800000ULL /* did we sign out of our own accord */ #define R_REQ_PEND 0x01000000ULL /* Are there Requests waiting for processing? */ #define R_PE_PEND 0x02000000ULL /* Has the PE been invoked and we're awaiting a reply? */ #define R_TE_PEND 0x04000000ULL /* Has the TE been invoked and we're awaiting completion? */ #define R_RESP_PEND 0x08000000ULL /* Do we have clients waiting on a response?
if so perhaps we shouldn't stop yet */ #define R_IN_TRANSITION 0x10000000ULL /* */ #define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all * resources in preparation for * shutting down */ #define R_IN_RECOVERY 0x80000000ULL /* C_* values are what callers pass as the first argument of * register_fsa_input() / register_fsa_error(); C_ILLEGAL is the * final enumerator */ enum crmd_fsa_cause { C_UNKNOWN = 0, C_STARTUP, C_IPC_MESSAGE, C_HA_MESSAGE, C_CCM_CALLBACK, C_CRMD_STATUS_CALLBACK, C_LRM_OP_CALLBACK, C_LRM_MONITOR_CALLBACK, C_TIMER_POPPED, C_SHUTDOWN, C_HEARTBEAT_FAILED, C_SUBSYSTEM_CONNECT, C_HA_DISCONNECT, C_FSA_INTERNAL, C_ILLEGAL }; /* String helpers for the FSA types above; fsa_action2string() output * is used as a "%s" argument in log messages */ extern const char *fsa_input2string(enum crmd_fsa_input input); extern const char *fsa_state2string(enum crmd_fsa_state state); extern const char *fsa_cause2string(enum crmd_fsa_cause cause); extern const char *fsa_action2string(long long action); #endif diff --git a/crmd/lrm.c b/crmd/lrm.c index 9bc1268f1c..ae8ff62f78 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,1907 +1,1760 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details.
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct recurring_op_s { char *rsc_id; char *op_key; int call_id; int interval; gboolean remove; gboolean cancelled; }; char *make_stop_id(const char *rsc, int call_id); void cib_rsc_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); gboolean build_operation_update( xmlNode *rsc_list, lrm_rsc_t *rsc, lrm_op_t *op, const char *src, int lpc, int level); gboolean build_active_RAs(xmlNode *rsc_list); gboolean is_rsc_active(const char *rsc_id); int do_update_resource(lrm_op_t *op); gboolean process_lrm_event(lrm_op_t *op); void do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation, xmlNode *msg, xmlNode *request); lrm_op_t *construct_op( xmlNode *rsc_op, const char *rsc_id, const char *operation); void send_direct_ack(const char *to_host, const char *to_sys, lrm_rsc_t *rsc, lrm_op_t* op, const char *rsc_id); void free_recurring_op(gpointer value); GHashTable *resources = NULL; GHashTable *pending_ops = NULL; GCHSource *lrm_source = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; void lrm_connection_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } lrm_source = NULL; } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(fsa_lrm_conn == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if(action & A_LRM_DISCONNECT) { 
if(verify_stopped(cur_state, LOG_INFO) == FALSE) { crmd_fsa_stall(NULL); return; } if(is_set(fsa_input_register, R_LRM_CONNECTED)) { clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); crm_info("Disconnected from the LRM"); } /* TODO: Clean up the hashtable */ } if(action & A_LRM_CONNECT) { int ret = HA_OK; pending_ops = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resources = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(ret == HA_OK) { crm_debug("Connecting to the LRM"); ret = fsa_lrm_conn->lrm_ops->signon( fsa_lrm_conn, CRM_SYSTEM_CRMD); } if(ret != HA_OK) { if(++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return; } } if(ret == HA_OK) { crm_debug_4("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback( fsa_lrm_conn, lrm_op_callback); if(ret != HA_OK) { crm_err("Failed to set LRM callbacks"); } } if(ret != HA_OK) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } /* TODO: create a destroy handler that causes * some recovery to happen */ lrm_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, lrm_connection_destroy); set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); crm_debug("LRM connection established"); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; int *log_level = user_data; struct recurring_op_s *pending = value; do_crm_log(*log_level, "Pending action: %s (%s)", 
stop_id, pending->op_key); } static void ghash_print_pending_for_rsc(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; char *rsc = user_data; struct recurring_op_s *pending = value; if(safe_str_eq(rsc, pending->rsc_id)) { do_crm_log(LOG_NOTICE, "%sction %s (%s) incomplete at shutdown", pending->interval==0?"A":"Recurring a", stop_id, pending->op_key); } } static void ghash_count_pending(gpointer key, gpointer value, gpointer user_data) { int *counter = user_data; struct recurring_op_s *pending = value; if(pending->interval > 0) { /* Ignore recurring actions in the shutdown calculations */ return; } (*counter)++; } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; GListPtr lrm_list = NULL; crm_debug("Checking for active resources before exit"); if(cur_state == S_TERMINATE) { log_level = LOG_ERR; } if(pending_ops) { g_hash_table_foreach(pending_ops, ghash_count_pending, &counter); } if(counter > 0) { rc = FALSE; do_crm_log(log_level, "%d pending LRM operations at shutdown%s", counter, cur_state == S_TERMINATE?"":"... waiting"); if(cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_foreach( pending_ops, ghash_print_pending, &log_level); } goto bail; } if(is_set(fsa_input_register, R_LRM_CONNECTED)) { lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); } slist_iter( rsc_id, char, lrm_list, lpc, if(is_rsc_active(rsc_id) == FALSE) { continue; } crm_err("Resource %s was active at shutdown." 
" You may ignore this error if it is unmanaged.", rsc_id); g_hash_table_foreach( pending_ops, ghash_print_pending_for_rsc, rsc_id); ); slist_destroy(char, rid, lrm_list, free(rid)); bail: set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); if(cur_state == S_TERMINATE) { rc = TRUE; } return rc; } static char * get_rsc_metadata(const char *type, const char *class, const char *provider) { char *metadata = NULL; CRM_CHECK(type != NULL, return NULL); CRM_CHECK(class != NULL, return NULL); if(provider == NULL) { provider = "heartbeat"; } crm_debug_2("Retreiving metadata for %s::%s:%s", type, class, provider); metadata = fsa_lrm_conn->lrm_ops->get_rsc_type_metadata( fsa_lrm_conn, class, type, provider); if(metadata) { /* copy the metadata because the LRM likes using * g_alloc instead of cl_malloc */ char *m_copy = crm_strdup(metadata); g_free(metadata); metadata = m_copy; } else { crm_warn("No metadata found for %s::%s:%s", type, class, provider); } return metadata; } typedef struct reload_data_s { char *key; char *metadata; time_t last_query; gboolean can_reload; GListPtr restart_list; } reload_data_t; static void g_hash_destroy_reload(gpointer data) { reload_data_t *reload = data; crm_free(reload->key); crm_free(reload->metadata); slist_destroy(char, child, reload->restart_list, crm_free(child)); crm_free(reload); } GHashTable *reload_hash = NULL; static GListPtr get_rsc_restart_list(lrm_rsc_t *rsc, lrm_op_t *op) { int len = 0; char *key = NULL; char *copy = NULL; const char *value = NULL; const char *provider = NULL; xmlNode *params = NULL; xmlNode *actions = NULL; xmlNode *metadata = NULL; time_t now = time(NULL); reload_data_t *reload = NULL; if(reload_hash == NULL) { reload_hash = g_hash_table_new_full( g_str_hash, g_str_equal, NULL, g_hash_destroy_reload); } provider = rsc->provider; if(provider == NULL) { provider = "heartbeat"; } len = strlen(rsc->type) + strlen(rsc->class) + strlen(provider) + 4; crm_malloc(key, len); snprintf(key, len, "%s::%s:%s", rsc->type, 
rsc->class, provider); reload = g_hash_table_lookup(reload_hash, key); if(reload && ((now - 9) > reload->last_query) && safe_str_eq(op->op_type, RSC_START)) { reload = NULL; /* re-query */ } if(reload == NULL) { crm_malloc0(reload, sizeof(reload_data_t)); g_hash_table_replace(reload_hash, key, reload); reload->last_query = now; reload->key = key; key = NULL; reload->metadata = get_rsc_metadata(rsc->type, rsc->class, provider); metadata = string2xml(reload->metadata); if(metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type); goto cleanup; } actions = find_xml_node(metadata, "actions", TRUE); xml_child_iter_filter( actions, action, "action", value = crm_element_value(action, "name"); if(safe_str_eq("reload", value)) { reload->can_reload = TRUE; break; } ); if(reload->can_reload == FALSE) { goto cleanup; } params = find_xml_node(metadata, "parameters", TRUE); xml_child_iter_filter( params, param, "parameter", value = crm_element_value(param, "unique"); if(crm_is_true(value)) { value = crm_element_value(param, "name"); if(value == NULL) { crm_err("%s: NULL param", key); continue; } crm_debug("Attr %s is not reloadable", value); copy = crm_strdup(value); CRM_CHECK(copy != NULL, continue); reload->restart_list = g_list_append(reload->restart_list, copy); } ); } cleanup: crm_free(key); free_xml(metadata); return reload?reload->restart_list:NULL; } -static void -append_digest(lrm_rsc_t *rsc, lrm_op_t *op, xmlNode *update, const char *version, const char *magic, int level) -{ - /* this will enable us to later determine that the - * resource's parameters have changed and we should force - * a restart - */ - char *digest = NULL; - xmlNode *args_xml = NULL; - - if(op->params == NULL) { - return; - } - - args_xml = create_xml_node(NULL, XML_TAG_PARAMS); - g_hash_table_foreach(op->params, hash2field, args_xml); - filter_action_parameters(args_xml, version); - digest = calculate_xml_digest(args_xml, TRUE, FALSE); - -#if 0 - 
if(level < crm_log_level - && op->interval == 0 - && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) { - char *digest_source = dump_xml_unformatted(args_xml); - do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n", - digest, ID(update), magic, digest_source); - crm_free(digest_source); - } -#endif - crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); - - free_xml(args_xml); - crm_free(digest); -} - static void append_restart_list(lrm_rsc_t *rsc, lrm_op_t *op, xmlNode *update, const char *version) { int len = 0; char *list = NULL; char *digest = NULL; const char *value = NULL; gboolean non_empty = FALSE; xmlNode *restart = NULL; GListPtr restart_list = NULL; if(op->interval > 0) { /* monitors are not reloadable */ return; } else if(op->params == NULL) { crm_debug("%s has no parameters", ID(update)); return; } else if(rsc == NULL) { return; } else if(crm_str_eq(CRMD_ACTION_START, op->op_type, TRUE) == FALSE) { /* only starts are potentially reloadable */ return; } else if(compare_version("1.0.8", version) > 0) { /* Caller version does not support reloads */ return; } restart_list = get_rsc_restart_list(rsc, op); if(restart_list == NULL) { /* Resource does not support reloads */ return; } restart = create_xml_node(NULL, XML_TAG_PARAMS); slist_iter(param, const char, restart_list, lpc, int start = len; CRM_CHECK(param != NULL, continue); value = g_hash_table_lookup(op->params, param); if(value != NULL) { non_empty = TRUE; crm_xml_add(restart, param, value); } len += strlen(param) + 2; crm_realloc(list, len+1); sprintf(list+start, " %s ", param); ); digest = calculate_xml_digest(restart, TRUE, FALSE); crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); #if 0 crm_debug("%s: %s, %s", rsc->id, digest, list); if(non_empty) { crm_log_xml_debug(restart, "restart digest source"); } #endif free_xml(restart); crm_free(digest); crm_free(list); } gboolean build_operation_update( - xmlNode *xml_rsc, 
lrm_rsc_t *rsc, lrm_op_t *op, const char *src, int lpc, int level) + xmlNode *parent, lrm_rsc_t *rsc, lrm_op_t *op, const char *src, int lpc, int level) { - char *magic = NULL; - const char *task = NULL; - xmlNode *xml_op = NULL; - char *op_id = NULL; - char *local_user_data = NULL; - const char *caller_version = NULL; - - CRM_CHECK(op != NULL, return FALSE); - crm_debug_2("%s: Updating resouce %s after %s %s op", - src, op->rsc_id, op_status2text(op->op_status), op->op_type); - - if(op->op_status == LRM_OP_CANCELLED) { - crm_debug_3("Ignoring cancelled op"); - return TRUE; - } - - if(AM_I_DC) { - caller_version = CRM_FEATURE_SET; - - } else if(fsa_our_dc_version != NULL) { - caller_version = fsa_our_dc_version; - - } else { - /* there is a small risk in formerly mixed clusters that - * it will be sub-optimal. - * however with our upgrade policy, the update we send - * should still be completely supported anyway - */ - caller_version = g_hash_table_lookup( - op->params, XML_ATTR_CRM_VERSION); - crm_warn("Falling back to operation originator version: %s", - caller_version); - } - crm_debug_3("DC version: %s", caller_version); - - task = op->op_type; - /* remap the task name under various scenarios - * this makes life easier for the PE when its trying determin the current state - */ - if(crm_str_eq(task, "reload", TRUE)) { - if(op->op_status == LRM_OP_DONE) { - task = CRMD_ACTION_START; - } else { - task = CRMD_ACTION_STATUS; - } - - } else if(crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { - /* if the migrate_from fails it will have enough info to do the right thing */ - if(op->op_status == LRM_OP_DONE) { - task = CRMD_ACTION_STOP; - } else { - task = CRMD_ACTION_STATUS; - } - - } else if(op->op_status == LRM_OP_DONE - && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { - task = CRMD_ACTION_START; - - } else if(crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) { - const char *n_type = crm_meta_value(op->params, "notify_type"); - const char *n_task = 
crm_meta_value(op->params, "notify_operation"); - CRM_DEV_ASSERT(n_type != NULL); - CRM_DEV_ASSERT(n_task != NULL); - op_id = generate_notify_key(op->rsc_id, n_type, n_task); - - /* these are not yet allowed to fail */ - op->op_status = LRM_OP_DONE; - op->rc = 0; - - } - - if (op_id == NULL) { - op_id = generate_op_key(op->rsc_id, task, op->interval); - } - - xml_op = find_entity(xml_rsc, XML_LRM_TAG_RSC_OP, op_id); - if(xml_op != NULL) { - crm_log_xml(LOG_DEBUG, "Replacing existing entry", xml_op); - - } else { - xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP); - } - - if(op->user_data == NULL) { - crm_debug("Generating fake transition key for:" - " %s_%s_%d %d from %s", - op->rsc_id, op->op_type, op->interval, op->call_id, - op->app_name); - local_user_data = generate_transition_key(-1, op->call_id, 0, FAKE_TE_ID); - op->user_data = local_user_data; - } + xmlNode *xml_op = NULL; + const char *caller_version = CRM_FEATURE_SET; + if(AM_I_DC) { - magic = generate_transition_magic(op->user_data, op->op_status, op->rc); + } else if(fsa_our_dc_version != NULL) { + caller_version = fsa_our_dc_version; - crm_xml_add(xml_op, XML_ATTR_ID, op_id); - crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); - crm_xml_add(xml_op, XML_ATTR_ORIGIN, src); - crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); - crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); - crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); - - crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); - crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); - crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); - crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); - - if(compare_version("2.1", caller_version) <= 0) { - if(op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { - crm_debug_2("Timing data (%s_%s_%d): last=%lu change=%lu exec=%lu queue=%lu", - op->rsc_id, op->op_type, op->interval, - op->t_run, op->t_rcchange, op->exec_time, op->queue_time); - - 
crm_xml_add_int(xml_op, "last-run", op->t_run); - crm_xml_add_int(xml_op, "last-rc-change", op->t_rcchange); - crm_xml_add_int(xml_op, "exec-time", op->exec_time); - crm_xml_add_int(xml_op, "queue-time", op->queue_time); - } - } - - append_digest(rsc, op, xml_op, caller_version, magic, level); + } else { + /* there is a small risk in formerly mixed clusters that + * it will be sub-optimal. + * however with our upgrade policy, the update we send + * should still be completely supported anyway + */ + caller_version = g_hash_table_lookup( + op->params, XML_ATTR_CRM_VERSION); + crm_warn("Falling back to operation originator version: %s", + caller_version); + } + xml_op = create_operation_update(parent, op, caller_version, 0, src); + if(xml_op) { append_restart_list(rsc, op, xml_op, caller_version); - - if(op->op_status != LRM_OP_DONE - && crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { - const char *host = crm_meta_value(op->params, "migrate_source_uuid"); - crm_xml_add(xml_op, CRMD_ACTION_MIGRATED, host); - } - - if(local_user_data) { - crm_free(local_user_data); - op->user_data = NULL; - } - crm_free(magic); - crm_free(op_id); - return TRUE; + } + return TRUE; } gboolean is_rsc_active(const char *rsc_id) { GList *op_list = NULL; gboolean active = FALSE; lrm_rsc_t *the_rsc = NULL; state_flag_t cur_state = 0; int max_call_id = -1; if(fsa_lrm_conn == NULL) { return FALSE; } the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rsc_id); crm_debug_3("Processing lrm_rsc_t entry %s", rsc_id); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); return FALSE; } op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_3("\tcurrent state:%s",cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s_%d (%d) for %s (status=%d, rc=%d)", op->op_type, op->interval, op->call_id, the_rsc->id, op->op_status, op->rc); CRM_ASSERT(max_call_id <= op->call_id); if(op->rc == EXECRA_OK && 
safe_str_eq(op->op_type, CRMD_ACTION_STOP)) { active = FALSE; } else if(op->rc == EXECRA_OK && safe_str_eq(op->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ active = FALSE; } else if(op->rc == EXECRA_NOT_RUNNING) { active = FALSE; } else { active = TRUE; } max_call_id = op->call_id; lrm_free_op(op); ); g_list_free(op_list); lrm_free_rsc(the_rsc); return active; } gboolean build_active_RAs(xmlNode *rsc_list) { GList *op_list = NULL; GList *lrm_list = NULL; gboolean found_op = FALSE; state_flag_t cur_state = 0; if(fsa_lrm_conn == NULL) { return FALSE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rid, char, lrm_list, lpc, int max_call_id = -1; xmlNode *xml_rsc = NULL; lrm_rsc_t *the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM: %s", rid); continue; } xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, the_rsc->type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, the_rsc->class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER,the_rsc->provider); op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); slist_iter( op, lrm_op_t, op_list, llpc, if(max_call_id < op->call_id) { build_operation_update( xml_rsc, the_rsc, op, __FUNCTION__, llpc, LOG_DEBUG_2); } else if(max_call_id > op->call_id) { crm_err("Bad call_id in list=%d. 
Previous call_id=%d", op->call_id, max_call_id); } else { crm_warn("lrm->get_cur_state() returned" " duplicate entries for call_id=%d", op->call_id); } max_call_id = op->call_id; found_op = TRUE; lrm_free_op(op); ); if(found_op == FALSE && g_list_length(op_list) != 0) { crm_err("Could not properly determin last op" " for %s from %d entries", the_rsc->id, g_list_length(op_list)); } g_list_free(op_list); lrm_free_rsc(the_rsc); ); slist_destroy(char, rid, lrm_list, free(rid)); return TRUE; } xmlNode* do_lrm_query(gboolean is_replace) { gboolean shut_down = FALSE; xmlNode *xml_result= NULL; xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; const char *exp_state = CRMD_STATE_ACTIVE; if(is_set(fsa_input_register, R_SHUTDOWN)) { exp_state = CRMD_STATE_INACTIVE; shut_down = TRUE; } xml_state = create_node_state( fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state, !shut_down, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS); crm_log_xml_debug_3(xml_state, "Current state of the LRM"); free_xml(xml_state); return xml_result; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static void delete_rsc_entry(const char *rsc_id) { int max = 0; char *rsc_xpath = NULL; CRM_CHECK(rsc_id != NULL, return); max = strlen(rsc_template) + strlen(rsc_id) + strlen(fsa_our_uname) + 1; crm_malloc0(rsc_xpath, max); snprintf(rsc_xpath, max, rsc_template, fsa_our_uname, rsc_id); CRM_CHECK(rsc_id != NULL, return); crm_debug("sync: Sending delete op for %s", rsc_id); fsa_cib_conn->cmds->delete( fsa_cib_conn, 
rsc_xpath, NULL, cib_quorum_override|cib_xpath); crm_free(rsc_xpath); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" static void delete_op_entry(lrm_op_t *op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if(op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete( fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if(call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 10; crm_malloc0(op_xpath, max); snprintf(op_xpath, max, op_call_template, fsa_our_uname, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 1; crm_malloc0(op_xpath, max); snprintf(op_xpath, max, op_template, fsa_our_uname, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete( fsa_cib_conn, op_xpath, NULL, cib_quorum_override|cib_xpath); crm_free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_debug_2(xml_top, "op:cancel"); free_xml(xml_top); } static gboolean cancel_op(lrm_rsc_t *rsc, const char *key, int op, gboolean remove) { int rc = HA_OK; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc != NULL, 
return FALSE); if(key == NULL) { key = make_stop_id(rsc->id, op); } pending = g_hash_table_lookup(pending_ops, key); if(pending) { if(remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if(pending->cancelled) { crm_debug("Operation %s already cancelled", key); return TRUE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); } crm_debug("Cancelling op %d for %s (%s)", op, rsc->id, key); rc = rsc->ops->cancel_op(rsc, op); if(rc != HA_OK) { crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc->id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ return FALSE; } return TRUE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrm_rsc_t *rsc; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->op_key, data->key)) { data->done = TRUE; if (cancel_op(data->rsc, key, op->call_id, data->remove) == FALSE) { return TRUE; } } return FALSE; } static gboolean cancel_op_key(lrm_rsc_t *rsc, const char *key, gboolean remove) { struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; g_hash_table_foreach_remove(pending_ops, cancel_action_by_key, &data); return data.done; } static lrm_rsc_t * get_lrm_resource(xmlNode *resource, xmlNode *op_msg, gboolean do_create) { char rid[64]; lrm_rsc_t *rsc = NULL; const char *short_id = ID(resource); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_debug_2("Retrieving %s from the LRM.", short_id); CRM_CHECK(short_id != 
NULL, return NULL); if(rsc == NULL) { /* check if its already there (short name) */ strncpy(rid, short_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && long_id != NULL) { /* try the long name instead */ strncpy(rid, long_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && do_create) { /* add it to the LRM */ const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); GHashTable *params = xml2list(op_msg); CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_debug_2("Adding rsc %s before operation", short_id); strncpy(rid, short_id, 64); rid[63] = 0; if(g_hash_table_size(params) == 0) { crm_log_xml_warn(op_msg, "EmptyParams"); } fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, class, type, provider, params); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); g_hash_table_destroy(params); if(rsc == NULL) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM", rid); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } return rsc; } static gboolean lrm_remove_deleted_op( gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if(safe_str_eq(rsc, pending->rsc_id)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean done = FALSE; gboolean create_rsc = TRUE; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); crm_op = 
crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_debug_2("LRM command from: %s", from_sys); if(safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { operation = CRMD_ACTION_DELETE; } else if(safe_str_eq(operation, CRM_OP_LRM_REFRESH)) { crm_op = CRM_OP_LRM_REFRESH; } else if(safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { #if HAVE_STRUCT_LRM_OPS_FAIL_RSC lrm_rsc_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node( input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if(rsc) { int rc = HA_OK; crm_info("Failing resource %s...", rsc->id); rc = fsa_lrm_conn->lrm_ops->fail_rsc(fsa_lrm_conn, rsc->id, 1, "do_lrm_invoke: Async failure"); if(rc != HA_OK) { crm_err("Could not initiate an asynchronous failure for %s (%d)", rsc->id, rc); } lrm_free_rsc(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } return; #else crm_info("Failing resource..."); operation = "fail"; #endif } else if(input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if(safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { enum cib_errors rc = cib_ok; xmlNode *fragment = do_lrm_query(TRUE); crm_info("Forcing a local LRM refresh"); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc); free_xml(fragment); } else if(safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query(FALSE); xmlNode *reply = create_reply(input->msg, data); if(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml(LOG_ERR, "reply", reply); } free_xml(reply); free_xml(data); } else if(safe_str_eq(operation, CRM_OP_PROBED) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { int cib_options = cib_inhibit_notify; const char *probed = XML_BOOLEAN_TRUE; if(safe_str_eq(crm_op, 
CRM_OP_REPROBE)) { cib_options = cib_none; probed = XML_BOOLEAN_FALSE; } update_attrd(NULL, CRM_OP_PROBED, probed); } else if(operation != NULL) { lrm_rsc_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node( input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if(rsc == NULL && create_rsc) { crm_err("Invalid resource definition"); crm_log_xml_warn(input->msg, "bad input"); } else if(rsc == NULL) { lrm_op_t* op = NULL; crm_err("Not creating resource for a %s event: %s", operation, ID(input->xml)); crm_log_xml_warn(input->msg, "bad input"); op = construct_op(input->xml, ID(xml_rsc), operation); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; CRM_ASSERT(op != NULL); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_CANCEL)) { lrm_op_t* op = NULL; char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); crm_free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); crm_free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); crm_free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op = construct_op(input->xml, rsc->id, op_task); CRM_ASSERT(op != NULL); op_key = generate_op_key( rsc->id,op_task,crm_parse_int(op_interval,"0")); 
crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id?call_id:"NA"); call = crm_parse_int(call_id, "0"); if(call == 0) { /* the normal case when the PE cancels a recurring op */ done = cancel_op_key(rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ done = cancel_op(rsc, NULL, call, TRUE); } if(done == FALSE) { crm_debug("Nothing known about operation %d for %s", call, op_key); delete_op_entry(NULL, rsc->id, op_key, call); /* needed?? surely not otherwise the cancel_op_(_key) wouldn't * have failed in the first place */ g_hash_table_remove(pending_ops, op_key); } op->rc = EXECRA_OK; op->op_status = LRM_OP_DONE; send_direct_ack(from_host, from_sys, rsc, op, rsc->id); crm_free(op_key); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { int rc = HA_OK; lrm_op_t* op = NULL; CRM_ASSERT(rsc != NULL); op = construct_op(input->xml, rsc->id, operation); CRM_ASSERT(op != NULL); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; crm_info("Removing resource %s from the LRM", rsc->id); rc = fsa_lrm_conn->lrm_ops->delete_rsc(fsa_lrm_conn, rsc->id); if(rc != HA_OK) { crm_err("Failed to remove resource %s", rsc->id); op->op_status = LRM_OP_ERROR; op->rc = EXECRA_UNKNOWN_ERROR; } delete_rsc_entry(rsc->id); send_direct_ack(from_host, from_sys, rsc, op, rsc->id); free_lrm_op(op); g_hash_table_foreach_remove(pending_ops, lrm_remove_deleted_op, rsc->id); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc->id); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE); crm_free(now_s); } } else if(rsc != NULL) { do_lrm_rsc_op(rsc, operation, input->xml, input->msg); } lrm_free_rsc(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. 
%s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } lrm_op_t * construct_op(xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrm_op_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; const char *transition = NULL; CRM_DEV_ASSERT(rsc_id != NULL); crm_malloc0(op, sizeof(lrm_op_t)); op->op_type = crm_strdup(operation); op->op_status = LRM_OP_PENDING; op->rc = -1; op->rsc_id = crm_strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; op->copyparams = 0; op->app_name = crm_strdup(CRM_SYSTEM_CRMD); if(rsc_op == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; op->user_data_len = 0; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, crm_strdup(XML_ATTR_CRM_VERSION), crm_strdup(CRM_FEATURE_SET)); crm_debug_2("Constructed %s op for %s", operation, rsc_id); return op; } op->params = xml2list(rsc_op); if(op->params == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } op_delay = crm_meta_value(op->params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(op->params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(op->params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); /* sanity */ if(op->interval < 0) { op->interval = 0; } if(op->timeout < 0) { op->timeout = 0; } if(op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = crm_strdup(transition); op->user_data_len = 1+strlen(op->user_data); if(op->interval != 0) { if(safe_str_eq(operation, CRMD_ACTION_START) || 
safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } /* reset the resource's parameters? */ if(op->interval == 0) { if(safe_str_eq(CRMD_ACTION_START, operation) || safe_str_eq(CRMD_ACTION_STATUS, operation)) { op->copyparams = 1; } } crm_debug_2("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrm_rsc_t *rsc, lrm_op_t* op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; xmlNode *fragment; CRM_CHECK(op != NULL, return); if(op->rsc_id == NULL) { CRM_DEV_ASSERT(rsc_id != NULL); op->rsc_id = crm_strdup(rsc_id); } if(to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__, 0, LOG_DEBUG); fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS); reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_debug_2(update, "ACK Update"); crm_info("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if(relay_message(reply, TRUE) == FALSE) { crm_log_xml(LOG_ERR, "Unable to route reply", reply); } free_xml(fragment); free_xml(update); free_xml(reply); } static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(op->interval != 0 && safe_str_eq(op->rsc_id, rsc->id)) { if (cancel_op(rsc, key, op->call_id, FALSE) == FALSE) { return 
TRUE; } } return FALSE; } void do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation, xmlNode *msg, xmlNode *request) { int call_id = 0; char *op_id = NULL; lrm_op_t* op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; CRM_CHECK(rsc != NULL, return); if(msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if(transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(msg, rsc->id, operation); /* stop the monitor before stopping the resource */ if(crm_str_eq(operation, CRMD_ACTION_STOP, TRUE) || crm_str_eq(operation, CRMD_ACTION_DEMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_PROMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_MIGRATE, TRUE)) { g_hash_table_foreach_remove(pending_ops, stop_recurring_action_by_rsc, rsc); } /* now do the op */ crm_info("Performing key=%s op=%s_%s_%d )", transition, rsc->id, operation, op->interval); if(fsa_state != S_NOT_DC && fsa_state != S_TRANSITION_ENGINE) { if(safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s" " in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = 99; op->op_status = LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); free_lrm_op(op); crm_free(op_id); return; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if(op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(rsc, op_id, FALSE); op->target_rc = CHANGED; } else { op->target_rc = EVERYTIME; } g_hash_table_replace(resources,crm_strdup(rsc->id), crm_strdup(op_id)); call_id = rsc->ops->perform_op(rsc, op); if(call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if(op->interval > 0 && op->start_delay > 5 * 60 * 1000) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 
5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); crm_free(uuid); op->rc = target_rc; op->op_status = LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; crm_malloc0(pending, sizeof(struct recurring_op_s)); crm_debug_2("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_key = crm_strdup(op_id); pending->rsc_id = crm_strdup(rsc->id); g_hash_table_replace(pending_ops, call_id_s, pending); } crm_free(op_id); free_lrm_op(op); return; } void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s*)value; crm_free(op->rsc_id); crm_free(op->op_key); crm_free(op); } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } lrm_op_t * copy_lrm_op(const lrm_op_t *op) { lrm_op_t *op_copy = NULL; CRM_CHECK(op != NULL, return NULL); CRM_CHECK(op->rsc_id != NULL, return NULL); crm_malloc0(op_copy, sizeof(lrm_op_t)); op_copy->op_type = crm_strdup(op->op_type); /* input fields */ op_copy->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(op->params != NULL) { g_hash_table_foreach(op->params, dup_attr, op_copy->params); } op_copy->timeout = op->timeout; op_copy->interval = op->interval; op_copy->target_rc = op->target_rc; /* in the CRM, this is always a string */ if(op->user_data != NULL) { op_copy->user_data = crm_strdup(op->user_data); } /* output fields */ op_copy->op_status = op->op_status; op_copy->rc = 
op->rc; op_copy->call_id = op->call_id; op_copy->output = NULL; op_copy->rsc_id = crm_strdup(op->rsc_id); if(op->app_name != NULL) { op_copy->app_name = crm_strdup(op->app_name); } if(op->output != NULL) { op_copy->output = crm_strdup(op->output); } return op_copy; } lrm_rsc_t * copy_lrm_rsc(const lrm_rsc_t *rsc) { lrm_rsc_t *rsc_copy = NULL; if(rsc == NULL) { return NULL; } crm_malloc0(rsc_copy, sizeof(lrm_rsc_t)); rsc_copy->id = crm_strdup(rsc->id); rsc_copy->type = crm_strdup(rsc->type); rsc_copy->class = NULL; rsc_copy->provider = NULL; if(rsc->class != NULL) { rsc_copy->class = crm_strdup(rsc->class); } if(rsc->provider != NULL) { rsc_copy->provider = crm_strdup(rsc->provider); } /* GHashTable* params; */ rsc_copy->params = NULL; rsc_copy->ops = NULL; return rsc_copy; } void cib_rsc_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { switch(rc) { case cib_ok: case cib_diff_failed: case cib_diff_resync: crm_debug_2("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, cib_error2string(rc)); } } int do_update_resource(lrm_op_t* op) { /* */ int rc = cib_ok; lrm_rsc_t *rsc = NULL; xmlNode *update, *iter = NULL; int call_opt = cib_quorum_override; CRM_CHECK(op != NULL, return 0); if(fsa_state == S_ELECTION || fsa_state == S_PENDING) { crm_info("Sending update to local CIB during election"); call_opt |= cib_scope_local; } iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); set_uuid(iter, XML_ATTR_UUID, fsa_our_uname); crm_xml_add(iter, XML_ATTR_UNAME, fsa_our_uname); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); rsc = 
fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id); CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id)); CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id)); crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER,rsc->provider); build_operation_update(iter, rsc, op, __FUNCTION__, 0, LOG_DEBUG); lrm_free_rsc(rsc); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc); /* the return code is a call number, not an error code */ crm_debug_2("Sent resource state update message: %d", rc); fsa_cib_conn->cmds->register_callback( fsa_cib_conn, rc, 60, FALSE, NULL, "cib_rsc_callback", cib_rsc_callback); free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrm_op_t *op) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; int log_level = LOG_ERR; gboolean removed = FALSE; struct recurring_op_s *pending = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); switch(op->op_status) { case LRM_OP_ERROR: case LRM_OP_PENDING: case LRM_OP_NOTSUPPORTED: break; case LRM_OP_CANCELLED: log_level = LOG_INFO; break; case LRM_OP_DONE: log_level = LOG_INFO; break; case 
LRM_OP_TIMEOUT: log_level = LOG_DEBUG_3; crm_err("LRM operation %s (%d) %s (timeout=%dms)", op_key, op->call_id, op_status2text(op->op_status), op->timeout); break; default: crm_err("Mapping unknown status (%d) to ERROR", op->op_status); op->op_status = LRM_OP_ERROR; } if(op->op_status == LRM_OP_ERROR && (op->rc == EXECRA_RUNNING_MASTER || op->rc == EXECRA_NOT_RUNNING)) { /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = LRM_OP_DONE; log_level = LOG_INFO; } op_id = make_stop_id(op->rsc_id, op->call_id); pending = g_hash_table_lookup(pending_ops, op_id); if(op->op_status != LRM_OP_CANCELLED) { update_id = do_update_resource(op); if(op->interval != 0) { goto out; } } else if(op->interval == 0) { /* no known valid reason for this to happen */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); } else if(pending == NULL) { crm_err("Op %s (call=%d): No 'pending' entry", op_key, op->call_id); } else if(op->user_data == NULL) { crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if(pending->remove) { delete_op_entry(op, op->rsc_id, op_key, op->call_id); } else { crm_debug("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(g_hash_table_remove(pending_ops, op_id)) { removed = TRUE; crm_debug_2("Op %s (call=%d, stop-id=%s): Confirmed", op_key, op->call_id, op_id); } out: if(op->op_status == LRM_OP_DONE) { do_crm_log(log_level, "LRM operation %s (call=%d, rc=%d, cib-update=%d, confirmed=%s) %s", op_key, op->call_id, op->rc, update_id, removed?"true":"false", execra_code2string(op->rc)); } else { do_crm_log(log_level, "LRM operation %s (call=%d, status=%d, cib-update=%d, confirmed=%s) %s", op_key, op->call_id, op->op_status, update_id, removed?"true":"false", op_status2text(op->op_status)); } if(op->rc != 0 && op->output != NULL) { crm_info("Result: %s", op->output); } else if(op->output != NULL) { crm_debug("Result: %s", op->output); } /* If a shutdown was escalated while operations were pending, * then 
the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); crm_free(op_key); crm_free(op_id); return TRUE; } char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; crm_malloc0(op_id, strlen(rsc) + 34); if(op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } diff --git a/crmd/te_actions.c b/crmd/te_actions.c index 5cddc22472..c2a2f73a56 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -1,581 +1,570 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; void send_rsc_command(crm_action_t *action); static void te_start_action_timer(crm_graph_t *graph, crm_action_t *action) { crm_malloc0(action->timer, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action; action->timer->action = action; action->timer->source_id = g_timeout_add( action->timer->timeout + graph->network_delay, action_timer_callback, (void*)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t *graph, crm_action_t *pseudo) { crm_info("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } void -send_stonith_update(stonith_ops_t * op) +send_stonith_update(crm_action_t *action) { enum cib_errors rc = cib_ok; - const char *target = op->node_name; - const char *uuid = op->node_uuid; + const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); - CRM_CHECK(op->node_name != NULL, return); - CRM_CHECK(op->node_uuid != NULL, return); + CRM_CHECK(target != NULL, return); + CRM_CHECK(uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, 
CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override|cib_scope_local|cib_can_create); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(fsa_cib_conn, rc, FALSE, crm_strdup(target), cib_fencing_updated); } - erase_status_tag(op->node_name, XML_CIB_TAG_LRM); - erase_status_tag(op->node_name, XML_TAG_TRANSIENT_NODEATTRS); + erase_status_tag(target, XML_CIB_TAG_LRM); + erase_status_tag(target, XML_TAG_TRANSIENT_NODEATTRS); free_xml(node_state); - + #if 0 /* Make sure the membership cache is accurate */ crm_update_peer(0, 0, 0, -1, 0, uuid, target, NULL, CRM_NODE_LOST); #endif return; } static gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { + int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; - stonith_ops_t * st_op = NULL; + gboolean invalid_action = FALSE; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); - CRM_CHECK(id != NULL, - crm_log_xml_warn(action->xml, "BadAction"); - return FALSE); - CRM_CHECK(uuid != NULL, - crm_log_xml_warn(action->xml, "BadAction"); - return FALSE); - CRM_CHECK(type != NULL, - crm_log_xml_warn(action->xml, "BadAction"); - return FALSE); - CRM_CHECK(target != NULL, - crm_log_xml_warn(action->xml, "BadAction"); - return FALSE); + CRM_CHECK(id != NULL, invalid_action = TRUE); + CRM_CHECK(uuid != NULL, invalid_action = TRUE); + CRM_CHECK(type != NULL, invalid_action = TRUE); + CRM_CHECK(target != NULL, invalid_action = TRUE); + + 
if(invalid_action) { + crm_log_xml_warn(action->xml, "BadAction"); + return FALSE; + } te_log_action(LOG_INFO, "Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->stonith_timeout); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); - - crm_malloc0(st_op, sizeof(stonith_ops_t)); - if(safe_str_eq(type, "poweroff")) { - st_op->optype = POWEROFF; - } else { - st_op->optype = RESET; + + if(type == NULL) { + type = "reboot"; } - st_op->timeout = transition_graph->stonith_timeout; - st_op->node_name = crm_strdup(target); - st_op->node_uuid = crm_strdup(uuid); - - st_op->private_data = generate_transition_key( - transition_graph->id, action->id, 0, te_uuid); - - CRM_ASSERT(stonithd_input_IPC_channel() != NULL); - - if (ST_OK != stonithd_node_fence( st_op )) { - crm_err("Cannot fence %s: stonithd_node_fence() call failed ", - target); - } + rc = stonith_api->cmds->fence( + stonith_api, 0, target, type, transition_graph->stonith_timeout/1000); + + stonith_api->cmds->register_callback( + stonith_api, rc, transition_graph->stonith_timeout/1000, FALSE, + generate_transition_key(transition_graph->id, action->id, 0, te_uuid), + "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } static int get_target_rc(crm_action_t *action) { const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); if(target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t *graph, crm_action_t *action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, 
"Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s%s%s", crm_str(id), crm_str(task), on_node, is_local?" (local)":"", no_wait?" - no waiting":""); if(safe_str_eq(on_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if(crm_is_true(value)) { no_wait = TRUE; } if(is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) { /* defer until everything else completes */ te_log_action(LOG_INFO, "crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); return TRUE; } cmd = create_request(task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(on_node, crm_msg_crmd, cmd, TRUE); crm_free(counter); free_xml(cmd); value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if(rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(no_wait) { action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else { if(action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). 
Using %dms instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } return TRUE; } gboolean cib_action_update(crm_action_t *action, int status, int op_rc) { char *op_id = NULL; char *code = NULL; char *digest = NULL; xmlNode *tmp = NULL; xmlNode *params = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; enum cib_errors rc = cib_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override|cib_scope_local; if(status == LRM_OP_PENDING) { crm_debug("%s %d: Recording pending operation %s on %s", crm_element_name(action->xml), action->id, task_uuid, target); } else { crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); } action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if(action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, 
name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); xml_op = create_xml_node(rsc, XML_LRM_TAG_RSC_OP); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, -1); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, action->interval); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op_rc); crm_xml_add(xml_op, XML_ATTR_ORIGIN, __FUNCTION__); if(crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { char *key = crm_meta_name("migrate_source_uuid"); xmlNode *attrs = first_named_child(action->xml, XML_TAG_ATTRS); const char *host = crm_element_value(attrs, key); CRM_CHECK(host != NULL, crm_log_xml_err(action->xml, "Bad Op")); crm_xml_add(xml_op, CRMD_ACTION_MIGRATED, host); crm_free(key); } code = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status, op_rc); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); tmp = find_xml_node(action->xml, "attributes", TRUE); params = create_xml_node(NULL, XML_TAG_PARAMS); copy_in_properties(params, tmp); filter_action_parameters(params, CRM_FEATURE_SET); digest = calculate_xml_digest(params, TRUE, FALSE); /* info for now as this area has been problematic to debug */ crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", params); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); free_xml(params); crm_debug_3("Updating CIB with \"%s\" (%s): %s 
%s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); rc = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); crm_debug_2("Updating CIB with %s action %d: %s on %s (call_id=%d)", op_status2text(status), action->id, task_uuid, target, rc); add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } static gboolean te_rsc_command(crm_graph_t *graph, crm_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if(safe_str_eq(on_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if(crm_is_true(value)) { no_wait = TRUE; } crm_info("Initiating action %d: %s %s on %s%s%s", action->id, task, task_uuid, on_node, is_local?" (local)":"", no_wait?" 
- no waiting":""); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if(is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __FUNCTION__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(on_node, crm_msg_lrmd, cmd, TRUE); } crm_free(counter); free_xml(cmd); action->executed = TRUE; if(rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(no_wait) { action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else { if(action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } value = crm_meta_value(action->params, XML_OP_ATTR_PENDING); if(crm_is_true(value)) { /* write a "pending" entry to the CIB, inhibit notification */ crm_info("Recording pending op %s in the CIB", task_uuid); cib_action_update(action, LRM_OP_PENDING, EXECRA_STATUS_UNKNOWN); } return TRUE; } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; void notify_crmd(crm_graph_t *graph) { int log_level = LOG_DEBUG; const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); CRM_CHECK(graph->complete, graph->complete = TRUE); switch(graph->completion_action) { case tg_stop: type = "stop"; /* fall through */ case tg_done: type = "done"; log_level = LOG_INFO; if(fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if(fsa_state == 
S_TRANSITION_ENGINE) { event = I_PE_CALC; } else if(fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: type = "shutdown"; if(is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { event = I_TERMINATE; } } te_log_action(log_level, "Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } } diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index b6143c3231..0d95a87b7b 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -1,507 +1,496 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include /* For ONLINESTATUS etc */ void te_update_confirm(const char *event, xmlNode *msg); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; crm_trigger_t *transition_trigger = NULL; /* #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" static const char *get_node_id(xmlNode *rsc_op) { xmlNode *node = rsc_op; while(node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) { node = node->parent; } CRM_CHECK(node != NULL, return NULL); return ID(node); } static void process_resource_updates(xmlXPathObject *xpathObj) { /* */ int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; xmlNode *diff = NULL; xmlNode *cib_top = NULL; xmlXPathObject *xpathObj = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if(transition_graph == NULL) { crm_debug_3("No graph"); return; } else if(rc < cib_ok) { crm_debug_3("Filter rc=%d (%s)", rc, cib_error2string(rc)); return; } else if(transition_graph->complete == TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_debug_2("Filter state=%s, complete=%d", 
fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates, fsa_state2string(fsa_state)); log_cib_diff(LOG_DEBUG_2, diff, op); /* Process crm_config updates */ cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_CIB_TAG_CRMCONFIG, diff, LOG_DEBUG); if(cib_top != NULL) { mainloop_set_trigger(config_read); } /* Process anything that was added */ cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB, diff, LOG_ERR); if(need_abort(cib_top)) { goto bail; /* configuration changed */ } /* Process anything that was removed */ cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_REMOVED"//"XML_TAG_CIB, diff, LOG_ERR); if(need_abort(cib_top)) { goto bail; /* configuration changed */ } /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff,"//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_TAG_TRANSIENT_NODEATTRS); if(xpathObj && xpathObj->nodesetval->nodeNr > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: update", aborted); goto bail; } else if(xpathObj) { xmlXPathFreeObject(xpathObj); } /* Transient Attributes - Removed */ xpathObj = xpath_search(diff,"//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_REMOVED"//"XML_TAG_TRANSIENT_NODEATTRS); if(xpathObj && xpathObj->nodesetval->nodeNr > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } else if(xpathObj) { xmlXPathFreeObject(xpathObj); } /* Check for node state 
updates... possibly from a shutdown we requested */ xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_CIB_TAG_STATE); if(xpathObj) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { xmlNode *node = getXpathResult(xpathObj, lpc); const char *event_node = crm_element_value(node, XML_ATTR_ID); const char *ccm_state = crm_element_value(node, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node, XML_CIB_ATTR_HASTATE); const char *shutdown_s = crm_element_value(node, XML_CIB_ATTR_SHUTDOWN); const char *crmd_state = crm_element_value(node, XML_CIB_ATTR_CRMDSTATE); if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) || safe_str_eq(ha_state, DEADSTATUS) || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { crm_action_t *shutdown = match_down_event(0, event_node, NULL); if(shutdown != NULL) { const char *task = crm_element_value(shutdown->xml, XML_LRM_ATTR_TASK); if(safe_str_neq(task, CRM_OP_FENCE)) { /* Wait for stonithd to tell us it is complete via tengine_stonith_callback() */ update_graph(transition_graph, shutdown); trigger_graph(); } } else { crm_info("Stonith/shutdown of %s not matched", event_node); abort_transition(INFINITY, tg_restart, "Node failure", node); } fail_incompletable_actions(transition_graph, event_node); } if(shutdown_s) { int shutdown = crm_parse_int(shutdown_s, NULL); if(shutdown > 0) { crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node); abort_transition(INFINITY, tg_restart, "Shutdown request", node); } } } xmlXPathFreeObject(xpathObj); } /* * Check for and fast-track the processing of LRM refreshes * In large clusters this can result in _huge_ speedups * * Unfortunately we can only do so when there are no pending actions * Otherwise we could miss updates we're waiting for and stall * */ xpathObj = NULL; if(transition_graph->pending == 0) { xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_LRM_TAG_RESOURCE); } 
if(xpathObj) { int updates = xpathObj->nodesetval->nodeNr; if(updates > 1) { /* Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time */ crm_info("Detected LRM refresh - %d resources updated: Skipping all resource events", updates); abort_transition(INFINITY, tg_restart, "LRM Refresh", diff); goto bail; } xmlXPathFreeObject(xpathObj); } /* Process operation updates */ xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_LRM_TAG_RSC_OP); if(xpathObj) { process_resource_updates(xpathObj); xmlXPathFreeObject(xpathObj); } /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//"XML_TAG_DIFF_REMOVED"//"XML_LRM_TAG_RSC_OP); if(xpathObj) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { int max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); op_id = ID(match); max = strlen(rsc_op_template) + strlen(op_id) + 1; crm_malloc0(rsc_op_xpath, max); snprintf(rsc_op_xpath, max, rsc_op_template, op_id); op_match = xpath_search(diff, rsc_op_xpath); if(op_match == NULL || op_match->nodesetval->nodeNr == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if(cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } if(op_match) { xmlXPathFreeObject(op_match); } crm_free(rsc_op_xpath); } } bail: if(xpathObj) { xmlXPathFreeObject(xpathObj); } } gboolean process_te_message(xmlNode *msg, xmlNode *xml_data) { const char *from = crm_element_value(msg, 
F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_xml(LOG_DEBUG_3, "ipc", msg); if(op == NULL){ /* error */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ xmlXPathObject *xpathObj = NULL; crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_info("Processing (N)ACK %s from %s", crm_element_value(msg, XML_ATTR_REFERENCE), from); xpathObj = xpath_search(xml_data, "//"XML_LRM_TAG_RSC_OP); if(xpathObj) { process_resource_updates(xpathObj); xmlXPathFreeObject(xpathObj); xpathObj = NULL; } else { crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE; } } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } void -tengine_stonith_callback(stonith_ops_t * op) +tengine_stonith_callback( + stonith_t *stonith, const xmlNode *msg, int call_id, int rc, xmlNode *output, void *userdata) { - const char *allow_fail = NULL; + char *uuid = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; - char *uuid = NULL; - crm_action_t *stonith_action = NULL; + crm_action_t *action = NULL; - if(op == NULL) { - crm_err("Called with a NULL op!"); - return; - } + CRM_CHECK(userdata != NULL, return); + crm_log_xml_info(output, "StonithOp"); + crm_info("Stonith operation %d/%s result=%d", call_id, (char*)userdata, rc); - crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", - op->call_id, op->optype, op->node_name, op->op_result, - (char 
*)op->node_list, op->private_data); + /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */ + /* op->call_id, op->optype, op->node_name, op->op_result, */ + /* (char *)op->node_list, op->private_data); */ - /* this will mark the event complete if a match is found */ - CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ - - CRM_CHECK(decode_transition_key( - op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), + CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); + goto bail; } - stonith_action = get_action(stonith_id, TRUE); - - if(stonith_action == NULL) { + /* this will mark the event complete if a match is found */ + action = get_action(stonith_id, TRUE); + if(action == NULL) { crm_err("Stonith action not matched"); goto bail; } + + if(rc == stonith_ok) { + crm_info("Stonith of %s passed", + crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET)); + send_stonith_update(action); - switch(op->op_result) { - case STONITH_SUCCEEDED: - send_stonith_update(op); - break; - case STONITH_CANNOT: - case STONITH_TIMEOUT: - case STONITH_GENERIC: - stonith_action->failed = TRUE; - allow_fail = crm_meta_value(stonith_action->params, XML_ATTR_TE_ALLOWFAIL); - - if(FALSE == crm_is_true(allow_fail)) { - crm_err("Stonith of %s failed (%d)..." 
- " aborting transition.", - op->node_name, op->op_result); - abort_transition(INFINITY, tg_restart, - "Stonith failed", NULL); - } - break; - default: - crm_err("Unsupported action result: %d", op->op_result); - abort_transition(INFINITY, tg_restart, - "Unsupport Stonith result", NULL); + } else { + const char *target = crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET); + const char *allow_fail = crm_meta_value(action->params, XML_ATTR_TE_ALLOWFAIL); + + action->failed = TRUE; + if(crm_is_true(allow_fail) == FALSE) { + crm_err("Stonith of %s failed (%d)... aborting transition.", target, rc); + abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); + } } - update_graph(transition_graph, stonith_action); + update_graph(transition_graph, action); trigger_graph(); bail: + crm_free(userdata); crm_free(uuid); return; } void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "Failed update"); } crm_free(user_data); } void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; CRM_CHECK(data != NULL, return FALSE); timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (timeout=%d, abort_level=%d, complete=%s)", timer->timeout, transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( 
LOG_WARNING,"Action missed its timeout: ", timer->action); } else if(fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_err("Discarding action timeout in state: %s", fsa_state2string(fsa_state)); } else if(transition_graph->complete) { crm_err("Ignoring action timeout while not in transition"); } else { /* fail the action */ gboolean send_update = TRUE; const char *task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK); print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action); timer->action->failed = TRUE; timer->action->confirmed = TRUE; abort_transition(INFINITY, tg_restart, "Action lost", NULL); update_graph(transition_graph, timer->action); trigger_graph(); if(timer->action->type != action_type_rsc) { send_update = FALSE; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ send_update = FALSE; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ send_update = FALSE; } if(send_update) { /* cib_action_update(timer->action, LRM_OP_PENDING, EXECRA_STATUS_UNKNOWN); */ cib_action_update(timer->action, LRM_OP_TIMEOUT, EXECRA_UNKNOWN_ERROR); } } return FALSE; } diff --git a/crmd/te_callbacks.h b/crmd/te_callbacks.h index 97b26bfe50..d33eb855a3 100644 --- a/crmd/te_callbacks.h +++ b/crmd/te_callbacks.h @@ -1,39 +1,40 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TE_CALLBACKS__H #define TE_CALLBACKS__H extern void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern gboolean global_timer_callback(gpointer data); extern gboolean action_timer_callback(gpointer data); extern gboolean te_graph_trigger(gpointer user_data); extern void te_update_diff(const char *event, xmlNode *msg); -extern void tengine_stonith_callback(stonith_ops_t * op); +extern void tengine_stonith_callback( + stonith_t *stonith, const xmlNode *msg, int call_id, int rc, xmlNode *output, void *userdata); #endif diff --git a/crmd/te_utils.c b/crmd/te_utils.c index 7a8e129b07..f49767750c 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -1,328 +1,300 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include GCHSource *stonith_src = NULL; crm_trigger_t *stonith_reconnect = NULL; static gboolean fail_incompletable_stonith(crm_graph_t *graph) { const char *task = NULL; xmlNode *last_action = NULL; if(graph == NULL) { return FALSE; } slist_iter( synapse, synapse_t, graph->synapses, lpc, if (synapse->confirmed) { continue; } slist_iter( action, crm_action_t, synapse->actions, lpc, if(action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if(task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): STONITHd terminated", action->id, ID(action->xml)); } ); ); if(last_action != NULL) { crm_warn("STONITHd failure resulted in un-runnable actions"); abort_transition(INFINITY, tg_restart, "Stonith failure", last_action); return TRUE; } return FALSE; } static void -tengine_stonith_connection_destroy(gpointer user_data) +tengine_stonith_connection_destroy(stonith_t *st, const char *event, xmlNode *msg) { - if(stonith_src == NULL) { - crm_info("Fencing daemon disconnected"); - - } else { + if(is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); + + } else { + crm_info("Fencing daemon disconnected"); } /* cbchan will be garbage at this point, arrange for it to be reset */ - set_stonithd_input_IPC_channel_NULL(); - stonith_src = NULL; + stonith_api->state = stonith_disconnected; fail_incompletable_stonith(transition_graph); trigger_graph(); return; } -static gboolean -tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) -{ - 
while(stonithd_op_result_ready()) { - if (sender->ch_status != IPC_CONNECT) { - /* The message which was pending for us is that - * the IPC status is now IPC_DISCONNECT */ - break; - } - - if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { - crm_err("stonithd_receive_ops_result() failed"); - } - } - - if (sender->ch_status != IPC_CONNECT) { - tengine_stonith_connection_destroy(NULL); - return FALSE; - } - return TRUE; -} - gboolean te_connect_stonith(gpointer user_data) { int lpc = 0; - int rc = ST_OK; - IPC_Channel *fence_ch = NULL; - if(stonith_src != NULL) { + int rc = stonith_ok; + + if(stonith_api == NULL) { + stonith_api = stonith_api_new(); + } + + if(stonith_api->state != stonith_disconnected) { crm_debug_2("Still connected"); return TRUE; } for(lpc = 0; lpc < 30; lpc++) { crm_info("Attempting connection to fencing daemon..."); sleep(1); - rc = stonithd_signon("tengine"); - if(rc == ST_OK) { + rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL, NULL); + + if(rc == stonith_ok) { break; } if(user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); mainloop_set_trigger(stonith_reconnect); return TRUE; - } - + } + crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } - CRM_ASSERT(rc == ST_OK); /* If not, we failed 30 times... just get out */ - CRM_ASSERT(stonithd_set_stonith_ops_callback( - tengine_stonith_callback) == ST_OK); - - crm_debug_2("Grabbing IPC channel"); - fence_ch = stonithd_input_IPC_channel(); - CRM_ASSERT(fence_ch != NULL); - - crm_debug_2("Attaching to mainloop"); - stonith_src = G_main_add_IPC_Channel( - G_PRIORITY_LOW, fence_ch, FALSE, tengine_stonith_dispatch, NULL, - tengine_stonith_connection_destroy); - - CRM_ASSERT(stonith_src != NULL); + CRM_CHECK(rc == stonith_ok, return TRUE); /* If not, we failed 30 times... 
just get out */ + rc = stonith_api->cmds->register_notification( + stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); + crm_info("Connected"); return TRUE; } gboolean stop_te_timer(crm_action_timer_t *timer) { const char *timer_desc = "action timer"; if(timer == NULL) { return FALSE; } if(timer->reason == timeout_abort) { timer_desc = "global timer"; crm_debug_2("Stopping %s", timer_desc); } if(timer->source_id != 0) { crm_debug_2("Stopping %s", timer_desc); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_debug_2("%s was already stopped", timer_desc); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { enum transition_status graph_rc = -1; if(transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_debug_2("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch(fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if(transition_graph->complete == FALSE) { graph_rc = run_graph(transition_graph); print_graph(LOG_DEBUG_3, transition_graph); if(graph_rc == transition_active) { crm_debug_3("Transition not yet complete"); return TRUE; } else if(graph_rc == transition_pending) { crm_debug_3("Transition not yet complete - no actions fired"); return TRUE; } if(graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } } crm_info("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { mainloop_set_trigger(transition_trigger); crm_debug_2("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const 
char *fn, int line) { int log_level = LOG_INFO; const char *magic = NULL; CRM_CHECK(transition_graph != NULL, return); if(reason) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; xmlNode *diff = get_xpath_object("//"F_CIB_UPDATE_RESULT"//diff", reason, LOG_DEBUG_2); magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); if(diff) { cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); do_crm_log(log_level, "%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic?magic:"NA", diff_add_admin_epoch,diff_add_epoch,diff_add_updates, abort_text); } else { do_crm_log(log_level, "%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s", fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic?magic:"NA", abort_text); } } else { do_crm_log(log_level, "%s:%d - Triggered transition abort (complete=%d) : %s", fn, line, transition_graph->complete, abort_text); } switch(fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: do_crm_log(log_level, "Abort suppressed: state=%s (complete=%d)", fsa_state2string(fsa_state), transition_graph->complete); return; default: break; } if(magic == NULL && reason != NULL) { crm_log_xml(log_level+1, "Cause", reason); } /* Make sure any queued calculations are discarded ASAP */ crm_free(fsa_pe_ref); fsa_pe_ref = NULL; if(transition_graph->complete) { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); return; } update_abort_priority( transition_graph, abort_priority, abort_action, abort_text); mainloop_set_trigger(transition_trigger); } diff --git a/crmd/tengine.c b/crmd/tengine.c index 363f9ffccb..5e53248fba 100644 --- 
a/crmd/tengine.c +++ b/crmd/tengine.c @@ -1,287 +1,272 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include #include extern crm_graph_functions_t te_graph_fns; struct crm_subsystem_s *te_subsystem = NULL; +stonith_t *stonith_api = NULL; static void global_cib_callback(const xmlNode *msg, int callid ,int rc, xmlNode *output) { } static crm_graph_t *create_blank_graph(void) { crm_graph_t *a_graph = unpack_graph(NULL, NULL); a_graph->complete = TRUE; a_graph->abort_reason = "DC Takeover"; a_graph->completion_action = tg_restart; return a_graph; } /* A_TE_START, A_TE_STOP, A_TE_RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { - int dummy; gboolean init_ok = TRUE; cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; if(action & A_TE_STOP) { if(transition_graph) { destroy_graph(transition_graph); transition_graph = 
NULL; } if(fsa_cib_conn && cib_ok != fsa_cib_conn->cmds->del_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } clear_bit_inplace(fsa_input_register, te_subsystem->flag_connected); - crm_info("Transitioner is now inactive"); - - if(stonith_src) { - GCHSource *source = stonith_src; - crm_info("Disconnecting STONITH..."); - stonith_src = NULL; /* so that we don't try to reconnect */ - G_main_del_IPC_Channel(source); - stonithd_signoff(); - } + crm_info("Transitioner is now inactive"); } if((action & A_TE_START) == 0) { return; } else if(is_set(fsa_input_register, te_subsystem->flag_connected)) { crm_debug("The transitioner is already active"); return; } else if((action & A_TE_START) && cur_state == S_STOPPING) { crm_info("Ignoring request to start %s while shutting down", te_subsystem->name); return; } cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); te_uuid = crm_strdup(uuid_str); crm_info("Registering TE UUID: %s", te_uuid); if(transition_trigger == NULL) { transition_trigger = mainloop_add_trigger( G_PRIORITY_LOW, te_graph_trigger, NULL); } - - if(stonith_reconnect == NULL) { - stonith_reconnect = mainloop_add_trigger( - G_PRIORITY_LOW, te_connect_stonith, &dummy); - } if(cib_ok != fsa_cib_conn->cmds->add_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } if(cib_EXISTS != fsa_cib_conn->cmds->add_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Set duplicate CIB notification callback"); } if(cib_ok != fsa_cib_conn->cmds->set_op_callback(fsa_cib_conn, global_cib_callback)) { crm_err("Could not set CIB global callback"); init_ok = FALSE; } if(init_ok) { - mainloop_set_trigger(stonith_reconnect); - set_graph_functions(&te_graph_fns); if(transition_graph) { destroy_graph(transition_graph); } /* create a blank one */ crm_debug("Transitioner is now 
active"); transition_graph = create_blank_graph(); set_bit_inplace(fsa_input_register, te_subsystem->flag_connected); } } /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(AM_I_DC == FALSE) { crm_err("Not DC: No need to invoke the TE (anymore): %s", fsa_action2string(action)); return; } else if(fsa_state != S_TRANSITION_ENGINE && (action & A_TE_INVOKE)) { crm_err("No need to invoke the TE (%s) in state %s", fsa_action2string(action), fsa_state2string(fsa_state)); return; } if(action & A_TE_CANCEL) { crm_debug("Cancelling the transition: %s", transition_graph->complete?"inactive":"active"); abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); if(transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if(action & A_TE_HALT) { crm_debug("Halting the transition: %s", transition_graph->complete?"inactive":"active"); abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); if(transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if(action & A_TE_INVOKE) { const char *value = NULL; xmlNode *graph_data = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE); const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); if(graph_file == NULL && input->xml == NULL) { crm_log_xml_err(input->msg, "Bad command"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if(transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition(INFINITY, tg_restart, "Transition Active", NULL); return; } if(fsa_pe_ref == NULL || safe_str_neq(fsa_pe_ref, ref)) { crm_info("Transition is redundant: %s vs. 
%s", crm_str(fsa_pe_ref), crm_str(ref)); abort_transition(INFINITY, tg_restart, "Transition Redundant", NULL); } graph_data = input->xml; if(graph_data == NULL && graph_file != NULL) { graph_data = filename2xml(graph_file); } CRM_CHECK(graph_data != NULL, crm_err("Input raised by %s is invalid", msg_data->origin); crm_log_xml_err(input->msg, "Bad command"); return); destroy_graph(transition_graph); transition_graph = unpack_graph(graph_data, graph_input); CRM_CHECK(transition_graph != NULL, transition_graph = create_blank_graph(); return); crm_info("Processing graph %d (ref=%s) derived from %s", transition_graph->id, ref, graph_input); value = crm_element_value(graph_data, "failed-stop-offset"); if(value) { crm_free(failed_stop_offset); failed_stop_offset = crm_strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if(value) { crm_free(failed_start_offset); failed_start_offset = crm_strdup(value); } trigger_graph(); print_graph(LOG_DEBUG_2, transition_graph); if(graph_data != input->xml) { free_xml(graph_data); } } } #if 0 gboolean shuttingdown; gboolean tengine_shutdown(int nsig, gpointer unused) { shuttingdown = TRUE; abort_transition(INFINITY, tg_shutdown, "Shutdown", NULL); return TRUE; } gboolean te_stop(void) { destroy_graph(transition_graph); #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { stonithd_signoff(); } #endif crm_free(te_uuid); } #endif diff --git a/crmd/tengine.h b/crmd/tengine.h index c6d7b049e1..9fa9346794 100644 --- a/crmd/tengine.h +++ b/crmd/tengine.h @@ -1,75 +1,76 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TENGINE__H #define TENGINE__H #include #include -#include -extern void send_stonith_update(stonith_ops_t * op); +#include +extern stonith_t *stonith_api; +extern void send_stonith_update(crm_action_t *stonith_action); /* tengine */ extern crm_action_t *match_down_event( int rc, const char *target, const char *filter); extern crm_action_t *get_cancel_action(const char *id, const char *node); extern gboolean cib_action_update(crm_action_t *action, int status, int op_rc); extern gboolean fail_incompletable_actions(crm_graph_t *graph, const char *down_node); extern gboolean need_abort(xmlNode *update); extern gboolean process_graph_event(xmlNode *event, const char *event_node); /* utils */ extern crm_action_t *get_action(int id, gboolean confirmed); extern gboolean start_global_timer(crm_action_timer_t *timer, int timeout); extern gboolean stop_te_timer(crm_action_timer_t *timer); extern const char *get_rsc_state(const char *task, op_status_t status); /* unpack */ extern gboolean process_te_message(xmlNode * msg, xmlNode *xml_data); extern crm_graph_t *transition_graph; extern crm_trigger_t *transition_trigger; extern char *te_uuid; extern void notify_crmd(crm_graph_t *graph); #include extern void trigger_graph_processing(const char *fn, int line); extern void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const char *fn, int line); #define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__) #define abort_transition(pri, action, 
text, reason) \ abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__); extern gboolean te_connect_stonith(gpointer user_data); extern GCHSource *stonith_src; extern crm_trigger_t *transition_trigger; extern crm_trigger_t *stonith_reconnect; extern crm_action_timer_t *transition_timer; extern char *failed_stop_offset; extern char *failed_start_offset; extern int active_timeout; extern int stonith_op_active; #endif diff --git a/cts/CM_ais.py b/cts/CM_ais.py index 5755155e50..8601ae7bd6 100644 --- a/cts/CM_ais.py +++ b/cts/CM_ais.py @@ -1,288 +1,289 @@ '''CTS: Cluster Testing System: AIS dependent modules... ''' __copyright__=''' Copyright (C) 2007 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os,sys,CTS,CTSaudits,CTStests, warnings from CTSvars import * from CTS import * from CM_lha import crm_lha from CTSaudits import ClusterAudit from CTStests import * from CIB import * ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class crm_ais(crm_lha): ''' The crm version 3 cluster manager class. 
It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_lha.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-ais", "UUIDQueryCmd" : "crmadmin -N", "EpocheCmd" : "crm_node -e", "QuorumCmd" : "crm_node -q", "ParitionCmd" : "crm_node -p", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:ChildExit" : "Child process .* exited", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"ERROR:", r"CRIT:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"Child process .* terminated with signal 11", r"Executing .* fencing operation", ), }) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "crm_mon:", "crmadmin:", "async_notify: strange, client not found", "ERROR: Message hist queue is filling up" ] return [] def NodeUUID(self, node): return node def ais_components(self): self.complist = [] self.common_ignore = [ "Pending action:", "ERROR: crm_log_message_adv:", "ERROR: MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB 
terminated...", "Sending message to CIB service FAILED", "apply_xml_diff: Diff application failed!", "crmd: .*Action A_RECOVER .* not supported", "pingd: .*ERROR: send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", - "ERROR: stonithd_op_result_ready: not signed on", "ERROR: attrd_connection_destroy: Lost connection to attrd", "nfo: te_fence_node: Executing .* fencing operation", ] - self.complist.append(Process("cib", 0, [ + self.complist.append(Process(self, "cib", pats = [ "State transition S_IDLE", "Respawning .* crmd", "Respawning .* attrd", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Child process crmd exited .* rc=2", "Child process attrd exited .* rc=1", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "crmd:.*do_exit: Could not recover from internal error", - ], [], self.common_ignore, 0, self)) + ], badnews_ignore = self.common_ignore)) - self.complist.append(Process("lrmd", 0, [ + self.complist.append(Process(self, "lrmd", pats = [ "State transition S_IDLE", "LRM Connection failed", "Respawning .* crmd", "crmd: .*I_ERROR.*lrm_connection_destroy", "Child process crmd exited .* rc=2", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", - ], [], self.common_ignore, 0, self)) - self.complist.append(Process("crmd", 0, [ + ], badnews_ignore = self.common_ignore)) + + self.complist.append(Process(self, "crmd", pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State 
transition .* -> S_IDLE", - ], [], self.common_ignore, 0, self)) + ], badnews_ignore = self.common_ignore)) - self.complist.append(Process("attrd", 0, [ + self.complist.append(Process(self, "attrd", pats = [ "crmd: .*ERROR: attrd_connection_destroy: Lost connection to attrd" - ], [], self.common_ignore, 0, self)) + ], badnews_ignore = self.common_ignore)) - self.complist.append(Process("pengine", 0, [ - ], [ + self.complist.append(Process(self, "pengine", dc_pats = [ "State transition S_IDLE", "Respawning .* crmd", "Child process crmd exited .* rc=2", "crmd: .*pe_connection_destroy: Connection to the Policy Engine failed", "crmd: .*I_ERROR.*save_cib_contents", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", - ], self.common_ignore, 0, self)) + ], badnews_ignore = self.common_ignore)) if self.Env["DoFencing"] == 1 : stonith_ignore = [ - "ERROR: stonithd_signon: ", "update_failcount: Updating failcount for child_DoFencing", "ERROR: te_connect_stonith: Sign-in failed: triggered a retry", ] stonith_ignore.extend(self.common_ignore) - self.complist.append(Process("stonithd", 0, [], [ + self.complist.append(Process(self, "stonith-ng", process="stonithd", pats = [ + "CRIT: stonith_dispatch: Lost connection to the STONITH service", "tengine_stonith_connection_destroy: Fencing daemon connection failed", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", - ], stonith_ignore, 0, self)) + ], badnews_ignore = stonith_ignore)) + return self.complist class crm_whitetank(crm_ais): ''' The crm version 3 cluster manager class. 
It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_ais.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-whitetank", "StartCmd" : CTSvars.INITDIR+"/openais start", "StopCmd" : CTSvars.INITDIR+"/openais stop", "Pat:We_stopped" : "%s.*openais.*pcmk_shutdown: Shutdown complete", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "openais:.*Node %s is now: lost", "Pat:ChildKilled" : "%s openais.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s openais.*Respawning failed child process: %s", "Pat:ChildExit" : "Child process .* exited", }) def Components(self): self.ais_components() aisexec_ignore = [ "ERROR: ais_dispatch: Receiving message .* failed", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "cib: .*ERROR: cib_ais_destroy: AIS connection terminated", #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_TERMINATE.*do_recover", "attrd: .*CRIT: attrd_ais_destroy: Lost connection to OpenAIS service!", "stonithd: .*ERROR: AIS connection terminated", ] aisexec_ignore.extend(self.common_ignore) - self.complist.append(Process("aisexec", 0, [ + self.complist.append(Process(self, "aisexec", pats = [ "ERROR: ais_dispatch: AIS connection failed", "crmd: .*ERROR: do_exit: Could not recover from internal error", "pengine: .*Scheduling Node .* for STONITH", "stonithd: .*requests a STONITH operation RESET on node", "stonithd: .*Succeeded to STONITH the node", - ], [], aisexec_ignore, 0, self)) + ], badnews_ignore = aisexec_ignore)) class crm_flatiron(crm_ais): ''' The crm version 3 cluster manager class. 
It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_ais.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-flatiron", "StartCmd" : CTSvars.INITDIR+"/corosync start", "StopCmd" : CTSvars.INITDIR+"/corosync stop", # The next pattern is too early # "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager", # The next pattern would be preferred, but it doesn't always come out # "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status", "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "corosync:.*Node %s is now: lost", "Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s", "Pat:ChildExit" : "Child process .* exited", }) def Components(self): self.ais_components() corosync_ignore = [ "ERROR: ais_dispatch: Receiving message .* failed", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "cib: .*ERROR: cib_ais_destroy: AIS connection terminated", #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_TERMINATE.*do_recover", "attrd: .*CRIT: attrd_ais_destroy: Lost connection to Corosync service!", "stonithd: .*ERROR: AIS connection terminated", ] corosync_ignore.extend(self.common_ignore) -# self.complist.append(Process("corosync", 0, [ +# self.complist.append(Process(self, "corosync", pats = [ # "ERROR: ais_dispatch: AIS connection failed", # "crmd: .*ERROR: do_exit: Could not recover from internal error", # "pengine: .*Scheduling Node .* for STONITH", # "stonithd: .*requests a STONITH operation RESET on node", # "stonithd: .*Succeeded to STONITH the node", -# ], [], corosync_ignore, 0, self)) +# ], badnews_ignore = corosync_ignore)) + return 
self.complist diff --git a/cts/CM_lha.py b/cts/CM_lha.py index e05159cd22..a519928a7b 100755 --- a/cts/CM_lha.py +++ b/cts/CM_lha.py @@ -1,609 +1,606 @@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... ''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os,sys,CTS,CTSaudits,CTStests, warnings from CTSvars import * from CTS import * from CTSaudits import ClusterAudit from CTStests import * from CIB import * try: from xml.dom.minidom import * except ImportError: sys.__stdout__.write("Python module xml.dom.minidom not found\n") sys.__stdout__.write("Please install python-xml or similar before continuing\n") sys.__stdout__.flush() sys.exit(1) ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class crm_lha(ClusterManager): ''' The linux-ha version 2 cluster manager class. 
It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None): ClusterManager.__init__(self, Environment, randseed=randseed) #HeartbeatCM.__init__(self, Environment, randseed=randseed) self.fastfail = 0 self.clear_cache = 0 self.cib_installed = 0 self.config = None self.cluster_monitor = 0 self.use_short_names = 1 self.update({ "Name" : "crm-lha", "DeadTime" : 300, "StartTime" : 300, # Max time to start up "StableTime" : 30, "StartCmd" : CTSvars.INITDIR+"/heartbeat start > /dev/null 2>&1", "StopCmd" : CTSvars.INITDIR+"/heartbeat stop > /dev/null 2>&1", "ElectionCmd" : "crmadmin -E %s", "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "EpocheCmd" : "crm_node -H -e", "QuorumCmd" : "crm_node -H -q", "ParitionCmd" : "crm_node -H -p", "CibQuery" : "cibadmin -Ql", "ExecuteRscOp" : "lrmadmin -n %s -E %s %s 0 %d EVERYTIME 2>&1", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "LogFileName" : Environment["LogFileName"], "UUIDQueryCmd" : "crmadmin -N", "StandbyCmd" : "crm_standby -U %s -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_standby -GQ -U %s 2>/dev/null", # Patterns to look for in the log files for various occasions... 
"Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions "Pat:Local_started" : "%s crmd:.*The local CRM is operational", "Pat:Slave_started" : "%s crmd:.*State transition.*-> S_NOT_DC", "Pat:Master_started" : "%s crmd:.* State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s crmd:.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:ChildKilled" : "%s heartbeat.*%s.*killed by signal 9", "Pat:ChildRespawn" : "%s heartbeat.*Respawning client.*%s", "Pat:ChildExit" : "ERROR: Client .* exited with return code", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"ERROR:", r"CRIT:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r"global_timer_callback:", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ), }) if self.Env["DoBSC"]: del self["Pat:They_stopped"] del self["Pat:Logd_stopped"] self.Env["use_logd"] = 0 self._finalConditions() self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.CibFactory = ConfigFactory(self) self.cib = self.CibFactory.createConfig(self.Env["Schema"]) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end 
'''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "ERROR: crm_abort: crm_glib_handler: ", "ERROR: Message hist queue is filling up", "stonithd: .*CRIT: external_hostlist: 'vmware gethosts' returned an empty hostlist", "stonithd: .*ERROR: Could not list nodes for stonith RA external/vmware.", "pengine: Preventing .* from re-starting", ] return [] def install_config(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("Node %s is not up." % node) return None if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.debug("Installing Generated CIB on node %s" %(node)) self.cib.install(node) else: self.log("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s %d"%(node)) self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml") def prepare(self): '''Finish the Initialization process. 
Prepare to test...''' self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.unisolate_node(node) self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:Slave_started"]%node) idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats) idle_watch.setwatch() out = self.rsh(node, self["StatusCmd"]%node, 1) self.debug("Node %s status: '%s'" %(node, out)) if not out or string.find(out, 'ok') < 0: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" %(node, "down", self.ShouldBeStatus[node])) self.ShouldBeStatus[node]="down" return 0 if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, "up", self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]="up" # check the output first - because syslog-ng looses messages if string.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if string.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... 
def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" %(node)) return None def partition_stable(self, nodes, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self["Pat:DC_IDLE"]) self.debug("Waiting for cluster stability...") if timeout == None: timeout = self["DeadTime"] idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout) idle_watch.setwatch() any_up = 0 for node in self.Env["nodes"]: # have each node dump its current state if self.ShouldBeStatus[node] == "up": self.rsh(node, self["StatusCmd"] %node, 1) any_up = 1 if any_up == 0: self.debug("Cluster is inactive") return 1 ret = idle_watch.look() while ret: self.debug(ret) for node in nodes: if re.search(node, ret): return 1 ret = idle_watch.look() self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout)) return None def cluster_stable(self, timeout=None): partitions = self.find_partitions() for partition in partitions: if not self.partition_stable(partition, timeout): return None return 1 def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh(node, self["StatusCmd"]%node, 1) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 return rc def active_resources(self, node): # [SM].* {node} matches Started, Slave, Master # Stopped wont be matched as it wont include {node} (rc, output) = self.rsh(node, """crm_resource -c""", None) resources = [] for line in output: if re.search("^Resource", line): tmp = AuditResource(self, line) if tmp.type == "primitive" and tmp.host == node: resources.append(tmp.id) return 
resources def ResourceOp(self, resource, op, node, interval=0, app="lrmadmin"): ''' Execute an operation on a resource ''' cmd = self["ExecuteRscOp"] % (app, resource, op, interval) (rc, lines) = self.rsh(node, cmd, None) if rc == 127: self.log("Command '%s' failed. Binary not installed?" % cmd) for line in lines: self.log("Output: "+line) return rc def ResourceLocation(self, rid): ResourceNodes = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": dummy = 0 rc = self.ResourceOp(rid, "monitor", node) # Strange error codes from remote_py # 65024 == not installed # 2048 == 8 # 1792 == 7 # 0 == 0 if rc == 127: dummy = 1 elif rc == 254 or rc == 65024: dummy = 1 #self.debug("%s is not installed on %s: %d" % (rid, node, rc)) elif rc == 0 or rc == 2048 or rc == 8: ResourceNodes.append(node) elif rc == 7 or rc == 1792: dummy = 1 #self.debug("%s is not running on %s: %d" % (rid, node, rc)) else: # not active on this node? self.log("Unknown rc code for %s on %s: %d" % (rid, node, rc)) return ResourceNodes def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": partition = self.rsh(node, self["ParitionCmd"], 1) if not partition: self.log("no partition details for %s" %node) elif len(partition) > 2: partition = partition[:-1] found=0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition from %s: %s" %(node, partition)) ccm_partitions.append(partition) else: self.debug("Partition '%s' from %s is consistent with existing entries" %(partition, node)) else: self.log("bad partition details for %s" %node) else: self.debug("Node %s is down... skipping" %node) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... 
assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == "up": quorum = self.rsh(node, self["QuorumCmd"], 1) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.log("WARN: Unexpected quorum test result from "+ node +":"+ quorum) return 0 def Components(self): complist = [] common_ignore = [ "Pending action:", "ERROR: crm_log_message_adv:", "ERROR: MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "crmd: .*Action A_RECOVER .* not supported", "ERROR: stonithd_op_result_ready: not signed on", "pingd: .*ERROR: send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. 
You may ignore this error if it is unmanaged.", ] stonith_ignore = [ "ERROR: stonithd_signon: ", "update_failcount: Updating failcount for child_DoFencing", "ERROR: te_connect_stonith: Sign-in failed: triggered a retry", ] stonith_ignore.extend(common_ignore) ccm_ignore = [ "ERROR: get_channel_token: No reply message - disconnected" ] ccm_ignore.extend(common_ignore) - ccm = Process("ccm", 0, [ + ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [ "State transition S_IDLE", "CCM connection appears to have failed", "crmd: .*Action A_RECOVER .* not supported", "crmd: .*Input I_TERMINATE from do_recover", "Exiting to recover from CCM connection failure", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)", "crmd .*exited with return code 2.", "attrd .*exited with return code 1.", "cib .*exited with return code 2.", # Not if it was fenced # "A new node joined the cluster", # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", # "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE", # "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN", "State transition S_STARTING -> S_PENDING", - ], [], common_ignore, self.fastfail, self) + ], badnews_ignore = common_ignore) - cib = Process("cib", 0, [ + cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [ "State transition S_IDLE", "Lost connection to the CIB service", "Connection to the CIB terminated...", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "crmd:.*do_exit: Could not recover from internal error", "crmd .*exited with return code 2.", "attrd .*exited with return code 1.", - ], [], common_ignore, self.fastfail, self) + ], badnews_ignore = common_ignore) - lrmd = Process("lrmd", 0, [ + lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [ "State transition 
S_IDLE", "LRM Connection failed", "crmd: .*I_ERROR.*lrm_connection_destroy", "State transition S_STARTING -> S_PENDING", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", "crmd .*exited with return code 2.", - ], [], common_ignore, self.fastfail, self) + ], badnews_ignore = common_ignore) - crmd = Process("crmd", 0, [ + crmd = Process(self, "crmd", triggersreboot=self.fastfail, pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", "State transition .* S_IDLE", "State transition S_STARTING -> S_PENDING", - ], [ - ], common_ignore, self.fastfail, self) + ], badnews_ignore = common_ignore) - pengine = Process("pengine", 1, [ + pengine = Process(self, "pengine", triggersreboot=self.fastfail, pats = [ "State transition S_IDLE", "crmd .*exited with return code 2.", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*do_exit: Could not recover from internal error", "crmd: .*CRIT: pe_connection_destroy: Connection to the Policy Engine failed", "crmd: .*I_ERROR.*save_cib_contents", "crmd .*exited with return code 2.", - ], [], common_ignore, self.fastfail, self) + ], badnews_ignore = common_ignore, dc_only=1) if self.Env["DoFencing"] == 1 : - complist.append(Process("stonithd", 0, [], [ + complist.append(Process(self, "stoniths", triggersreboot=self.fastfail, dc_pats = [ "crmd: .*CRIT: tengine_stonith_connection_destroy: Fencing daemon connection failed", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", - ], stonith_ignore, 0, self)) -# complist.append(Process("heartbeat", 0, [], [], [], None, self)) - + ], badnews_ignore = stonith_ignore)) if self.fastfail == 0: ccm.pats.extend([ "attrd .* exited with return code 1", "ERROR: Respawning client .*attrd", "cib .* exited with return code 2", "ERROR: Respawning client .*cib", "crmd .* exited with return code 2", "ERROR: 
Respawning client .*crmd" ]) cib.pats.extend([ "attrd .* exited with return code 1", "ERROR: Respawning client .*attrd", "crmd .* exited with return code 2", "ERROR: Respawning client .*crmd" ]) lrmd.pats.extend([ "crmd .* exited with return code 2", "ERROR: Respawning client .*crmd" ]) pengine.pats.extend([ "ERROR: Respawning client .*crmd" ]) complist.append(ccm) complist.append(cib) complist.append(lrmd) complist.append(crmd) complist.append(pengine) return complist def NodeUUID(self, node): lines = self.rsh(node, self["UUIDQueryCmd"], 1) for line in lines: self.debug("UUIDLine:"+ line) m = re.search(r'%s.+\((.+)\)' % node, line) if m: return m.group(1) return "" def StandbyStatus(self, node): out=self.rsh(node, self["StandbyQueryCmd"]%node, 1) if not out: return "off" out = out[:-1] self.debug("Standby result: "+out) return out # status == "on" : Enter Standby mode # status == "off": Enter Active mode def SetStandbyMode(self, node, status): current_status = self.StandbyStatus(node) cmd = self["StandbyCmd"] % (node, status) ret = self.rsh(node, cmd) return True ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': pass diff --git a/cts/CTS.py b/cts/CTS.py index 9f79088950..040441ff22 100755 --- a/cts/CTS.py +++ b/cts/CTS.py @@ -1,1184 +1,1188 @@ '''CTS: Cluster Testing System: Main module Classes related to testing high-availability clusters... ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import types, string, select, sys, time, re, os, struct, os, signal import base64, pickle, binascii from UserDict import UserDict from syslog import * from subprocess import Popen,PIPE from CTSvars import * class RemoteExec: '''This is an abstract remote execution class. It runs a command on another machine - somehow. The somehow is up to us. This particular class uses ssh. Most of the work is done by fork/exec of ssh or scp. ''' def __init__(self, Env=None): self.Env = Env # -n: no stdin, -x: no X11 self.Command = "ssh -l root -n -x" # -B: batch mode, -q: no stats (quiet) self.CpCommand = "scp -B -q" self.OurNode=string.lower(os.uname()[1]) def _fixcmd(self, cmd): return re.sub("\'", "'\\''", cmd) def _cmd(self, *args): '''Compute the string that will run the given command on the given remote system''' args= args[0] sysname = args[0] command = args[1] #print "sysname: %s, us: %s" % (sysname, self.OurNode) if sysname == None or string.lower(sysname) == self.OurNode or sysname == "localhost": ret = command else: ret = self.Command + " " + sysname + " '" + self._fixcmd(command) + "'" #print ("About to run %s\n" % ret) return ret def __call__(self, node, command, stdout=0, blocking=1): '''Run the given command on the given remote system If you call this class like a function, this is the function that gets called. It just runs it roughly as though it were a system() call on the remote machine. The first argument is name of the machine to run it on. 
''' rc = 0 result = None if not blocking: proc = Popen(self._cmd([node, command]), stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) self.Env.debug("cmd: async: target=%s, rc=%d: %s" % (node, proc.pid, command)) if proc.pid > 0: return 0 return -1 proc = Popen(self._cmd([node, command]), stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) if stdout == 1: result = proc.stdout.readline() else: result = proc.stdout.readlines() proc.stdout.close() rc = proc.wait() self.Env.debug("cmd: target=%s, rc=%d: %s" % (node, rc, command)) if stdout == 1: return result if proc.stderr: errors = proc.stderr.readlines() proc.stderr.close() for err in errors: if not self.Env: print ("cmd: stderr: %s" % err) else: self.Env.debug("cmd: stderr: %s" % err) if stdout == 0: if result and not self.Env: for line in result: print ("cmd: stdout: %s" % line) elif result: for line in result: self.Env.debug("cmd: stdout: %s" % line) return rc return (rc, result) def cp(self, *args): '''Perform a remote copy''' cpstring = self.CpCommand for arg in args: cpstring = cpstring + " \'" + arg + "\'" rc = os.system(cpstring) self.Env.debug("cmd: rc=%d: %s" % (rc, cpstring)) return rc class LogWatcher: '''This class watches logs for messages that fit certain regular expressions. Watching logs for events isn't the ideal way to do business, but it's better than nothing :-) On the other hand, this class is really pretty cool ;-) The way you use this class is as follows: Construct a LogWatcher object Call setwatch() when you want to start watching the log Call look() to scan the log looking for the patterns ''' def __init__(self, log, regexes, timeout=10, debug=None): '''This is the constructor for the LogWatcher class. It takes a log name to watch, and a list of regular expressions to watch for." ''' # Validate our arguments. 
Better sooner than later ;-) for regex in regexes: assert re.compile(regex) self.regexes = regexes self.filename = log self.debug=debug self.whichmatch = -1 self.unmatched = None if self.debug: print "Debug now on for for log", log self.Timeout = int(timeout) self.returnonlymatch = None if not os.access(log, os.R_OK): raise ValueError("File [" + log + "] not accessible (r)") def setwatch(self, frombeginning=None): '''Mark the place to start watching the log from. ''' self.file = open(self.filename, "r") self.size = os.path.getsize(self.filename) if not frombeginning: self.file.seek(0, 2) # 2 means seek to EOF def ReturnOnlyMatch(self, onlymatch=1): '''Mark the place to start watching the log from. ''' self.returnonlymatch = onlymatch def look(self, timeout=None): '''Examine the log looking for the given patterns. It starts looking from the place marked by setwatch(). This function looks in the file in the fashion of tail -f. It properly recovers from log file truncation, but not from removing and recreating the log. It would be nice if it recovered from this as well :-) We return the first line which matches any of our patterns. ''' last_line=None first_line=None if timeout == None: timeout = self.Timeout done=time.time()+timeout+1 if self.debug: print "starting search: timeout=%d" % timeout for regex in self.regexes: print "Looking for regex: ", regex while (timeout <= 0 or time.time() <= done): newsize=os.path.getsize(self.filename) if self.debug > 4: print "newsize = %d" % newsize if newsize < self.size: # Somebody truncated the log! if self.debug: print "Log truncated!" 
self.setwatch(frombeginning=1) continue if newsize > self.file.tell(): line=self.file.readline() if self.debug > 2: print "Looking at line:", line if line: last_line=line if not first_line: first_line=line if self.debug: print "First line: "+ line which=-1 for regex in self.regexes: which=which+1 if self.debug > 3: print "Comparing line to ", regex #matchobj = re.search(string.lower(regex), string.lower(line)) matchobj = re.search(regex, line) if matchobj: self.whichmatch=which if self.returnonlymatch: return matchobj.group(self.returnonlymatch) else: if self.debug: print "Returning line" return line newsize=os.path.getsize(self.filename) if self.file.tell() == newsize: if timeout > 0: time.sleep(0.025) else: if self.debug: print "End of file" if self.debug: print "Last line: "+last_line return None if self.debug: print "Timeout" if self.debug: print "Last line: "+last_line return None def lookforall(self, timeout=None, allow_multiple_matches=None): '''Examine the log looking for ALL of the given patterns. It starts looking from the place marked by setwatch(). 
We return when the timeout is reached, or when we have found ALL of the regexes that were part of the watch ''' if timeout == None: timeout = self.Timeout save_regexes = self.regexes returnresult = [] while (len(self.regexes) > 0): oneresult = self.look(timeout) if not oneresult: self.unmatched = self.regexes self.matched = returnresult self.regexes = save_regexes return None returnresult.append(oneresult) if not allow_multiple_matches: del self.regexes[self.whichmatch] else: # Allow multiple regexes to match a single line tmp_regexes = self.regexes self.regexes = [] which = 0 for regex in tmp_regexes: matchobj = re.search(regex, oneresult) if not matchobj: self.regexes.append(regex) self.unmatched = None self.matched = returnresult self.regexes = save_regexes return returnresult class NodeStatus: def __init__(self, Env): self.Env = Env def IsNodeBooted(self, node): '''Return TRUE if the given node is booted (responds to pings)''' return self.Env.rsh("localhost", "ping -nq -c1 -w1 %s" % node) == 0 def IsSshdUp(self, node): #return self.rsh(node, "true") == 0; rc = self.Env.rsh(node, "true") return rc == 0 def WaitForNodeToComeUp(self, node, Timeout=300): '''Return TRUE when given node comes up, or None/FALSE if timeout''' timeout=Timeout anytimeouts=0 while timeout > 0: if self.IsNodeBooted(node) and self.IsSshdUp(node): if anytimeouts: # Fudge to wait for the system to finish coming up time.sleep(30) self.Env.debug("Node %s now up" % node) return 1 time.sleep(30) if (not anytimeouts): self.Env.debug("Waiting for node %s to come up" % node) anytimeouts=1 timeout = timeout - 1 self.Env.log("%s did not come up within %d tries" % (node, Timeout)) answer = raw_input('Continue? 
[nY]') if answer and answer == "n": raise ValueError("%s did not come up within %d tries" % (node, Timeout)) def WaitForAllNodesToComeUp(self, nodes, timeout=300): '''Return TRUE when all nodes come up, or FALSE if timeout''' for node in nodes: if not self.WaitForNodeToComeUp(node, timeout): return None return 1 class ClusterManager(UserDict): '''The Cluster Manager class. This is an subclass of the Python dictionary class. (this is because it contains lots of {name,value} pairs, not because it's behavior is that terribly similar to a dictionary in other ways.) This is an abstract class which class implements high-level operations on the cluster and/or its cluster managers. Actual cluster managers classes are subclassed from this type. One of the things we do is track the state we think every node should be in. ''' def __InitialConditions(self): #if os.geteuid() != 0: # raise ValueError("Must Be Root!") None def _finalConditions(self): for key in self.keys(): if self[key] == None: raise ValueError("Improper derivation: self[" + key + "] must be overridden by subclass.") def __init__(self, Environment, randseed=None): self.Env = Environment self.__InitialConditions() self.clear_cache = 0 self.TestLoggingLevel=0 self.data = { "up" : "up", # Status meaning up "down" : "down", # Status meaning down "StonithCmd" : "stonith -t baytech -p '10.10.10.100 admin admin' %s", "DeadTime" : 30, # Max time to detect dead node... "StartTime" : 90, # Max time to start up # # These next values need to be overridden in the derived class. 
# "Name" : None, "StartCmd" : None, "StopCmd" : None, "StatusCmd" : None, #"RereadCmd" : None, "BreakCommCmd" : None, "FixCommCmd" : None, #"TestConfigDir" : None, "LogFileName" : None, #"Pat:Master_started" : None, #"Pat:Slave_started" : None, "Pat:We_stopped" : None, "Pat:They_stopped" : None, "BadRegexes" : None, # A set of "bad news" regexes # to apply to the log } self.rsh = self.Env.rsh self.ShouldBeStatus={} self.OurNode=string.lower(os.uname()[1]) self.ShouldBeStatus={} self.ns = NodeStatus(self.Env) def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] def log(self, args): self.Env.log(args) def debug(self, args): self.Env.debug(args) def prepare(self): '''Finish the Initialization process. Prepare to test...''' for node in self.Env["nodes"]: if self.StataCM(node): self.ShouldBeStatus[node]="up" else: self.ShouldBeStatus[node]="down" self.unisolate_node(node) def upcount(self): '''How many nodes are up?''' count=0 for node in self.Env["nodes"]: if self.ShouldBeStatus[node]=="up": count=count+1 return count def install_config(self, node): return None def clear_all_caches(self): if self.clear_cache: for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "down": self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") else: self.debug("NOT Removing cache file on: "+node) def StartaCM(self, node): '''Start up the cluster manager on a given node''' self.debug("Starting %s on node %s" %(self["Name"], node)) ret = 1 if not self.ShouldBeStatus.has_key(node): self.ShouldBeStatus[node] = "down" if self.ShouldBeStatus[node] != "down": return 1 patterns = [] # Technically we should always be able to notice ourselves starting patterns.append(self["Pat:Local_started"] % node) if self.upcount() == 0: patterns.append(self["Pat:Master_started"] % node) else: patterns.append(self["Pat:Slave_started"] % node) watch = LogWatcher( self["LogFileName"], patterns, 
timeout=self["StartTime"]+10) watch.setwatch() self.install_config(node) self.ShouldBeStatus[node] = "any" if self.StataCM(node) and self.cluster_stable(self["DeadTime"]): self.log ("%s was already started" %(node)) return 1 # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") if not(self.Env["valgrind-tests"]): startCmd = self["StartCmd"] else: if self.Env["valgrind-prefix"]: prefix = self.Env["valgrind-prefix"] else: prefix = "cts" startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % ( self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, """%p""", self["StartCmd"]) if self.rsh(node, startCmd) != 0: self.log ("Warn: Start command failed on node %s" %(node)) return None self.ShouldBeStatus[node]="up" watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.log ("Warn: Startup pattern not found: %s" %(regex)) if watch_result: #self.debug("Found match: "+ repr(watch_result)) self.cluster_stable(self["DeadTime"]) return 1 if self.StataCM(node) and self.cluster_stable(self["DeadTime"]): return 1 self.log ("Warn: Start failed for node %s" %(node)) return None def StartaCMnoBlock(self, node): '''Start up the cluster manager on a given node with none-block mode''' self.debug("Starting %s on node %s" %(self["Name"], node)) # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") if not(self.Env["valgrind-tests"]): startCmd = self["StartCmd"] else: if self.Env["valgrind-prefix"]: prefix = self.Env["valgrind-prefix"] else: prefix = "cts" startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % ( self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, 
"""%p""", self["StartCmd"]) self.rsh(node, startCmd, blocking=0) self.ShouldBeStatus[node]="up" return 1 def StopaCM(self, node): '''Stop the cluster manager on a given node''' self.debug("Stopping %s on node %s" %(self["Name"], node)) if self.ShouldBeStatus[node] != "up": return 1 if self.rsh(node, self["StopCmd"]) == 0: self.ShouldBeStatus[node]="down" self.cluster_stable(self["DeadTime"]) return 1 else: self.log ("Could not stop %s on node %s" %(self["Name"], node)) return None def StopaCMnoBlock(self, node): '''Stop the cluster manager on a given node with none-block mode''' self.debug("Stopping %s on node %s" %(self["Name"], node)) self.rsh(node, self["StopCmd"], blocking=0) self.ShouldBeStatus[node]="down" return 1 def cluster_stable(self, timeout = None): time.sleep(self["StableTime"]) return 1 def node_stable(self, node): return 1 def RereadCM(self, node): '''Force the cluster manager on a given node to reread its config This may be a no-op on certain cluster managers. ''' rc=self.rsh(node, self["RereadCmd"]) if rc == 0: return 1 else: self.log ("Could not force %s on node %s to reread its config" % (self["Name"], node)) return None def StataCM(self, node): '''Report the status of the cluster manager on a given node''' out=self.rsh(node, self["StatusCmd"], 1) ret= (string.find(out, 'stopped') == -1) try: if ret: if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s" % (node, "up", self.ShouldBeStatus[node])) else: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" % (node, "down", self.ShouldBeStatus[node])) except KeyError: pass if ret: self.ShouldBeStatus[node]="up" else: self.ShouldBeStatus[node]="down" return ret def startall(self, nodelist=None): '''Start the cluster manager on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. 
''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.ShouldBeStatus[node] == "down": if not self.StartaCM(node): ret = 0 return ret def stopall(self, nodelist=None): '''Stop the cluster managers on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": if not self.StopaCM(node): ret = 0 return ret def rereadall(self, nodelist=None): '''Force the cluster managers on every node in the cluster to reread their config files. We can do it on a subset of the cluster if nodelist is not None. ''' map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": self.RereadCM(node) def statall(self, nodelist=None): '''Return the status of the cluster managers in the cluster. We can do it on a subset of the cluster if nodelist is not None. 
''' result={} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.StataCM(node): result[node] = "up" else: result[node] = "down" return result def isolate_node(self, target, nodes=None): '''isolate the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: rc = self.rsh(target, self["BreakCommCmd"] % node) if rc != 0: self.log("Could not break the communication between %s and %s: %d" % (target, node, rc)) return None else: self.debug("Communication cut between %s and %s" % (target, node)) return 1 def unisolate_node(self, target, nodes=None): '''fix the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: restored = 0 # Limit the amount of time we have asynchronous connectivity for # Restore both sides as simultaneously as possible self.rsh(target, self["FixCommCmd"] % node, blocking=0) self.rsh(node, self["FixCommCmd"] % target, blocking=0) self.debug("Communication restored between %s and %s" % (target, node)) def reducecomm_node(self,node): '''reduce the communication between the nodes''' rc = self.rsh(node, self["ReduceCommCmd"]%(self.Env["XmitLoss"],self.Env["RecvLoss"])) if rc == 0: return 1 else: self.log("Could not reduce the communication between the nodes from node: %s" % node) return None def restorecomm_node(self,node): '''restore the saved communication between the nodes''' rc = 0 if float(self.Env["XmitLoss"])!=0 or float(self.Env["RecvLoss"])!=0 : rc = self.rsh(node, self["RestoreCommCmd"]); if rc == 0: return 1 else: self.log("Could not restore the communication between the nodes from node: %s" % node) return None def HasQuorum(self, node_list): "Return TRUE if the cluster currently has quorum" # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... 
assume all nodes raise ValueError("Abstract Class member (HasQuorum)") def Components(self): raise ValueError("Abstract Class member (Components)") def oprofileStart(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStart(n) elif node in self.Env["oprofile"]: self.debug("Enabling oprofile on %s" % node) self.rsh(node, "opcontrol --init") self.rsh(node, "opcontrol --setup --no-vmlinux --separate=lib --callgraph=20 --image=all") self.rsh(node, "opcontrol --start") self.rsh(node, "opcontrol --reset") def oprofileSave(self, test, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileSave(test, n) elif node in self.Env["oprofile"]: self.rsh(node, "opcontrol --dump") self.rsh(node, "opcontrol --save=cts.%d" % test) # Read back with: opreport -l session:cts.0 image:/usr/lib/heartbeat/c* if None: self.rsh(node, "opcontrol --reset") else: self.oprofileStop(node) self.oprofileStart(node) def oprofileStop(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStop(n) elif node in self.Env["oprofile"]: self.debug("Stopping oprofile on %s" % node) self.rsh(node, "opcontrol --reset") self.rsh(node, "opcontrol --shutdown 2>&1 > /dev/null") class Resource: ''' This is an HA resource (not a resource group). A resource group is just an ordered list of Resource objects. ''' def __init__(self, cm, rsctype=None, instance=None): self.CM = cm self.ResourceType = rsctype self.Instance = instance self.needs_quorum = 1 def Type(self): return self.ResourceType def Instance(self, nodename): return self.Instance def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. It is analagous to the "status" operation on SystemV init scripts and heartbeat scripts. FailSafe calls it the "exclusive" operation. 
''' raise ValueError("Abstract Class member (IsRunningOn)") return None def IsWorkingCorrectly(self, nodename): ''' This member function returns true if our resource is operating correctly on the given node in the cluster. Heartbeat does not require this operation, but it might be called the Monitor operation, which is what FailSafe calls it. For remotely monitorable resources (like IP addresses), they *should* be monitored remotely for testing. ''' raise ValueError("Abstract Class member (IsWorkingCorrectly)") return None def Start(self, nodename): ''' This member function starts or activates the resource. ''' raise ValueError("Abstract Class member (Start)") return None def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' raise ValueError("Abstract Class member (Stop)") return None def __repr__(self): if (self.Instance and len(self.Instance) > 1): return "{" + self.ResourceType + "::" + self.Instance + "}" else: return "{" + self.ResourceType + "}" class Component: def kill(self, node): None class Process(Component): - def __init__(self, name, dc_only, pats, dc_pats, badnews_ignore, triggersreboot, cm): + def __init__(self, cm, name, process=None, dc_only=0, pats=[], dc_pats=[], badnews_ignore=[], triggersreboot=0): self.name = str(name) self.dc_only = dc_only self.pats = pats self.dc_pats = dc_pats self.CM = cm self.badnews_ignore = badnews_ignore self.triggersreboot = triggersreboot - self.KillCmd = "killall -9 " + self.name - + if process: + self.proc = str(process) + else: + self.proc = str(name) + self.KillCmd = "killall -9 " + self.proc + def kill(self, node): if self.CM.rsh(node, self.KillCmd) != 0: self.CM.log ("ERROR: Kill %s failed on node %s" %(self.name,node)) return None return 1 class ScenarioComponent: def __init__(self, Env): self.Env = Env def IsApplicable(self): '''Return TRUE if the current ScenarioComponent is applicable in the given LabEnvironment given to the constructor. 
''' raise ValueError("Abstract Class member (IsApplicable)") def SetUp(self, CM): '''Set up the given ScenarioComponent''' raise ValueError("Abstract Class member (Setup)") def TearDown(self, CM): '''Tear down (undo) the given ScenarioComponent''' raise ValueError("Abstract Class member (Setup)") class Scenario: ( '''The basic idea of a scenario is that of an ordered list of ScenarioComponent objects. Each ScenarioComponent is SetUp() in turn, and then after the tests have been run, they are torn down using TearDown() (in reverse order). A Scenario is applicable to a particular cluster manager iff each ScenarioComponent is applicable. A partially set up scenario is torn down if it fails during setup. ''') def __init__(self, Components): "Initialize the Scenario from the list of ScenarioComponents" for comp in Components: if not issubclass(comp.__class__, ScenarioComponent): raise ValueError("Init value must be subclass of" " ScenarioComponent") self.Components = Components def IsApplicable(self): ( '''A Scenario IsApplicable() iff each of its ScenarioComponents IsApplicable() ''' ) for comp in self.Components: if not comp.IsApplicable(): return None return 1 def SetUp(self, CM): '''Set up the Scenario. Return TRUE on success.''' j=0 while j < len(self.Components): if not self.Components[j].SetUp(CM): # OOPS! We failed. Tear partial setups down. CM.log("Tearing down partial setup") self.TearDown(CM, j) return None j=j+1 return 1 def TearDown(self, CM, max=None): '''Tear Down the Scenario - in reverse order.''' if max == None: max = len(self.Components)-1 j=max while j >= 0: self.Components[j].TearDown(CM) j=j-1 class InitClusterManager(ScenarioComponent): ( '''InitClusterManager is the most basic of ScenarioComponents. This ScenarioComponent simply starts the cluster manager on all the nodes. It is fairly robust as it waits for all nodes to come up before starting as they might have been rebooted or crashed for some reason beforehand. 
''') def __init__(self, Env): pass def IsApplicable(self): '''InitClusterManager is so generic it is always Applicable''' return 1 def SetUp(self, CM): '''Basic Cluster Manager startup. Start everything''' CM.prepare() # Clear out the cobwebs ;-) self.TearDown(CM) # Now start the Cluster Manager on all the nodes. CM.log("Starting Cluster Manager on all nodes.") return CM.startall() def TearDown(self, CM): '''Set up the given ScenarioComponent''' # Stop the cluster manager everywhere CM.log("Stopping Cluster Manager on all nodes") return CM.stopall() class PingFest(ScenarioComponent): ( '''PingFest does a flood ping to each node in the cluster from the test machine. If the LabEnvironment Parameter PingSize is set, it will be used as the size of ping packet requested (via the -s option). If it is not set, it defaults to 1024 bytes. According to the manual page for ping: Outputs packets as fast as they come back or one hundred times per second, whichever is more. For every ECHO_REQUEST sent a period ``.'' is printed, while for every ECHO_REPLY received a backspace is printed. This provides a rapid display of how many packets are being dropped. Only the super-user may use this option. This can be very hard on a net- work and should be used with caution. ''' ) def __init__(self, Env): self.Env = Env def IsApplicable(self): '''PingFests are always applicable ;-) ''' return 1 def SetUp(self, CM): '''Start the PingFest!''' self.PingSize=1024 if CM.Env.has_key("PingSize"): self.PingSize=CM.Env["PingSize"] CM.log("Starting %d byte flood pings" % self.PingSize) self.PingPids=[] for node in CM.Env["nodes"]: self.PingPids.append(self._pingchild(node)) CM.log("Ping PIDs: " + repr(self.PingPids)) return 1 def TearDown(self, CM): '''Stop it right now! 
My ears are pinging!!''' for pid in self.PingPids: if pid != None: CM.log("Stopping ping process %d" % pid) os.kill(pid, signal.SIGKILL) def _pingchild(self, node): Args = ["ping", "-qfn", "-s", str(self.PingSize), node] sys.stdin.flush() sys.stdout.flush() sys.stderr.flush() pid = os.fork() if pid < 0: self.Env.log("Cannot fork ping child") return None if pid > 0: return pid # Otherwise, we're the child process. os.execvp("ping", Args) self.Env.log("Cannot execvp ping: " + repr(Args)) sys.exit(1) class PacketLoss(ScenarioComponent): ( ''' It would be useful to do some testing of CTS with a modest amount of packet loss enabled - so we could see that everything runs like it should with a certain amount of packet loss present. ''') def IsApplicable(self): '''always Applicable''' return 1 def SetUp(self, CM): '''Reduce the reliability of communications''' if float(CM.Env["XmitLoss"]) == 0 and float(CM.Env["RecvLoss"]) == 0 : return 1 for node in CM.Env["nodes"]: CM.reducecomm_node(node) CM.log("Reduce the reliability of communications") return 1 def TearDown(self, CM): '''Fix the reliability of communications''' if float(CM.Env["XmitLoss"]) == 0 and float(CM.Env["RecvLoss"]) == 0 : return 1 for node in CM.Env["nodes"]: CM.unisolate_node(node) CM.log("Fix the reliability of communications") class BasicSanityCheck(ScenarioComponent): ( ''' ''') def IsApplicable(self): return self.Env["DoBSC"] def SetUp(self, CM): CM.prepare() # Clear out the cobwebs self.TearDown(CM) # Now start the Cluster Manager on all the nodes. CM.log("Starting Cluster Manager on BSC node(s).") return CM.startall() def TearDown(self, CM): CM.log("Stopping Cluster Manager on BSC node(s).") return CM.stopall() class Benchmark(ScenarioComponent): ( ''' ''') def IsApplicable(self): return self.Env["benchmark"] def SetUp(self, CM): CM.prepare() # Clear out the cobwebs self.TearDown(CM) # Now start the Cluster Manager on all the nodes. 
CM.log("Starting Cluster Manager on all node(s).") return CM.startall() def TearDown(self, CM): CM.log("Stopping Cluster Manager on all node(s).") return CM.stopall() class RollingUpgrade(ScenarioComponent): ( ''' Test a rolling upgrade between two versions of the stack ''') def __init__(self, Env): self.Env = Env def IsApplicable(self): if not self.Env["rpm-dir"]: return None if not self.Env["current-version"]: return None if not self.Env["previous-version"]: return None return 1 def install(self, node, version): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version) rc = self.CM.rsh(node, "mkdir -p %s" % target_dir) rc = self.CM.cp("%s/*.rpm %s:%s" % (src_dir, node, target_dir)) rc = self.CM.rsh(node, "rpm -Uvh --force %s/*.rpm" % (target_dir)) return self.success() def upgrade(self, node): return self.install(node, self.CM.Env["current-version"]) def downgrade(self, node): return self.install(node, self.CM.Env["previous-version"]) def SetUp(self, CM): CM.prepare() # Clear out the cobwebs CM.stopall() CM.log("Downgrading all nodes to %s." % self.Env["previous-version"]) for node in self.Env["nodes"]: if not self.downgrade(node): CM.log("Couldn't downgrade %s" % node) return None return 1 def TearDown(self, CM): # Stop everything CM.log("Stopping Cluster Manager on Upgrade nodes.") CM.stopall() CM.log("Upgrading all nodes to %s." % self.Env["current-version"]) for node in self.Env["nodes"]: if not self.upgrade(node): CM.log("Couldn't upgrade %s" % node) return None return 1 diff --git a/cts/CTStests.py b/cts/CTStests.py index 90774f797f..89297e1109 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1,2393 +1,2394 @@ '''CTS: Cluster Testing System: Tests module There are a few things we want to do here: ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. 
Add RecourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import CTS import CTSaudits import time, os, re, types, string, tempfile, sys from CTSaudits import * from stat import * # List of all class objects for tests which we ought to # consider running. class AllTests: ''' A collection of tests which are run at random. 
''' def __init__(self, scenario, cm, tests, Audits): self.CM = cm self.Env = cm.Env self.Scenario = scenario self.Tests = [] self.Audits = [] self.ns=CTS.NodeStatus(self.Env) self.Stats = {"success":0, "failure":0, "BadNews":0, "skipped":0} self.IndividualStats= {} for audit in Audits: if not issubclass(audit.__class__, ClusterAudit): raise ValueError("Init value must be a subclass of ClusterAudit") if audit.is_applicable(): self.Audits.append(audit) for test in tests: if not issubclass(test.__class__, CTSTest): raise ValueError("Init value must be a subclass of CTSTest") if test.is_applicable(): self.Tests.append(test) if not scenario.IsApplicable(): raise ValueError("Scenario not applicable in" " given Environment") def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def audit(self, BadNews, test): errcount=0 BadNewsDebug=0 #BadNews.debug=1 ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append("BadNews:") ignorelist.extend(self.CM.errorstoignore()) if test: ignorelist.extend(test.errorstoignore()) while errcount < 1000: if BadNewsDebug: print "Looking for BadNews" match=BadNews.look(0) if match: if BadNewsDebug: print "BadNews found: "+match add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): if BadNewsDebug: print "Ignoring based on pattern: ("+ignore+")" add_err = 0 if add_err == 1: self.CM.log("BadNews: " + match) self.incr("BadNews") errcount=errcount+1 else: break else: answer = raw_input('Big problems. Continue? 
[nY]') if answer and answer == "n": self.CM.log("Shutting down.") self.CM.stopall() self.summarize() raise ValueError("Looks like we hit a BadNews jackpot!") for audit in self.Audits: if not audit(): self.CM.log("Audit " + audit.name() + " FAILED.") self.incr("auditfail") if test: test.incr("auditfail") def summarize(self): self.CM.log("****************") self.CM.log("Overall Results:" + repr(self.Stats)) self.CM.log("****************") stat_filter = { "calls":0, "failure":0, "skipped":0, "auditfail":0, } self.CM.log("Test Summary") for test in self.Tests: for key in stat_filter.keys(): stat_filter[key] = test.Stats[key] self.CM.log(("Test %s: "%test.name).ljust(25) + " %s"%repr(stat_filter)) self.CM.debug("Detailed Results") for test in self.Tests: self.CM.debug(("Test %s: "%test.name).ljust(25) + " %s"%repr(test.Stats)) self.CM.log("<<<<<<<<<<<<<<<< TESTS COMPLETED") def test_loop(self, BadNews, max): testcount=1 self.CM.log("Executing all tests once") for test in self.Tests: if self.run_test(BadNews, test, testcount): testcount += 1 return testcount def run_test(self, BadNews, test, testcount): nodechoice = self.Env.RandomNode() ret = 1 where = "" did_run = 0 self.CM.log(("Running test %s" % test.name).ljust(35) + (" (%s) " % nodechoice).ljust(15) +"["+ ("%d" % testcount).rjust(3) +"]") starttime=test.set_starttime() if not test.setup(nodechoice): self.CM.log("Setup failed") ret = 0 elif not test.canrunnow(nodechoice): self.CM.log("Skipped") test.skipped() else: did_run = 1 ret = test(nodechoice) if not test.teardown(nodechoice): self.CM.log("Teardown failed") ret = 0 test.log_mark("stop") stoptime=time.time() self.CM.oprofileSave(testcount) elapsed_time = stoptime - starttime test_time = stoptime - test.starttime if not test.has_key("min_time"): test["elapsed_time"] = elapsed_time test["min_time"] = test_time test["max_time"] = test_time else: test["elapsed_time"] = test["elapsed_time"] + elapsed_time if test_time < test["min_time"]: test["min_time"] = 
test_time if test_time > test["max_time"]: test["max_time"] = test_time if ret: self.incr("success") self.CM.debug("Test %s runtime: %.2f" % (test.name, test_time)) else: self.incr("failure") self.CM.statall() did_run = 1 # Force the test count to be incrimented anyway so test extraction works self.audit(BadNews, test) return did_run def run(self, max=1): ( ''' Set up the given scenario, then run the selected tests at random for the selected number of iterations. ''') BadNews=CTS.LogWatcher(self.CM["LogFileName"], self.CM["BadRegexes"] , timeout=0) BadNews.setwatch() self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"]) self.CM.oprofileStop() self.CM.oprofileStart() if not self.CM.Env["DoBSC"]: audit = LogAudit(self.CM) if not audit(): self.CM.log("Audit " + audit.name() + " FAILED.") return (None, None) else: self.CM.log("Audit " + audit.name() + " passed.") audit = DiskAudit(self.CM) if not audit(): self.CM.log("Audit " + audit.name() + " FAILED.") return (None, None) else: self.CM.log("Audit " + audit.name() + " passed.") if not self.Scenario.SetUp(self.CM): return (None, None) self.CM.oprofileSave(0) time.sleep(30) # This makes sure everything is stabilized before starting... self.audit(BadNews, None) testcount = self.test_loop(BadNews, max) self.Scenario.TearDown(self.CM) self.CM.oprofileSave(testcount) self.CM.oprofileStop() self.audit(BadNews, None) for test in self.Tests: self.IndividualStats[test.name] = test.Stats return self.Stats, self.IndividualStats class RandomTests(AllTests): def test_loop(self, BadNews, max): testcount=1 self.CM.log("Executing tests at random") while testcount <= max: test = self.Env.RandomGen.choice(self.Tests) if self.run_test(BadNews, test, testcount): testcount += 1 return testcount class BenchTests(AllTests): ''' Nothing (yet) here. ''' AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. 
We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Audits = [] self.timeout=120 self.starttime=0 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_experimental = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def log_mark(self, msg): self.CM.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def set_starttime(self): self.starttime=time.time() self.log_mark("start") return self.starttime def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.CM.log(("Test %s" % self.name).ljust(35) +" FAILED: %s" % reason) return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.CM.log("Internal %s Audit %s FAILED." 
% (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match=watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.CM.log(prefix + " " + match) errcount=errcount+1 else: break else: self.CM.log("Too many errors!") return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.CM.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.CM.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.CM.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.CM.Env["experimental-tests"]: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.CM.debug("Found o2cb: %s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.CM.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' 
return [] ################################################################### class StopTest(CTSTest): ################################################################### '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stop" def __call__(self, node): '''Perform the 'stop' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # patterns.append(self.CM["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.CM["Pat:They_stopped"] %(other, node)) #self.debug("Checking %s will notice %s left"%(other, node)) watch = CTS.LogWatcher( self.CM["LogFileName"], patterns, self.CM["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason=None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.CM.rsh(node, "/bin/ps axf", None) for line in output: self.CM.debug(line) for regex in watch.unmatched: self.CM.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason="Missing shutdown pattern" self.CM.cluster_stable(self.CM["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... 
# ################################################################### class StartTest(CTSTest): ################################################################### '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name="start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" %(self.CM["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # ################################################################### class FlipTest(CTSTest): ################################################################### '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type="up->down" # Give the cluster time to recognize it's gone... 
time.sleep(self.CM["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type="down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) ################################################################### class RestartTest(CTSTest): ################################################################### '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. ''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_starttime() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) ################################################################### class StonithdTest(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.CM.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") watchpats = [] watchpats.append("Forcing node %s to be terminated" % node) watchpats.append("Scheduling Node %s for STONITH" % node) watchpats.append("Executing .* fencing operation") - watchpats.append("sending fencing op RESET for %s" % node) + watchpats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node) if not 
self.CM.is_node_dc(node): # Won't be found if the DC is shot (and there's no equivalent message from stonithd) watchpats.append("tengine_stonith_callback: .*result=0") + # TODO else: look for the notification on a peer once implimented if self.CM.Env["at-boot"] == 0: self.CM.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node]="down" else: self.CM.debug("Expecting %s to come up again %d" % (node, self.CM.Env["at-boot"])) watchpats.append("%s crmd: .* S_STARTING -> S_PENDING" % node) watchpats.append("%s crmd: .* S_PENDING -> S_NOT_DC" % node) watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() self.CM.rsh(node, "crm_attribute --node %s --type status --attr-name terminate --attr-value true" % node) matched = watch.lookforall() if matched: self.CM.debug("Found: "+ repr(matched)) else: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") return self.success() def errorstoignore(self): return [ "Executing .* fencing operation" ] def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env.has_key("DoStonith"): return self.CM.Env["DoStonith"] return 1 AllTestClasses.append(StonithdTest) ################################################################### class StartOnebyOne(CTSTest): ################################################################### '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StartOnebyOne" 
self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns=CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed=[] self.set_starttime() for node in self.CM.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) ################################################################### class SimulStart(CTSTest): ################################################################### '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) ################################################################### class SimulStop(CTSTest): ################################################################### '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... 
ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) ################################################################### class StopOnebyOne(CTSTest): ################################################################### '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed=[] self.set_starttime() for node in self.CM.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) ################################################################### class RestartOnebyOne(CTSTest): ################################################################### '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... 
ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail=[] self.set_starttime() self.restart = RestartTest(self.CM) for node in self.CM.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" %(len(did_fail), repr(did_fail))) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) ################################################################### class PartialStart(CTSTest): ################################################################### '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) #self.is_unsafe = 1 def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("Starting crmd") watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats, timeout=self.CM["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stopall(None) if not ret: return self.failure("%s did not stop in time" % node) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) ####################################################################### class StandbyTest(CTSTest): ####################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Standby" self.benchmark = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby 
mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) def __call__(self, node): self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") self.CM.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) self.CM.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.log_mark("standby:on") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_mark("standby:on-idle") self.CM.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.CM.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.CM.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.log_mark("standby:off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.log_mark("standby:off-idle") return self.success() AllTestClasses.append(StandbyTest) ####################################################################### class 
ValgrindTest(CTSTest): ####################################################################### '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # Enable valgrind self.logPat = "/tmp/%s-*.valgrind" % self.name self.CM.Env["valgrind-prefix"] = self.name self.CM.rsh(node, "rm -f %s" % self.logPat, None) ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") for node in self.CM.Env["nodes"]: (rc, output) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) for line in output: self.CM.debug(line) return self.success() def teardown(self, node): # Disable valgrind self.CM.Env["valgrind-prefix"] = None # Return all nodes to normal ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks leaked = [] self.stop = StopTest(self.CM) for node in self.CM.Env["nodes"]: (rc, ps_out) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.CM.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e ERROR.*SUMMARY:.*[1-9].*errors %s" % self.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) for line in ps_out: self.CM.log(line) (rc, output) = self.CM.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logPat, None) for line in output: self.CM.log(line) (rc, output) = self.CM.rsh(node, "cat %s" % self.logPat, None) for line in output: self.CM.debug(line) self.CM.rsh(node, "rm -f %s" % self.logPat, None) return leaked def __call__(self, node): leaked = self.find_leaks() if len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) 
return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ] ####################################################################### class StandbyLoopTest(ValgrindTest): ####################################################################### '''Check for memory leaks by putting a node in and out of standby for an hour''' def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name="StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() AllTestClasses.append(StandbyLoopTest) ############################################################################## class BandwidthTest(CTSTest): ############################################################################## # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. 
'''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount()<1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.CM.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.CM.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.CM.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.CM.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.CM.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" 
% rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count=count+1 linesplit = string.split(line," ") for j in range(len(linesplit)-1): if linesplit[j]=="udp": break if linesplit[j]=="length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = string.split(T1,":") time2split = string.split(timesplit[2],".") time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = string.split(line," ") for j in range(len(linessplit)-1): if linessplit[j] =="udp": break if linesplit[j]=="length:": break try: sum=int(linessplit[j+1])+sum except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = string.split(T2,":") time2split = string.split(timesplit[2],".") time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return (sum*8)/time def is_applicable(self): '''BandwidthTest never applicable''' return 0 AllTestClasses.append(BandwidthTest) ################################################################### class ResourceRecover(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ResourceRecover" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max=30 self.rid=None #self.is_unsafe = 1 self.benchmark = 1 # these are the values used for the new LRM API call self.action = "asyncmon" self.interval = 0 def __call__(self, node): '''Perform the 'ResourceRecover' test. 
''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") resourcelist = self.CM.active_resources(node) # if there are no resourcelist, return directly if len(resourcelist)==0: self.CM.log("No active resources on %s" % node) return self.skipped() self.rid = self.CM.Env.RandomGen.choice(resourcelist) rsc = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.id == self.rid: rsc = tmp # Handle anonymous clones that get renamed self.rid = rsc.clone_id break if not rsc: return self.failure("Could not find %s in the resource list" % self.rid) self.CM.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id)) pats = [] pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) if rsc.managed(): pats.append("crmd:.* Performing .* op=%s_stop_0" % self.rid) if rsc.unique(): pats.append("crmd:.* Performing .* op=%s_start_0" % self.rid) pats.append("crmd:.* LRM operation %s_start_0.*confirmed.*ok" % self.rid) else: # Anonymous clones may get restarted with a different clone number pats.append("crmd:.* Performing .* op=.*_start_0") pats.append("crmd:.* LRM operation .*_start_0.*confirmed.*ok") watch = CTS.LogWatcher(self.CM["LogFileName"], pats, timeout=60) watch.setwatch() self.CM.rsh(node, "crm_resource -F -r %s -H %s &>/dev/null" % (self.rid, node)) watch.lookforall() self.CM.cluster_stable() recovered=self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.CM.debug("%s is running on: %s" %(self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors 
which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """Unknown operation: fail""", """ERROR: sending stonithRA op to stonithd failed.""", """ERROR: process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), """ERROR: process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) ################################################################### class ComponentFail(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ComponentFail" self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.CM["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.CM.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.CM.Env.RandomGen.choice(self.complist) self.CM.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "aisexec": if self.CM["Name"] != "crm-lha" or chosen.name != "pengine": self.patterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # In an ideal world, this next stuff should be in the "chosen" object as a member function if self.CM["Name"] == "crm-lha" and chosen.triggersreboot: # Make sure the node goes down and then comes back up if 
it should reboot... for other in self.CM.Env["nodes"]: if other != node: self.patterns.append(self.CM["Pat:They_stopped"] %(other, node)) self.patterns.append(self.CM["Pat:Slave_started"] % node) self.patterns.append(self.CM["Pat:Local_started"] % node) if chosen.dc_only: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s crmd:.*Process %s:.* exited" %(node, chosen.name)) self.okerrpatterns.append("%s crmd:.*I_ERROR.*crmdManagedChildDied" %node) self.okerrpatterns.append("%s crmd:.*The %s subsystem terminated unexpectedly" %(node, chosen.name)) self.okerrpatterns.append("ERROR: Client .* exited with return code") else: # Sometimes this won't be in the log... self.okerrpatterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildExit"]) # supply a copy so self.patterns doesnt end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] - stonithPats.append("sending fencing op RESET for %s" % node) + stonithPats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node) stonith = CTS.LogWatcher(self.CM["LogFileName"], stonithPats, 0) stonith.setwatch() # set the watch for stable watch = CTS.LogWatcher( self.CM["LogFileName"], tmpPats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) # check to see Heartbeat noticed matched = watch.lookforall(allow_multiple_matches=1) if matched: self.CM.debug("Found: "+ repr(matched)) else: self.CM.log("Patterns not found: " + repr(watch.unmatched)) if self.CM.Env["at-boot"] == 0: self.CM.debug("Checking if %s was shot" % node) shot = stonith.look(60) if shot: self.CM.debug("Found: "+ repr(shot)) 
self.CM.ShouldBeStatus[node]="down" self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting for any STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) #################################################################### class SplitBrainTest(CTSTest): #################################################################### '''It is used to test split-brain. 
when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "SplitBrain" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.is_experimental = 1 def isolate_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]) + " from " +repr(partition)) if len(other_nodes) == 0: return 1 self.CM.debug("Creating partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: if not self.CM.isolate_node(node, other_nodes): self.CM.log("Could not isolate %s" % node) return 0 return 1 def heal_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"])) if len(other_nodes) == 0: return 1 self.CM.debug("Healing partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: self.CM.unisolate_node(node, other_nodes) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") self.passed = 1 partitions = {} ret = self.startall(None) if not ret: return self.failure("Setup failed") while 1: # Retry until we get multiple partitions partitions = {} p_max = len(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: p = self.CM.Env.RandomGen.randint(1, p_max) if not partitions.has_key(p): partitions[p]= [] partitions[p].append(node) p_max = len(partitions.keys()) if p_max > 1: break # else, try again self.CM.debug("Created %d partitions" % p_max) for key in partitions.keys(): self.CM.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) # Disabling STONITH to reduce test complexity for now self.CM.rsh(node, "crm_attribute -n stonith-enabled -v false") 
for key in partitions.keys(): self.isolate_partition(partitions[key]) count = 30 while count > 0: if len(self.CM.find_partitions()) != p_max: time.sleep(10) else: break else: self.failure("Expected partitions were not created") # Target number of partitions formed - wait for stability if not self.CM.cluster_stable(): self.failure("Partitioned cluster not stable") # Now audit the cluster state self.CM.partitions_expected = p_max if not self.audit(): self.failure("Audits failed") self.CM.partitions_expected = 1 # And heal them again for key in partitions.keys(): self.heal_partition(partitions[key]) # Wait for a single partition to form count = 30 while count > 0: if len(self.CM.find_partitions()) != 1: time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not reform") # Wait for it to have the right number of members count = 30 while count > 0: members = [] partitions = self.CM.find_partitions() if len(partitions) > 0: members = partitions[0].split() if len(members) != len(self.CM.Env["nodes"]): time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not completely reform") # Wait up to 20 minutes - the delay is more preferable than # trying to continue with in a messed up state if not self.CM.cluster_stable(1200): self.failure("Reformed cluster not stable") answer = raw_input('Continue? 
[nY]') if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.CM.Env["DoStonith"]: self.CM.rsh(node, "crm_attribute -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "Another DC detected:", "ERROR: attrd_cib_callback: .*Application of an update diff failed", "crmd_ha_msg_callback:.*not in our membership list", "CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.CM.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) #################################################################### class Reattach(CTSTest): #################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def setup(self, node): return self.startall(None) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.CM.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] managed = CTS.LogWatcher(self.CM["LogFileName"], ["is-managed-default"], timeout=60) managed.setwatch() self.CM.debug("Disable resource management") self.CM.rsh(node, "crm_attribute -n is-managed-default -v false") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] pats.append("crmd:.*Performing.*_stop_0") pats.append("crmd:.*Performing.*_start_0") pats.append("crmd:.*Performing.*_promote_0") pats.append("crmd:.*Performing.*_demote_0") 
pats.append("crmd:.*Performing.*_migrate_.*_0") watch = CTS.LogWatcher(self.CM["LogFileName"], pats, timeout=60) watch.setwatch() self.CM.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Couldn't shut down the cluster") self.CM.debug("Bringing the cluster back up") ret = self.startall(None) if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Resources stopped or started during cluster restart") watch = CTS.LogWatcher(self.CM["LogFileName"], pats, timeout=60) watch.setwatch() managed = CTS.LogWatcher(self.CM["LogFileName"], ["is-managed-default"], timeout=60) managed.setwatch() self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not enabled") self.CM.cluster_stable() # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.CM.debug("Ignoring: crmd:.*Performing.*op=%s_.*_0" % r.id) ignore.append("crmd:.*Performing.*op=%s_.*_0" % r.id) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ "You may ignore this error if it is unmanaged.", "pingd: .*ERROR: send_ipc_message:", "pingd: 
.*ERROR: send_update:", ] def is_applicable(self): if self.CM["Name"] == "crm-lha": return None return 1 AllTestClasses.append(Reattach) #################################################################### class SpecialTest1(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... ret = self.stopall(None) if not ret: return ret # Start the selected node ret = self.restart1(node) if not ret: return ret # Start all remaining nodes ret = self.startall(None) return ret AllTestClasses.append(SpecialTest1) #################################################################### class HAETest(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active=0 (rc, lines) = self.CM.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.CM.debug("Resource %s is still inactive" % resource) elif rc == 234: 
self.CM.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.CM.log("Cluster is inactive") return 0 elif rc != 0: self.CM.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.CM.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "controld" and r.parent != "NA": self.CM.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env["Schema"] == "hae": return 1 return None #################################################################### class HAERoleTest(HAETest): #################################################################### def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. 
''' HAETest.__init__(self,cm) self.name="HAERoleTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_resource -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) #################################################################### class HAEStandbyTest(HAETest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name="HAEStandbyTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_standby -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" 
% self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) ################################################################### class NearQuorumPointTest(CTSTest): ################################################################### ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. 
''' self.incr("calls") startset = [] stopset = [] #decide what to do with each node for node in self.CM.Env["nodes"]: action = self.CM.Env.RandomGen.choice(["start","stop"]) #action = self.CM.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.CM.debug("start nodes:" + repr(startset)) self.CM.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.CM["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.CM["Pat:Slave_started"] % node) watchpats.append(self.CM["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.CM["Pat:They_stopped"] % (node, stopping)) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.CM["Pat:DC_IDLE"]) watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() return self.success() self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residule for node in stopset: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() if len(upnodes) > 0: self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.CM.log("Warn: Unstartable nodes: " + 
repr(downnodes)) return self.failure() AllTestClasses.append(NearQuorumPointTest) ################################################################### class RollingUpgradeTest(CTSTest): ################################################################### '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Start all remaining nodes ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version) self.CM.log("Installing %s on %s with %s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = self.CM.rsh(node, "mkdir -p %s" % target_dir) rc = self.CM.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.CM.rsh(node, "ls -1 %s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.CM.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.CM.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.CM.Env["current-version"], start) def downgrade(self, node, start=1): return 
self.install(node, self.CM.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.CM.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not self.CM.Env.has_key("rpm-dir"): return None if not self.CM.Env.has_key("current-version"): return None if not self.CM.Env.has_key("previous-version"): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) ################################################################### class BSC_AddResource(CTSTest): ################################################################### '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="AddResource" self.resource_offset = 0 self.cib_cmd="""cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = CTS.LogWatcher( self.CM["LogFileName"], patterns, self.CM["DeadTime"]) watch.setwatch() fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"] = ip if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.CM["DeadTime"]): return self.failure("Unstable cluster") return self.success() def make_ip_resource(self, node, id, rclass, type, ip): 
self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint=""" """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.CM.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.CM.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.CM.Env["DoBSC"]: return 1 return None class SimulStopLite(CTSTest): ################################################################### '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.incr("WasStarted") watchpats.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # watchpats.append(self.CM["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.success() # Stop all the nodes - at about the same time... 
watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.set_starttime() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() # Make sure they're completely down with no residule for node in self.CM.Env["nodes"]: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() did_fail=0 up_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail=1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.CM.log("Warn: All nodes stopped but CTS didnt detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 ################################################################### class SimulStartLite(CTSTest): ################################################################### '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartList' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] uppat = self.CM["Pat:Slave_started"] if self.CM.upcount() == 0: uppat = self.CM["Pat:Local_started"] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.incr("WasStopped") watchpats.append(uppat % node) if len(watchpats) == 0: return self.success() watchpats.append(self.CM["Pat:DC_IDLE"]) # Start all the nodes - at about the same time... 
watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.set_starttime() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) if watch.lookforall(): for attempt in (1, 2, 3, 4, 5): if self.CM.cluster_stable(): return self.success() return self.failure("Cluster did not stabilize") did_fail=0 unstable = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.CM.Env["nodes"]: if not self.CM.node_stable(node): did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) self.CM.log("ERROR: All nodes started but CTS didnt detect: " + repr(watch.unmatched)) return self.failure() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result def BenchTestList(cm, audits): all = TestList(cm, audits) result = [] for test in all: if test.benchmark: result.append(test) return result diff --git a/extra/resources/controld b/extra/resources/controld index 37500856fa..9976473c35 100644 --- a/extra/resources/controld +++ b/extra/resources/controld @@ -1,201 +1,209 @@ #!/bin/sh # # Resource Agent for managing the DLM controld process. # # Copyright (c) 2009 Novell, Inc # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This Resource Agent can control the dlm_controld services needed by ocfs2. It assumes that dlm_controld is in your default PATH. In most cases, it should be run as an anonymous clone. DLM Agent for OCFS2 Any additional options to start the dlm_controld service with DLM Options The location where configfs is or should be mounted Location of configfs + + +The daemon to start - supports gfs_controld(.pcmk) and dlm_controld(.pcmk) + +The daemon to start + + + END } ####################################################################### controld_usage() { cat < /dev/null if [ $? != 0 ]; then mount -t configfs none $OCF_RESKEY_configdir fi if [ ! -e $OCF_RESKEY_configdir/dlm ]; then modprobe dlm if [ ! -e $OCF_RESKEY_configdir/dlm ]; then ocf_log err "$OCF_RESKEY_configdir/dlm not available" return $OCF_NOT_INSTALLED fi fi ${OCF_RESKEY_daemon} $OCF_RESKEY_args sleep 1 controld_monitor } controld_stop() { controld_monitor; rc=$? 
if [ $rc = $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi killall -TERM ${OCF_RESKEY_daemon}; rc=$? if [ $rc != 0 ]; then return $OCF_ERR_GENERIC fi rc=$OCF_SUCCESS while [ $rc = $OCF_SUCCESS ]; do controld_monitor; rc=$? sleep 1 done if [ $rc = $OCF_NOT_RUNNING ]; then rc=$OCF_SUCCESS fi return $rc }

# Probe the configured daemon with "killall -0" and translate the exit status
# into an OCF return code: 0 -> running, 1 -> not running, anything else -> error.
controld_monitor() {
    killall -0 ${OCF_RESKEY_daemon}; rc=$?
    case $rc in
        0) return $OCF_SUCCESS;;
        1) return $OCF_NOT_RUNNING;;
        *) return $OCF_ERR_GENERIC;;
    esac
}

# Check that the daemon binary exists, reject globally-unique clone
# configurations and make sure /var/run/cluster is present.
controld_validate() {
    check_binary ${OCF_RESKEY_daemon}

    # The variable is spelled "globally_unique" (the pingd agent reads the same
    # name); the earlier misspelling meant this guard could never trigger.
    case ${OCF_RESKEY_CRM_meta_globally_unique} in
        yes|Yes|true|True|1)
            ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally-unique=false meta attribute"
            exit $OCF_ERR_CONFIGURED
            ;;
    esac

    [ -d /var/run/cluster ] || mkdir /var/run/cluster

    return $OCF_SUCCESS
}

# Defaults for parameters that were not supplied by the cluster
: ${OCF_RESKEY_args=-q 0}
: ${OCF_RESKEY_sctp=false}
: ${OCF_RESKEY_daemon=dlm_controld.pcmk}
: ${OCF_RESKEY_configdir=/sys/kernel/config}
: ${OCF_RESKEY_CRM_meta_globally_unique:="false"}

# Dispatch on the requested action; start and monitor validate first
case $__OCF_ACTION in
meta-data)      meta_data
                exit $OCF_SUCCESS
                ;;
start)          controld_validate; controld_start;;
stop)           controld_stop;;
monitor)        controld_validate; controld_monitor;;
validate-all)   controld_validate;;
usage|help)     controld_usage
                exit $OCF_SUCCESS
                ;;
*)              controld_usage
                exit $OCF_ERR_UNIMPLEMENTED
                ;;
esac
rc=$?

exit $rc
diff --git a/extra/resources/pingd b/extra/resources/pingd
index 3e5aeaa6e9..0f77e47145 100644
--- a/extra/resources/pingd
+++ b/extra/resources/pingd
@@ -1,310 +1,316 @@
#!/bin/sh
#
#
# pingd OCF Resource Agent
# Records (in the CIB) the current number of ping nodes a
# cluster node can connect to.
#
# Copyright (c) 2006 Andrew Beekhof
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: +if [ "x" != "x$OCF_RESKEY_host_list" ]; then + ocf_log err "It is recommended that you use the ping resource instead" +# ${OCF_ROOT}/resource.d/pacemaker/ping $1 +# exit $? +fi + . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This is a pingd Resource Agent. It records (in the CIB) the current number of ping nodes a node can connect to. pingd resource agent PID file PID file The user we want to run pingd as The user we want to run pingd as The time to wait (dampening) further changes occur Dampening interval The name of the instance_attributes set to place the value in. Rarely needs to be specified. Set name The name of the attributes to set. This is the name to be used in the constraints. Attribute name The section place the value in. Rarely needs to be specified. Section name The number by which to multiply the number of connected ping nodes by Value multiplier The list of ping nodes to count. Defaults to all configured ping nodes. Rarely needs to be specified. 
Host list How often, in seconds, to check for node liveliness ping interval in seconds Number of ping attempts, per host, before declaring it dead no. of ping attempts How long, in seconds, to wait before declaring a ping lost ping timeout in seconds A catch all for any other options that need to be passed to pingd. Extra Options END } ####################################################################### pingd_usage() { cat </dev/null if [ $? -eq 0 ]; then : Yes, user exists. We can further check his permission on crm_mon if necessary else ocf_log err "The user $OCF_RESKEY_user does not exist!" exit $OCF_ERR_ARGS fi fi # Pidfile better be an absolute path case $OCF_RESKEY_pidfile in /*) ;; *) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" ;; esac # Check the ping interval if ocf_is_decimal "$OCF_RESKEY_interval" && [ $OCF_RESKEY_interval -gt 0 ]; then : else ocf_log err "Invalid ping interval $OCF_RESKEY_interval. It should be positive integer!" exit $OCF_ERR_ARGS fi echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then pingd_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_options:=""} : ${OCF_RESKEY_dampen:="5s"} : ${OCF_RESKEY_interval:="1"} : ${OCF_RESKEY_name:="pingd"} : ${OCF_RESKEY_CRM_meta_interval:=0} : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then : ${OCF_RESKEY_pidfile:="$HA_VARRUN/pingd-${OCF_RESKEY_name}"} else : ${OCF_RESKEY_pidfile:="$HA_VARRUN/pingd-${OCF_RESOURCE_INSTANCE}"} fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) pingd_start ;; stop) pingd_stop ;; monitor) pingd_monitor ;; validate-all) pingd_validate ;; usage|help) pingd_usage exit $OCF_SUCCESS ;; *) pingd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
diff --git a/fencing/Makefile.am b/fencing/Makefile.am index db0a495be7..dc184ac8d0 100644 --- a/fencing/Makefile.am +++ b/fencing/Makefile.am @@ -1,20 +1,48 @@ # Author: Sun Jiang Dong # Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in -SUBDIRS = stonithd test +SUBDIRS = + +## binary progs +halibdir = $(CRM_DAEMON_DIR) +halib_PROGRAMS = stonithd stonith-test + +sbin_PROGRAMS = stonith_admin +sbin_SCRIPTS = fence_legacy + +stonith_test_SOURCES = test.c + +stonith_test_LDADD = $(CRYPTOLIB) $(CLUSTERLIBS) \ + $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/common/libcrmcluster.la \ + $(top_builddir)/lib/fencing/libstonithd.la + +stonith_admin_SOURCES = admin.c + +stonith_admin_LDADD = $(CRYPTOLIB) $(CLUSTERLIBS) \ + $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/common/libcrmcluster.la \ + $(top_builddir)/lib/fencing/libstonithd.la + +stonithd_SOURCES = main.c commands.c remote.c + +stonithd_LDADD = $(CRYPTOLIB) $(CLUSTERLIBS) \ + $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/common/libcrmcluster.la \ + $(top_builddir)/lib/fencing/libstonithd.la diff --git a/fencing/admin.c b/fencing/admin.c new file mode 100644 index 0000000000..a305629003 --- /dev/null +++ b/fencing/admin.c @@ 
-0,0 +1,247 @@
+/*
+ * Copyright (C) 2009 Andrew Beekhof
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+/* Long options accepted on the command line; main() maps each flag to an action */
+static struct crm_option long_options[] = {
+    {"verbose", 0, 0, 'V'},
+    {"version", 0, 0, '$'},
+    {"help", 0, 0, '?'},
+
+    {"list", 1, 0, 'l'},
+    {"list-all", 0, 0, 'L'},
+
+    {"query", 1, 0, 'Q'},
+    {"fence", 1, 0, 'F'},
+    {"unfence", 1, 0, 'U'},
+
+    {"register", 1, 0, 'R'},
+    {"deregister", 1, 0, 'D'},
+
+    {"env-option", 1, 0, 'e'},
+    {"option", 1, 0, 'o'},
+    {"agent", 1, 0, 'a'},
+
+    {0, 0, 0, 0}
+};
+
+/* Call options passed with every stonith API request made by main() */
+int st_opts = st_opt_sync_call;
+
+/* Notification callback registered in main(): logs the message, tagged with the event name */
+static void st_callback(stonith_t *st, const char *event, xmlNode *msg)
+{
+    crm_log_xml_notice(msg, event);
+}
+
+/*
+ * Entry point for stonith_admin.
+ *
+ * Parses the command line into a single action (list, query, register,
+ * deregister, fence or unfence) plus optional device parameters, connects
+ * through the stonith API, performs the action and returns its result code.
+ */
+int
+main(int argc, char ** argv)
+{
+    int flag;
+    int rc = 0;
+    int argerr = 0;
+    int option_index = 0;
+
+    char name[512];
+    char value[512];
+    const char *agent = NULL;
+    const char *device = NULL;
+    const char *target = NULL;
+
+    char action = 0;
+    stonith_t *st = NULL;
+    GHashTable *hash = g_hash_table_new(g_str_hash, g_str_equal);
+
+    crm_log_init("stonith-admin", LOG_INFO, TRUE, TRUE, argc, argv);
+    /* NOTE(review): the description below reads like another tool's help text - confirm */
+    crm_set_options("V?$LQ:R:D:o:a:l:e:F:U:", "mode [options]", long_options,
+                    "Provides a summary of cluster's current state."
+                    "\n\nOutputs varying levels of detail in a number of different formats.\n");
+
+    while (1) {
+        flag = crm_get_option(argc, argv, &option_index);
+        if (flag == -1)
+            break;
+
+        switch(flag) {
+            case 'V':
+                alter_debug(DEBUG_INC);
+                cl_log_enable_stderr(1);
+                break;
+            case '$':
+            case '?':
+                crm_help(flag, LSB_EXIT_OK);
+                break;
+            case 'L':
+                action = flag;
+                break;
+            case 'Q':
+            case 'R':
+            case 'D':
+                action = flag;
+                device = optarg;
+                break;
+            case 'a':
+                agent = optarg;
+                break;
+            case 'l':
+                target = optarg;
+                action = 'L';
+                break;
+            case 'F':
+            case 'U':
+                target = optarg;
+                action = flag;
+                break;
+            case 'o':
+                crm_info("Scanning: -o %s", optarg);
+                /* name[] and value[] are 512 bytes each: limit both conversions to
+                 * 511 characters so an over-long argument cannot overflow them */
+                rc = sscanf(optarg, "%511[^=]=%511[^=]", name, value);
+                if(rc != 2) {
+                    crm_err("Invalid option: -o %s", optarg);
+                    ++argerr;
+                } else {
+                    crm_info("Got: '%s'='%s'", name, value);
+                    g_hash_table_insert(hash, crm_strdup(name), crm_strdup(value));
+                }
+                break;
+            case 'e':
+                {
+                    char *key = crm_concat("OCF_RESKEY", optarg, '_');
+                    const char *env = getenv(key);
+
+                    if(env == NULL) {
+                        crm_err("Invalid option: -e %s", optarg);
+                        ++argerr;
+                    } else {
+                        crm_info("Got: '%s'='%s'", optarg, env);
+                        g_hash_table_insert(hash, crm_strdup(optarg), crm_strdup(env));
+                    }
+                    crm_free(key); /* only needed for the getenv() lookup above */
+                }
+                break;
+            default:
+                ++argerr;
+                break;
+        }
+    }
+
+    if (optind > argc) {
+        ++argerr;
+    }
+
+    if (argerr) {
+        crm_help('?', LSB_EXIT_GENERIC);
+    }
+
+#if 0
+    g_hash_table_insert(hash, crm_strdup("ipaddr"), crm_strdup("localhost"));
+    g_hash_table_insert(hash, crm_strdup("pcmk-portmap"), crm_strdup("some-host=pcmk-1 pcmk-3=3,4"));
+    g_hash_table_insert(hash, crm_strdup("login"), crm_strdup("root"));
+    g_hash_table_insert(hash, crm_strdup("identity_file"), crm_strdup("/root/.ssh/id_dsa"));
+#endif
+
+    crm_debug("Create");
+    st = stonith_api_new();
+
+    rc = st->cmds->connect(st, crm_system_name, NULL, NULL);
+    crm_debug("Connect: %d", rc);
+    if(rc < 0) {
+        /* Assumes connect() reports failure with a negative code, like the calls below - confirm */
+        crm_err("Could not connect to stonith: %d", rc);
+        goto done;
+    }
+
+    rc = st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
+
+    switch(action)
+    {
+        case 'L':
+            {
+                GListPtr devices = NULL;
+                rc = st->cmds->query(st, st_opts, target, &devices, 10);
+                if(rc == 0) {
+                    fprintf(stderr, "No devices found\n");
+
+                } else if(rc > 0) {
+                    fprintf(stderr, "%d devices found\n", rc);
+                    slist_iter(device, char, devices, lpc,
+                               fprintf(stdout, " %s\n", device);
+                        );
+                    rc = 0;
+                }
+            }
+            break;
+        case 'Q':
+            rc = st->cmds->call(st, st_opts, device, "monitor", NULL, 10);
+            if(rc < 0) {
+                rc = st->cmds->call(st, st_opts, device, "list", NULL, 10);
+            }
+            break;
+        case 'R':
+            rc = st->cmds->register_device(st, st_opts, device, "stonith-ng", agent, hash);
+            break;
+        case 'D':
+            rc = st->cmds->remove_device(st, st_opts, device);
+            break;
+        case 'F':
+            rc = st->cmds->fence(st, st_opts, target, "off", 120);
+            break;
+        case 'U':
+            rc = st->cmds->fence(st, st_opts, target, "on", 120);
+            break;
+    }
+
+done:
+    st->cmds->disconnect(st);
+    crm_debug("Disconnect: %d", rc);
+
+    crm_debug("Destroy");
+    stonith_api_delete(st);
+
+    return rc;
+}
diff --git a/fencing/commands.c b/fencing/commands.c
new file mode 100644
index 0000000000..671775097f
--- /dev/null
+++ b/fencing/commands.c
@@ -0,0 +1,915 @@
+/*
+ * Copyright (C) 2009 Andrew Beekhof
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define FE_AGENT_FORK -2 +#define FE_AGENT_ERROR -3 + +GHashTable *device_list = NULL; +static int active_children = 0; + +static void exec_child_done(ProcTrack* proc, int status, int signo, int rc, int waslogged); +static void exec_child_new(ProcTrack* p) { active_children++; } +static const char *exec_child_name(ProcTrack* p) { + async_command_t *cmd = proctrack_data(p); + return cmd->client?cmd->client:cmd->remote; +} + +static ProcTrack_ops StonithdProcessTrackOps = { + exec_child_done, + exec_child_new, + exec_child_name, +}; + + +static async_command_t *create_async_command(xmlNode *msg, const char *action) +{ + async_command_t *cmd = NULL; + CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); + + crm_malloc0(cmd, sizeof(async_command_t)); + crm_element_value_int(msg, F_STONITH_CALLID, &cmd->id); + crm_element_value_int(msg, F_STONITH_CALLOPTS, &cmd->options); + + cmd->origin = crm_element_value_copy(msg, F_ORIG); + cmd->remote = crm_element_value_copy(msg, F_STONITH_REMOTE); + cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); + cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); + cmd->action = crm_strdup(action); + cmd->port = crm_element_value_copy(msg, F_STONITH_TARGET); + + CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); return NULL); + CRM_CHECK(cmd->client != NULL || cmd->remote != NULL, crm_log_xml_warn(msg, "NoClient")); + + return cmd; +} + +static void free_async_command(async_command_t *cmd) +{ + crm_free(cmd->action); + crm_free(cmd->port); + 
crm_free(cmd->remote); + crm_free(cmd->client); + crm_free(cmd->origin); + crm_free(cmd->op); + crm_free(cmd); +} + +static void append_arg( + gpointer key, gpointer value, gpointer user_data) +{ + int len = 3; /* =, \n, \0 */ + int last = 0; + char **args = user_data; + + CRM_CHECK(key != NULL, return); + CRM_CHECK(value != NULL, return); + + len += strlen(key); + len += strlen(value); + if(*args != NULL) { + last = strlen(*args); + } + + crm_realloc(*args, last+len); + + sprintf((*args)+last, "%s=%s\n", (char *)key, (char *)value); +} + +static void append_const_arg(const char *key, const char *value, char **arg_list) +{ + char *glib_sucks_key = crm_strdup(key); + char *glib_sucks_value = crm_strdup(value); + + append_arg(glib_sucks_key, glib_sucks_value, arg_list); + + crm_free(glib_sucks_value); + crm_free(glib_sucks_key); +} + + +static char *make_args(GHashTable *args, const char *action, const char *port) +{ + char *arg_list = NULL; + CRM_CHECK(action != NULL, return NULL); + + g_hash_table_foreach(args, append_arg, &arg_list); + append_const_arg("option", action, &arg_list); + if(port) { + append_const_arg("port", port, &arg_list); + } + crm_debug_3("Calculated: %s", arg_list); + return arg_list; +} + +/* Borrowed from libfence */ +static int run_agent( + char *agent, GHashTable *arg_hash, const char *action, const char *port, + int *agent_result, char **output, async_command_t *track) +{ + char *args = make_args(arg_hash, action, port); + int pid, status, len, rc = -1; + int p_read_fd, p_write_fd; /* parent read/write file descriptors */ + int c_read_fd, c_write_fd; /* child read/write file descriptors */ + int fd1[2]; + int fd2[2]; + + c_read_fd = c_write_fd = p_read_fd = p_write_fd = -1; + + if (args == NULL || agent == NULL) + goto fail; + len = strlen(args); + + if (pipe(fd1)) + goto fail; + p_read_fd = fd1[0]; + c_write_fd = fd1[1]; + + if (pipe(fd2)) + goto fail; + c_read_fd = fd2[0]; + p_write_fd = fd2[1]; + + pid = fork(); + if (pid < 0) { + 
*agent_result = FE_AGENT_FORK; + goto fail; + } + + if (pid) { + /* parent */ + int ret; + + fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK); + + do { + ret = write(p_write_fd, args, len); + + } while (ret < 0 && errno == EINTR); + + if (ret != len) { + if(rc >= 0) { + rc = st_err_generic; + } + goto fail; + } + + close(p_write_fd); + + if(track) { + NewTrackedProc(pid, 0, PT_LOGNORMAL, track, &StonithdProcessTrackOps); + +#if 0 + ProcTrackKillInfo *info = NULL; + crm_malloc0(info, sizeof(ProcTrackKillInfo) * 3); + + killseq[0].mstimeout = timeout; /* after timeout send TERM */ + killseq[0].signalno = SIGTERM; + killseq[1].mstimeout = 5000; /* after 5 secs remove it */ + killseq[1].signalno = SIGKILL; + killseq[2].mstimeout = 5000; /* if it's still there after 5, complain */ + killseq[2].signalno = 0; + SetTrackedProcTimeouts(pid,killseq); +#endif + track->stdout = p_read_fd; + + crm_free(args); + close(c_write_fd); + close(c_read_fd); + return pid; + + } else { + waitpid(pid, &status, 0); + + if(output != NULL) { + len = 0; + do { + char buf[500]; + ret = read(p_read_fd, buf, sizeof(buf) - 1); /* reserve one byte for the NUL stored at buf[ret] */ + if(ret > 0) { + buf[ret] = 0; + crm_realloc(*output, len + ret + 1); + sprintf((*output)+len, "%s", buf); + len += ret; + } + + } while (ret == (int)(sizeof(buf) - 1) || (ret < 0 && errno == EINTR)); /* a full chunk means more output may be pending */ + } + + *agent_result = FE_AGENT_ERROR; + if (WIFEXITED(status)) { + *agent_result = -WEXITSTATUS(status); + rc = 0; + } + } + + } else { + /* child */ + + close(1); + if (dup(c_write_fd) < 0) + goto fail; + close(2); + if (dup(c_write_fd) < 0) + goto fail; + close(0); + if (dup(c_read_fd) < 0) + goto fail; + + /* keep c_write_fd open so parent can report all errors. 
*/ + close(c_read_fd); + close(p_read_fd); + close(p_write_fd); + + execlp(agent, agent, NULL); + exit(EXIT_FAILURE); + } + + fail: + crm_free(args); + + close(p_read_fd); + close(p_write_fd); + + close(c_read_fd); + close(c_write_fd); + return rc; +} + +static void free_device(gpointer data) +{ + stonith_device_t *device = data; + + g_hash_table_destroy(device->params); + slist_destroy(char, item, device->targets, crm_free(item)); + crm_free(device->namespace); + crm_free(device->agent); + crm_free(device->id); + crm_free(device); +} + +static void build_port_aliases(stonith_device_t *device) +{ + char *name = NULL; + char *value = NULL; + int last = 0, lpc = 0, max = 0; + + const char *portmap = g_hash_table_lookup(device->params, "portmap"); + if(portmap == NULL) { + return; + } + + max = strlen(portmap); + for(; lpc < max; lpc++) { + if(portmap[lpc] == 0) { + break; + + } else if(isalpha(portmap[lpc])) { + /* keep going */ + + } else if(portmap[lpc] == '=') { + crm_malloc0(name, 1 + lpc - last); + strncpy(name, portmap + last, lpc - last); + last = lpc + 1; + + } else if(name && isspace(portmap[lpc])) { + crm_malloc0(value, 1 + lpc - last); + strncpy(value, portmap + last, lpc - last); + last = lpc + 1; + + crm_info("Adding alias '%s'='%s' for %s", name, value, device->id); + g_hash_table_replace(device->aliases, name, value); + value=NULL; + name=NULL; + + } else if(isspace(portmap[lpc])) { + last = lpc; + } + } +} + +static stonith_device_t *build_device_from_xml(xmlNode *msg) +{ + xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); + stonith_device_t *device = NULL; + + crm_malloc0(device, sizeof(stonith_device_t)); + device->id = crm_element_value_copy(dev, XML_ATTR_ID); + device->agent = crm_element_value_copy(dev, "agent"); + device->namespace = crm_element_value_copy(dev, "namespace"); + device->params = xml2list(dev); + device->aliases = g_hash_table_new_full(g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); + return 
device; +} + +static int stonith_device_register(xmlNode *msg) +{ + stonith_device_t *device = build_device_from_xml(msg); + + build_port_aliases(device); + g_hash_table_replace(device_list, device->id, device); + + crm_info("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); + return stonith_ok; +} + +static int stonith_device_remove(xmlNode *msg) +{ + xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); + const char *id = crm_element_value(dev, XML_ATTR_ID); + if(g_hash_table_remove(device_list, id)) { + crm_info("Removed '%s' from the device list (%d active devices)", + id, g_hash_table_size(device_list)); + } else { + crm_info("Device '%s' not found (%d active devices)", + id, g_hash_table_size(device_list)); + } + + return stonith_ok; +} + +static GListPtr parse_host_list(const char *hosts) /* split agent output on whitespace/commas into a list of newly allocated host names */ +{ + int lpc = 0; + int max = 0; + int last = 0; + GListPtr output = NULL; + + if(hosts) { + max = strlen(hosts); + } + + for(lpc = 0; hosts != NULL && lpc <= max; lpc++) { /* <= max: the terminating NUL also closes the final entry */ + if(hosts[lpc] == 0 || isspace((unsigned char)hosts[lpc]) || hosts[lpc] == ',') { + int rc = 0; + char *entry = NULL; + crm_malloc0(entry, 1 + lpc - last); + rc = sscanf(hosts+last, "%[a-zA-Z0-9_-]", entry); + if(rc == 1) { + crm_debug("Adding '%s'", entry); + output = g_list_append(output, entry); + entry = NULL; + } + + crm_free(entry); + last = lpc + 1; + } + } + + return output; +} + +static gboolean string_in_list(GListPtr list, const char *item) +{ + int lpc = 0; + int max = g_list_length(list); + for(lpc = 0; lpc < max; lpc ++) { + const char *value = g_list_nth_data(list, lpc); + if(safe_str_eq(item, value)) { + return TRUE; + } + } + return FALSE; +} + +static const char *get_device_port(stonith_device_t *dev, const char *host) +{ + time_t now; + char *alias = NULL; + + if(host == NULL) { + return NULL; + } + + now = time(NULL); + alias = g_hash_table_lookup(dev->aliases, host); + + if(dev->targets == NULL || dev->targets_age + 300 < now) { + char *output = NULL; + int rc = 
stonith_ok; + int exec_rc = stonith_ok; + + slist_destroy(char, item, dev->targets, crm_free(item)); + dev->targets = NULL; + + exec_rc = run_agent(dev->agent, dev->params, "hostlist", NULL, &rc, &output, NULL); + if(exec_rc < 0 || rc != 0) { + crm_info("Disabling port list queries for %s", dev->id); + dev->targets_age = -1; + + } else { + crm_info("Refreshing port list for %s", dev->id); + dev->targets = parse_host_list(output); + dev->targets_age = now; + } + + crm_free(output); + } + + /* See if portmap is defined and look up the translated name */ + if(alias && dev->targets == NULL) { + return alias; + + } else if(alias && string_in_list(dev->targets, alias)) { + return alias; + + } else if(dev->targets && string_in_list(dev->targets, host)) { + return host; + } + + return NULL; +} + +static int stonith_device_action(xmlNode *msg, char **output) +{ + int rc = stonith_ok; + xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); + const char *id = crm_element_value(dev, F_STONITH_DEVICE); + const char *action = crm_element_value(dev, F_STONITH_ACTION); + + async_command_t *cmd = NULL; + stonith_device_t *device = NULL; + + if(id) { + crm_debug_2("Looking for '%s'", id); + device = g_hash_table_lookup(device_list, id); + + } else { + CRM_CHECK(safe_str_eq(action, "metadata"), crm_log_xml_warn(msg, "StrangeOp")); + + device = build_device_from_xml(msg); + if(device != NULL && device->id == NULL) { + device->id = crm_strdup(device->agent); + } + } + + if(device) { + int exec_rc = 0; + const char *device_port = NULL; + + cmd = create_async_command(msg, action); + if(cmd == NULL) { + return st_err_internal; + } + + device_port = get_device_port(device, cmd->port); + if(cmd->port && device_port == NULL) { + crm_err("Unknown or unhandled port '%s' for device '%s'", cmd->port, device->id); + free_async_command(cmd); + return st_err_unknown_port; + } + cmd->device = crm_strdup(device->id); + crm_debug("Calling '%s' with action '%s'%s%s", + device->id, 
action, device_port?" on port ":"", device_port?device_port:""); + + exec_rc = run_agent( + device->agent, device->params, action, device_port, &rc, output, cmd); + if(exec_rc < 0 || rc != 0) { + crm_warn("Operation %s on %s failed (%d/%d): %.100s", + action, device->id, exec_rc, rc, *output); + + } else if(exec_rc > 0) { + crm_info("Operation %s on %s active with pid: %d", action, device->id, exec_rc); + rc = exec_rc; + + } else { + crm_info("Operation %s on %s passed: %.100s", action, device->id, *output); + } + + } else { + crm_notice("Device %s not found", id); + rc = st_err_unknown_device; + } + + if(id == NULL) { + free_device(device); + } + return rc; +} + +struct device_search_s +{ + const char *host; + GListPtr capable; +}; + +static void search_devices( + gpointer key, gpointer value, gpointer user_data) +{ + stonith_device_t *dev = value; + struct device_search_s *search = user_data; + if(search->host == NULL || get_device_port(dev, search->host)) { + search->capable = g_list_append(search->capable, value); + } +} + +static int stonith_query(xmlNode *msg, xmlNode **list) +{ + struct device_search_s search; + xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); + + search.host = NULL; + search.capable = NULL; + + if(dev) { + search.host = crm_element_value(dev, F_STONITH_TARGET); + } + + crm_log_xml_info(msg, "Query"); + + g_hash_table_foreach(device_list, search_devices, &search); + crm_info("Found %d matching devices for '%s'", g_list_length(search.capable), search.host); + + /* Pack the results into data */ + if(list) { + *list = create_xml_node(NULL, __FUNCTION__); + crm_xml_add_int(*list, "st-available-devices", g_list_length(search.capable)); + slist_iter(device, stonith_device_t, search.capable, lpc, + dev = create_xml_node(*list, F_STONITH_DEVICE); + crm_xml_add(dev, XML_ATTR_ID, device->id); + crm_xml_add(dev, "namespace", device->namespace); + crm_xml_add(dev, "agent", device->agent); + ); + } + + return 
g_list_length(search.capable); +} + +static void log_operation(async_command_t *cmd, int rc, int pid, const char *next, const char *output) +{ + if(rc == 0) { + next = NULL; + } + + if(cmd->port != NULL) { + do_crm_log(rc==0?LOG_INFO:LOG_ERR, + "Operation '%s' [%d] for host '%s' with device '%s' returned: %d%s%s", + cmd->action, pid, cmd->port, cmd->device, rc, next?". Trying: ":"", next?next:""); + } else { + do_crm_log(rc==0?LOG_INFO:LOG_NOTICE, + "Operation '%s' [%d] for device '%s' returned: %d%s%s", + cmd->action, pid, cmd->device, rc, next?". Trying: ":"", next?next:""); + } + + if(output) { + /* Logging the whole string confuses syslog when the string is xml */ + char *local_copy = crm_strdup(output); + int lpc = 0, last = 0, more = strlen(local_copy); + for(lpc = 0; lpc < more; lpc++) { + if(local_copy[lpc] == '\n' || local_copy[lpc] == 0) { + local_copy[lpc] = 0; + crm_debug("%s output: %s", cmd->device, local_copy+last); + last = lpc+1; + } + } + crm_debug("%s output: %s (total %d bytes)", cmd->device, local_copy+last, more); + crm_free(local_copy); + } +} + +#define READ_MAX 500 +static void +exec_child_done(ProcTrack* proc, int status, int signum, int rc, int waslogged) +{ + int len = 0; + int more = 0; + + char *output = NULL; + xmlNode *data = NULL; + xmlNode *reply = NULL; + + int pid = proctrack_pid(proc); + async_command_t *cmd = proctrack_data(proc); + + CRM_CHECK(cmd != NULL, return); + active_children--; + + if( signum ) { + rc = st_err_signal; + if( proctrack_timedout(proc) ) { + crm_warn("Child '%d' performing action '%s' with '%s' timed out", + pid, cmd->action, cmd->device); + rc = st_err_timeout; + } + } + + do { + char buffer[READ_MAX]; + + errno = 0; + memset(&buffer, 0, READ_MAX); + more = read(cmd->stdout, buffer, READ_MAX-1); + crm_debug_3("Got %d more bytes", more); + + if(more > 0) { + crm_realloc(output, len + more + 1); + sprintf(output+len, "%s", buffer); + len += more; + } + + } while (more == (READ_MAX-1) || (more < 0 && errno 
== EINTR)); + + if(cmd->stdout) { + close(cmd->stdout); + cmd->stdout = 0; + } + + while(rc != 0 && cmd->device_next) { + int exec_rc = 0; + stonith_device_t *dev = cmd->device_next->data; + const char *port = get_device_port(dev, cmd->port); + + log_operation(cmd, rc, pid, dev->id, output); + + cmd->device = dev->id; + cmd->device_next = cmd->device_next->next; + + exec_rc = run_agent(dev->agent, dev->params, cmd->action, port, &rc, NULL, cmd); + if(exec_rc > 0) { + goto done; + } + pid = exec_rc; + } + + reply = stonith_construct_async_reply(cmd, output, data, rc); + + if(safe_str_eq(cmd->action, "metadata")) { + /* Too verbose to log */ + crm_free(output); output = NULL; + } + + log_operation(cmd, rc, pid, NULL, output); + crm_log_xml_debug_3(reply, "Reply"); + + if(cmd->origin) { + send_cluster_message(cmd->origin, crm_msg_stonith_ng, reply, FALSE); + + } else { + do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); + } + + free_async_command(cmd); + done: + + reset_proctrack_data(proc); + crm_free(output); + free_xml(reply); + free_xml(data); +} + +static int stonith_fence(xmlNode *msg, const char *action) +{ + int rc = 0; + struct device_search_s search; + stonith_device_t *device = NULL; + async_command_t *cmd = create_async_command(msg, crm_element_value(msg, F_STONITH_ACTION)); + xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); + + if(cmd == NULL) { + return st_err_internal; + } + + search.capable = NULL; + search.host = crm_element_value(dev, F_STONITH_TARGET); + + crm_log_xml_info(msg, "Exec"); + + g_hash_table_foreach(device_list, search_devices, &search); + crm_info("Found %d matching devices for '%s'", g_list_length(search.capable), search.host); + + if(g_list_length(search.capable) == 0) { + return st_err_none_available; + } + + device = search.capable->data; + cmd->device = device->id; + + if(g_list_length(search.capable) > 1) { + /* TODO: Order based on priority */ + cmd->device_list = search.capable; + } 
+ + return run_agent(device->agent, device->params, cmd->action, cmd->port, &rc, NULL, cmd); +} + +xmlNode *stonith_construct_reply(xmlNode *request, char *output, xmlNode *data, int rc) +{ + int lpc = 0; + xmlNode *reply = NULL; + + const char *name = NULL; + const char *value = NULL; + const char *names[] = { + F_STONITH_OPERATION, + F_STONITH_CALLID, + F_STONITH_CLIENTID, + F_STONITH_REMOTE, + F_STONITH_CALLOPTS + }; + + crm_debug_4("Creating a basic reply"); + reply = create_xml_node(NULL, T_STONITH_REPLY); + + crm_xml_add(reply, "st_origin", __FUNCTION__); + crm_xml_add(reply, F_TYPE, T_STONITH_NG); + + for(lpc = 0; lpc < DIMOF(names); lpc++) { + name = names[lpc]; + value = crm_element_value(request, name); + crm_xml_add(reply, name, value); + } + + crm_xml_add_int(reply, F_STONITH_RC, rc); + crm_xml_add(reply, "st_output", output); + + if(data != NULL) { + crm_debug_4("Attaching reply output"); + add_message_xml(reply, F_STONITH_CALLDATA, data); + } + return reply; +} + +xmlNode *stonith_construct_async_reply(async_command_t *cmd, char *output, xmlNode *data, int rc) +{ + xmlNode *reply = NULL; + + crm_debug_4("Creating a basic reply"); + reply = create_xml_node(NULL, T_STONITH_REPLY); + + crm_xml_add(reply, "st_origin", __FUNCTION__); + crm_xml_add(reply, F_TYPE, T_STONITH_NG); + + crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); + crm_xml_add(reply, F_STONITH_REMOTE, cmd->remote); + crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); + crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); + crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); + + crm_xml_add_int(reply, F_STONITH_RC, rc); + crm_xml_add(reply, "st_output", output); + + if(data != NULL) { + crm_info("Attaching reply output"); + add_message_xml(reply, F_STONITH_CALLDATA, data); + } + return reply; +} + +void +stonith_command(stonith_client_t *client, xmlNode *request, const char *remote) +{ + int rc = st_err_generic; + int call_options = 0; + + gboolean is_reply = FALSE; + + xmlNode *reply 
= NULL; + xmlNode *data = NULL; + + char *output = NULL; + const char *op = crm_element_value(request, F_STONITH_OPERATION); + const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); + + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + if(get_xpath_object("//"T_STONITH_REPLY, request, LOG_DEBUG_3)) { + is_reply = TRUE; + } + + if(device_list == NULL) { + device_list = g_hash_table_new_full( + g_str_hash, g_str_equal, NULL, free_device); + } + + crm_debug("Processing %s%s from %s", op, is_reply?" reply":"", + client?client->name:remote); + + if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { + return; + + } else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { + const char *flag_name = NULL; + + flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); + if(flag_name) { + crm_debug("Setting %s callbacks for %s (%s): ON", + flag_name, client->name, client->id); + client->flags |= get_stonith_flag(flag_name); + } + + flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); + if(flag_name) { + crm_debug("Setting %s callbacks for %s (%s): off", + flag_name, client->name, client->id); + client->flags &= ~get_stonith_flag(flag_name); /* deactivate must clear the bit, not set it */ + } + return; + + } else if(crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { + rc = stonith_device_register(request); + do_stonith_notify(call_options, op, rc, request, NULL); + + } else if(crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { + rc = stonith_device_remove(request); + do_stonith_notify(call_options, op, rc, request, NULL); + + + } else if(crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { + rc = stonith_device_action(request, &output); + + } else if(crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { + xmlNode *cmd = NULL; + const char *action = NULL; + + if(is_reply) { + process_remote_stonith_exec(request); + return; + } + + cmd = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_ERR); + action = crm_element_value(cmd, F_STONITH_ACTION); + + if(remote) { + rc = stonith_fence(request, action); + + } else 
if(call_options & st_opt_local_first) { + rc = stonith_fence(request, action); + if(rc < 0) { + crm_log_xml_info(request, "EscalateLocal"); + initiate_remote_stonith_op(client, request, action); + return; + } + + } else { + crm_log_xml_info(request, "Escalate"); + initiate_remote_stonith_op(client, request, action); + return; + } + + } else if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { + if(is_reply) { + process_remote_stonith_query(request); + + } else { + rc = stonith_query(request, &data); + } + } + + crm_debug("Processing %s%s from %s: rc=%d", op, is_reply?" reply":"", + client?client->name:remote, rc); + + if(is_reply) { + + } else if(remote) { + reply = stonith_construct_reply(request, output, data, rc); + send_cluster_message(remote, crm_msg_stonith_ng, reply, FALSE); + + } else if(rc <= 0) { + reply = stonith_construct_reply(request, output, data, rc); + do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote!=NULL); + free_xml(reply); + } + + crm_free(output); + free_xml(data); +} diff --git a/fencing/fence_legacy b/fencing/fence_legacy new file mode 100644 index 0000000000..5c6c7ef9c3 --- /dev/null +++ b/fencing/fence_legacy @@ -0,0 +1,170 @@ +#!/usr/bin/perl + +use Getopt::Std; + +my $ME = $0; + +END { + defined fileno STDOUT or return; + close STDOUT and return; + warn "$ME: failed to close standard output: $!\n"; + $? 
||= 1; +} + +# Get the program name from $0 and strip directory names +$_=$0; +s/.*\///; +my $pname = $_; + +$opt_o = 'reset'; # Default fence action +$opt_s = 'stonith'; # Default fence binary +$opt_t = 'none'; # Default fence type +$extra_args = ''; + +sub usage +{ + print "Usage:\n"; + print "\n"; + print "$pname [options]\n"; + print "\n"; + print "Options:\n"; + print " -h usage\n"; + print " -t sub agent\n"; + print " -n nodename\n"; + print " -o Action: on | off | reset (default) | stat | hostlist\n"; + print " -s stonith command\n"; + print " -q quiet mode\n"; + print " -V version\n"; + + exit 0; +} + +sub fail +{ + ($msg) = @_; + print $msg."\n" unless defined $opt_q; + $t->close if defined $t; + exit 1; +} + +sub fail_usage +{ + ($msg)=@_; + print STDERR $msg."\n" if $msg; + print STDERR "Please use '-h' for usage.\n"; + exit 1; +} + +sub version +{ + print "1.0.0\n"; + + exit 0; +} + +sub get_options_stdin +{ + my $opt; + my $line = 0; + while( defined($in = <>) ) + { + $_ = $in; + chomp; + + # strip leading and trailing whitespace + s/^\s*//; + s/\s*$//; + + # skip comments + next if /^#/; + + $line+=1; + $opt=$_; + next unless $opt; + + ($name,$val)=split /\s*=\s*/, $opt; + + if ( $name eq "" ) + { + print STDERR "parse error: illegal name in option $line\n"; + exit 2; + } + + # DO NOTHING -- this field is used by fenced + elsif ($name eq "agent" ) {} + + elsif ($name eq "plugin" ) + { + $opt_t = $val; + } + elsif ($name eq "option" ) + { + $opt_o = $val; + } + elsif ($name eq "port" ) + { + $opt_n = $val; + } + elsif ($name eq "stonith" ) + { + $opt_s = $val; + } + else + { + $extra_args="$extra_args $name=\"$val\"" + } + + } +} + +######################################################################33 +# MAIN + +if (@ARGV > 0) { + getopts("ht:n:o:s:qV") || fail_usage ; + + usage if defined $opt_h; + version if defined $opt_V; + + fail_usage "Unknown parameter." 
if (@ARGV > 0); + +} else { + get_options_stdin(); + +} + +$opt_o=lc($opt_o); +fail "failed: unrecognised action: $opt_o" + unless $opt_o =~ /^(on|off|reset|reboot|stat|hostlist|monitor)$/; + +if ( $pid=fork() == 0 ) +{ + if ( $opt_o eq "reboot" ) + { + $opt_o="reset"; + } + + if ( $opt_o eq "hostlist" ) + { + exec "$opt_s -t $opt_t $extra_args -l" or die "failed to exec \"$opt_s\"\n"; + } + elsif ( $opt_o eq "monitor" || $opt_o eq "stat" ) + { + print "Performing: $opt_s -t $opt_t -S $opt_n\n" unless defined $opt_q; + exec "$opt_s -t $opt_t $extra_args -S $opt_n" or die "failed to exec \"$opt_s\"\n"; + } + else + { + print "Performing: $opt_s -t $opt_t -T $opt_o $opt_n\n" unless defined $opt_q; + fail "failed: no plug number" unless defined $opt_n; + exec "$opt_s -t $opt_t $extra_args -T $opt_o $opt_n" or die "failed to exec \"$opt_s\"\n"; + } +} + +wait; +$status=$?/256; + +print (($status == 0 ? "success":"failed") . ": $opt_n $status\n") + unless defined $opt_q; + +exit ($status == 0 ? 
0 : 1 ); diff --git a/fencing/internal.h b/fencing/internal.h new file mode 100644 index 0000000000..f332e44523 --- /dev/null +++ b/fencing/internal.h @@ -0,0 +1,74 @@ +typedef struct stonith_device_s +{ + char *id; + char *agent; + char *namespace; + + GListPtr targets; + time_t targets_age; + + GHashTable *params; + GHashTable *aliases; + +} stonith_device_t; + +typedef struct stonith_client_s +{ + char *id; + char *name; + char *callback_id; + + const char *channel_name; + + IPC_Channel *channel; + GCHSource *source; + + long long flags; + +} stonith_client_t; + +typedef struct async_command_s +{ + + int id; + int stdout; + int options; + + char *op; + char *origin; + char *client; + char *remote; + + char *port; + char *action; + char *device; + + GListPtr device_list; + GListPtr device_next; + +} async_command_t; + +extern long long get_stonith_flag(const char *name); + +extern void stonith_command( + stonith_client_t *client, xmlNode *op_request, const char *remote); + +extern void do_local_reply( + xmlNode *notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer); + +extern xmlNode *stonith_construct_reply( + xmlNode *request, char *output, xmlNode *data, int rc); + +extern xmlNode *stonith_construct_async_reply( + async_command_t *cmd, char *output, xmlNode *data, int rc);; + +extern void do_stonith_notify( + int options, const char *type, enum stonith_errors result, xmlNode *data, + const char *remote); + +extern void initiate_remote_stonith_op( + stonith_client_t *client, xmlNode *request, const char *action); + +extern int process_remote_stonith_exec(xmlNode *msg); + +extern int process_remote_stonith_query(xmlNode *msg); diff --git a/fencing/main.c b/fencing/main.c new file mode 100644 index 0000000000..1b43726022 --- /dev/null +++ b/fencing/main.c @@ -0,0 +1,641 @@ +/* + * Copyright (C) 2009 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * 
License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +char *channel1 = NULL; +char *channel2 = NULL; +char *stonith_our_uname = NULL; + +GMainLoop *mainloop = NULL; +GHashTable *client_list = NULL; + +gboolean stonith_shutdown_flag = FALSE; + +#if SUPPORT_HEARTBEAT +ll_cluster_t *hb_conn = NULL; +#endif + +static gboolean +stonith_client_disconnect( + IPC_Channel *channel, stonith_client_t *stonith_client) +{ + if (channel == NULL) { + CRM_DEV_ASSERT(stonith_client == NULL); + + } else if (stonith_client == NULL) { + crm_err("No client"); + + } else { + CRM_DEV_ASSERT(channel->ch_status != IPC_CONNECT); + crm_debug_2("Cleaning up after client disconnect: %s/%s/%s", + crm_str(stonith_client->name), + stonith_client->channel_name, + stonith_client->id); + + if(stonith_client->id != NULL) { + if(!g_hash_table_remove(client_list, stonith_client->id)) { + crm_err("Client %s not found in the hashtable", + stonith_client->name); + } + } + } + + return FALSE; +} + +static gboolean +stonith_client_callback(IPC_Channel *channel, gpointer user_data) +{ + int lpc = 0; + const char *value = NULL; + xmlNode *request = NULL; + gboolean keep_channel = TRUE; + stonith_client_t *stonith_client = user_data; + + CRM_CHECK(stonith_client != NULL, 
crm_err("Invalid client"); return FALSE); + CRM_CHECK(stonith_client->id != NULL, + crm_err("Invalid client: %p", stonith_client); return FALSE); + + if(IPC_ISRCONN(channel) && channel->ops->is_message_pending(channel)) { + + lpc++; + request = xmlfromIPC(channel, MAX_IPC_DELAY); + if (request == NULL) { + goto bail; + } + + if(stonith_client->name == NULL) { + value = crm_element_value(request, F_STONITH_CLIENTNAME); + if(value == NULL) { + stonith_client->name = crm_itoa(channel->farside_pid); + } else { + stonith_client->name = crm_strdup(value); + } + } + + crm_xml_add(request, F_STONITH_CLIENTID, stonith_client->id); + crm_xml_add(request, F_STONITH_CLIENTNAME, stonith_client->name); + + if(stonith_client->callback_id == NULL) { + value = crm_element_value(request, F_STONITH_CALLBACK_TOKEN); + if(value != NULL) { + stonith_client->callback_id = crm_strdup(value); + + } else { + stonith_client->callback_id = crm_strdup(stonith_client->id); + } + } + + crm_log_xml(LOG_MSG, "Client[inbound]", request); + stonith_command(stonith_client, request, NULL); + + free_xml(request); + } + + bail: + if(channel->ch_status != IPC_CONNECT) { + crm_debug_2("Client disconnected"); + keep_channel = stonith_client_disconnect(channel, stonith_client); + } + + return keep_channel; +} + +static void +stonith_client_destroy(gpointer user_data) +{ + stonith_client_t *stonith_client = user_data; + + if(stonith_client == NULL) { + crm_debug_4("Destroying %p", user_data); + return; + } + + if(stonith_client->source != NULL) { + crm_debug_4("Deleting %s (%p) from mainloop", + stonith_client->name, stonith_client->source); + G_main_del_IPC_Channel(stonith_client->source); + stonith_client->source = NULL; + } + + crm_debug_3("Destroying %s (%p)", stonith_client->name, user_data); + crm_free(stonith_client->name); + crm_free(stonith_client->callback_id); + crm_free(stonith_client->id); + crm_free(stonith_client); + crm_debug_4("Freed the cib client"); + + return; +} + +static gboolean 
+stonith_client_connect(IPC_Channel *channel, gpointer user_data) +{ + cl_uuid_t client_id; + xmlNode *reg_msg = NULL; + stonith_client_t *new_client = NULL; + char uuid_str[UU_UNPARSE_SIZEOF]; + const char *channel_name = user_data; + + crm_debug_3("Connecting channel"); + CRM_CHECK(channel_name != NULL, return FALSE); + + if (channel == NULL) { + crm_err("Channel was NULL"); + return FALSE; + + } else if (channel->ch_status != IPC_CONNECT) { + crm_err("Channel was disconnected"); + return FALSE; + + } else if(stonith_shutdown_flag) { + crm_info("Ignoring new client [%d] during shutdown", + channel->farside_pid); + return FALSE; + } + + crm_malloc0(new_client, sizeof(stonith_client_t)); + new_client->channel = channel; + new_client->channel_name = channel_name; + + crm_debug_3("Created channel %p for channel %s", + new_client, new_client->channel_name); + + channel->ops->set_recv_qlen(channel, 1024); + channel->ops->set_send_qlen(channel, 1024); + + new_client->source = G_main_add_IPC_Channel( + G_PRIORITY_DEFAULT, channel, FALSE, stonith_client_callback, + new_client, stonith_client_destroy); + + crm_debug_3("Channel %s connected for client %s", + new_client->channel_name, new_client->id); + + cl_uuid_generate(&client_id); + cl_uuid_unparse(&client_id, uuid_str); + + CRM_CHECK(new_client->id == NULL, crm_free(new_client->id)); + new_client->id = crm_strdup(uuid_str); + + /* make sure we can find ourselves later for sync calls + * redirected to the master instance + */ + g_hash_table_insert(client_list, new_client->id, new_client); + + reg_msg = create_xml_node(NULL, "callback"); + crm_xml_add(reg_msg, F_STONITH_OPERATION, CRM_OP_REGISTER); + crm_xml_add(reg_msg, F_STONITH_CLIENTID, new_client->id); + + send_ipc_message(channel, reg_msg); + free_xml(reg_msg); + + return TRUE; +} + +static void +stonith_peer_callback(xmlNode * msg, void* private_data) +{ + const char *remote = crm_element_value(msg, F_ORIG); + crm_log_xml(LOG_MSG, "Peer[inbound]", msg); + 
stonith_command(NULL, msg, remote); +} + +static void +stonith_peer_hb_callback(HA_Message * msg, void* private_data) +{ + xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); + stonith_peer_callback(xml, private_data); + free_xml(xml); +} + + +#if SUPPORT_AIS +static gboolean stonith_peer_ais_callback( + AIS_Message *wrapper, char *data, int sender) +{ + xmlNode *xml = NULL; + + if(wrapper->header.id == crm_class_cluster) { + xml = string2xml(data); + if(xml == NULL) { + goto bail; + } + crm_xml_add(xml, F_ORIG, wrapper->sender.uname); + crm_xml_add_int(xml, F_SEQ, wrapper->id); + stonith_peer_callback(xml, NULL); + } + + free_xml(xml); + return TRUE; + + bail: + crm_err("Invalid XML: '%.120s'", data); + return TRUE; + +} + +static void +stonith_peer_ais_destroy(gpointer user_data) +{ + crm_err("AIS connection terminated"); + ais_fd_sync = -1; + exit(1); +} +#endif + +static void +stonith_peer_hb_destroy(gpointer user_data) +{ + if(stonith_shutdown_flag) { + crm_info("Heartbeat disconnection complete... exiting"); + } else { + crm_err("Heartbeat connection lost! Exiting."); + } + + crm_info("Exiting..."); + if (mainloop != NULL && g_main_is_running(mainloop)) { + g_main_quit(mainloop); + + } else { + exit(LSB_EXIT_OK); + } +} + +static int +send_via_callback_channel(xmlNode *msg, const char *token) +{ + stonith_client_t *hash_client = NULL; + enum stonith_errors rc = stonith_ok; + + crm_debug_3("Delivering msg %p to client %s", msg, token); + + if(token == NULL) { + crm_err("No client id token, cant send message"); + if(rc == stonith_ok) { + rc = -1; + } + + } else if(msg == NULL) { + crm_err("No message to send"); + rc = -1; + + } else { + /* A client that left before we could reply is not really + * _our_ error. Warn instead. 
+ */ + hash_client = g_hash_table_lookup(client_list, token); + if(hash_client == NULL) { + crm_warn("Cannot find client for token %s", token); + rc = -1; + + } else if (crm_str_eq(hash_client->channel_name, "remote", FALSE)) { + /* just hope it's alive */ + + } else if(hash_client->channel == NULL) { + crm_err("Cannot find channel for client %s", token); + rc = -1; + } + } + + if(rc == stonith_ok) { + crm_debug_3("Delivering reply to client %s (%s)", + token, hash_client->channel_name); + if(send_ipc_message(hash_client->channel, msg) == FALSE) { + crm_warn("Delivery of reply to client %s/%s failed", + hash_client->name, token); + rc = -1; + } + } + + return rc; +} + +void do_local_reply(xmlNode *notify_src, const char *client_id, + gboolean sync_reply, gboolean from_peer) +{ + /* send callback to originating child */ + stonith_client_t *client_obj = NULL; + enum stonith_errors local_rc = stonith_ok; + + crm_debug_2("Sending response"); + + if(client_id != NULL) { + client_obj = g_hash_table_lookup(client_list, client_id); + } else { + crm_debug_2("No client to sent the response to." 
+ " F_STONITH_CLIENTID not set."); + } + + crm_debug_3("Sending callback to request originator"); + if(client_obj == NULL) { + local_rc = -1; + + } else { + const char *client_id = client_obj->callback_id; + crm_debug_2("Sending %ssync response to %s %s", + sync_reply?"":"an a-", + client_obj->name, + from_peer?"(originator of delegated request)":""); + + if(sync_reply) { + client_id = client_obj->id; + } + local_rc = send_via_callback_channel(notify_src, client_id); + } + + if(local_rc != stonith_ok && client_obj != NULL) { + crm_warn("%sSync reply to %s failed: %s", + sync_reply?"":"A-", + client_obj?client_obj->name:"", stonith_error2string(local_rc)); + } +} + +long long get_stonith_flag(const char *name) +{ + if(safe_str_eq(name, STONITH_OP_FENCE)) { + return 0x01; + + } else if(safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { + return 0x04; + + } else if(safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { + return 0x10; + } + return 0; +} + +static void +stonith_notify_client(gpointer key, gpointer value, gpointer user_data) +{ + + IPC_Channel *ipc_client = NULL; + xmlNode *update_msg = user_data; + stonith_client_t *client = value; + const char *type = NULL; + + CRM_CHECK(client != NULL, return); + CRM_CHECK(update_msg != NULL, return); + + type = crm_element_value(update_msg, F_SUBTYPE); + CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); + + if(client == NULL) { + crm_warn("Skipping NULL client"); + return; + + } else if(client->channel == NULL) { + crm_warn("Skipping client with NULL channel"); + return; + + } else if(client->name == NULL) { + crm_debug_2("Skipping unnammed client / comamnd channel"); + return; + } + + ipc_client = client->channel; + if(client->flags & get_stonith_flag(type)) { + crm_info("Sending %s-notification to client %s/%s", type, client->name, client->id); + if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) { + /* We never want the STONITH to exit because our client is slow */ + 
crm_crit("%s-notification of client %s/%s failed - queue saturated",
+                     type, client->name, client->id);
+
+        } else if(send_ipc_message(ipc_client, update_msg) == FALSE) {
+            crm_warn("%s-Notification of client %s/%s failed",
+                     type, client->name, client->id);
+        }
+    }
+}
+
+void
+do_stonith_notify(
+    int options, const char *type, enum stonith_errors result, xmlNode *data,
+    const char *remote)
+{
+    /* TODO: Standardize the contents of data */
+    /* TODO: Implement cluster-wide notification */
+
+    xmlNode *update_msg = create_xml_node(NULL, "notify");
+
+    CRM_CHECK_AND_STORE(type != NULL, ;);
+
+    crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
+    crm_xml_add(update_msg, F_SUBTYPE, type);
+    crm_xml_add(update_msg, F_STONITH_OPERATION, type);
+    crm_xml_add_int(update_msg, F_STONITH_RC, result);
+
+    if(data != NULL) {
+        add_message_xml(update_msg, F_STONITH_CALLDATA, data);
+    }
+
+    crm_debug_3("Notifying clients");
+    g_hash_table_foreach(client_list, stonith_notify_client, update_msg);
+    free_xml(update_msg);
+    crm_debug_3("Notify complete");
+}
+
+static void
+stonith_shutdown(int nsig)
+{
+    stonith_shutdown_flag = TRUE;
+    crm_info("Terminating with %d clients", g_hash_table_size(client_list));
+    stonith_client_disconnect(NULL, NULL);
+    exit(0);
+}
+
+/* Release the process-wide state set up by main(): the peer cache, the
+ * client table, our node name and the two IPC channel names.
+ */
+static void
+stonith_cleanup(void)
+{
+    crm_peer_destroy();
+    g_hash_table_destroy(client_list);
+    crm_free(stonith_our_uname);
+#if HAVE_LIBXML2
+    xmlCleanupParser();
+#endif
+    crm_free(channel1);
+    /* channel2 is strdup'ed in main() right next to channel1 and was
+     * previously never released.
+     */
+    crm_free(channel2);
+}
+
+static struct crm_option long_options[] = {
+    {"stand-alone", 0, 0, 's'},
+    {"verbose", 0, 0, 'V'},
+    {"version", 0, 0, '$'},
+    {"help", 0, 0, '?'},
+
+    {0, 0, 0, 0}
+};
+
+int
+main(int argc, char ** argv)
+{
+    int flag;
+    int rc = 0;
+    int argerr = 0;
+    int option_index = 0;
+    gboolean stand_alone = FALSE;
+
+    crm_log_init("stonith-ng", LOG_INFO, TRUE, TRUE, argc, argv);
+    crm_set_options("V?s$", "mode [options]", long_options,
+                    "Provides a summary of cluster's current state."
+ "\n\nOutputs varying levels of detail in a number of different formats.\n"); + + while (1) { + flag = crm_get_option(argc, argv, &option_index); + if (flag == -1) + break; + + switch(flag) { + case 'V': + alter_debug(DEBUG_INC); + cl_log_enable_stderr(1); + break; + case 's': + stand_alone = TRUE; + cl_log_enable_stderr(1); + break; + case '$': + case '?': + crm_help(flag, LSB_EXIT_OK); + break; + default: + ++argerr; + break; + } + } + + if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { + /* TODO: Cleanup */; + + printf("\n"); + printf("\n"); + printf("1.0\n"); + printf("This is a fake resource that details the instance attributes handled by stonithd.\n"); + printf("stonithd Options\n"); + printf("\n"); + printf("\n"); + printf("How long to wait for the STONITH action to complete. Overrides the stonith-timeout cluster property\n"); + printf("\n"); + printf("How long to wait for the STONITH action to complete. Overrides the stonith-timeout cluster property\n"); + printf("\n"); + printf("\n"); + printf("The priority of the stonith resource. The lower the number, the higher the priority.\n"); + printf("\n"); + printf("The priority of the stonith resource. 
The lower the number, the higher the priority.\n");
+        printf("\n");
+        printf("\n");
+        printf("\n");
+        return 0;
+    }
+
+    if (optind > argc) {
+        ++argerr;
+    }
+
+    if (argerr) {
+        crm_help('?', LSB_EXIT_GENERIC);
+    }
+
+    mainloop_add_signal(SIGTERM, stonith_shutdown);
+
+    /* EnableProcLogging(); */
+    set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME);
+
+    crm_peer_init();
+    client_list = g_hash_table_new(g_str_hash, g_str_equal);
+
+    if(stand_alone == FALSE) {
+        void *dispatch = stonith_peer_hb_callback;
+        void *destroy = stonith_peer_hb_destroy;
+
+        if(is_openais_cluster()) {
+#if SUPPORT_AIS
+            destroy = stonith_peer_ais_destroy;
+            dispatch = stonith_peer_ais_callback;
+#endif
+        }
+
+        if(crm_cluster_connect(&stonith_our_uname, NULL, dispatch, destroy,
+#if SUPPORT_HEARTBEAT
+                               &hb_conn
+#else
+                               NULL
+#endif
+               ) == FALSE){
+            crm_crit("Cannot sign in to the cluster... terminating");
+            exit(100);
+        }
+
+    } else {
+        stonith_our_uname = crm_strdup("localhost");
+    }
+
+    channel1 = crm_strdup(stonith_channel);
+    rc = init_server_ipc_comms(
+        channel1, stonith_client_connect,
+        default_ipc_connection_destroy);
+
+    channel2 = crm_strdup(stonith_channel_callback);
+    /* Only bring up the callback channel when the command channel came up.
+     * Assigning rc unconditionally here used to discard a failure of the
+     * first channel, so the daemon could start with only one of the two.
+     */
+    if(rc == 0) {
+        rc = init_server_ipc_comms(
+            channel2, stonith_client_connect,
+            default_ipc_connection_destroy);
+    }
+
+    if(rc == 0) {
+        /* Create the mainloop and run it... 
*/ + mainloop = g_main_new(FALSE); + crm_info("Starting %s mainloop", crm_system_name); + + g_main_run(mainloop); + + } else { + crm_err("Couldnt start all communication channels, exiting."); + } + + stonith_cleanup(); + +#if SUPPORT_HEARTBEAT + if(hb_conn) { + hb_conn->llc_ops->delete(hb_conn); + } +#endif + + crm_info("Done"); + return rc; +} + diff --git a/fencing/remote.c b/fencing/remote.c new file mode 100644 index 0000000000..a5597105b0 --- /dev/null +++ b/fencing/remote.c @@ -0,0 +1,379 @@ +/* + * Copyright (C) 2009 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+extern xmlNode *stonith_create_op(
+    int call_id, const char *token, const char *op, xmlNode *data, int call_options);
+
+enum op_state
+{
+    st_query,
+    st_exec,
+    st_done,
+    st_failed,
+};
+
+GHashTable *remote_op_list = NULL;
+
+typedef struct st_query_result_s
+{
+    char *host;
+    int devices;
+
+} st_query_result_t;
+
+typedef struct remote_fencing_op_s
+{
+    char *id;
+    char *target;
+    char *action;
+    guint replies;
+    guint op_timer;
+    guint query_timer;
+    long long call_options;
+
+    char *delegate;
+    time_t completed;
+
+    enum op_state state;
+    char *originator;
+    GListPtr query_results;
+    xmlNode *request;
+
+} remote_fencing_op_t;
+
+static void free_remote_query(gpointer data)
+{
+    st_query_result_t *query = data;
+
+    crm_free(query->host);
+    crm_free(query);
+}
+
+/* Value destructor for remote_op_list: releases every string owned by the
+ * operation record, cancels its pending timers, drops queued query results
+ * and the saved request, then frees the record itself.
+ */
+static void free_remote_op(gpointer data)
+{
+    remote_fencing_op_t *op = data;
+
+    crm_free(op->id);
+    crm_free(op->action);
+    crm_free(op->target);
+    crm_free(op->originator);
+    /* delegate is a heap copy made with crm_element_value_copy() when the
+     * reply is built; it was previously leaked here.
+     */
+    crm_free(op->delegate);
+
+    if(op->query_timer) {
+        g_source_remove(op->query_timer);
+    }
+    if(op->op_timer) {
+        g_source_remove(op->op_timer);
+    }
+    if(op->query_results) {
+        slist_destroy(st_query_result_t, result, op->query_results,
+                      free_remote_query(result);
+            );
+    }
+    if(op->request) {
+        free_xml(op->request);
+        op->request = NULL;
+    }
+    crm_free(op);
+}
+
+static void remote_op_reply_and_notify(remote_fencing_op_t *op, xmlNode *data, int rc)
+{
+    xmlNode *reply = NULL;
+    xmlNode *local_data = NULL;
+
+    /* TODO: Have the delegate perform the notification */
+    op->completed = time(NULL);
+    if(data == NULL) {
+        data = 
create_xml_node(NULL, "remote-op");
+        local_data = data;
+
+    } else {
+        /* Drop any delegate recorded by an earlier reply before replacing it */
+        crm_free(op->delegate);
+        op->delegate = crm_element_value_copy(data, F_ORIG);
+    }
+
+    crm_xml_add_int(data, "state", op->state);
+    crm_xml_add(data, F_STONITH_TARGET, op->target);
+    crm_xml_add(data, F_STONITH_OPERATION, op->action);
+
+    reply = stonith_construct_reply(op->request, NULL, data, rc);
+    crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+
+    do_stonith_notify(0, STONITH_OP_FENCE, rc, reply, NULL);
+    do_local_reply(reply, op->originator, op->call_options & st_opt_sync_call, FALSE);
+
+    free_xml(local_data);
+    free_xml(reply);
+
+    /* Free non-essential parts of the record
+     * Keep the record around so we can query the history
+     */
+    if(op->query_results) {
+        slist_destroy(st_query_result_t, result, op->query_results,
+                      free_remote_query(result);
+            );
+        op->query_results = NULL;
+    }
+
+    if(op->request) {
+        free_xml(op->request);
+        op->request = NULL;
+    }
+}
+
+
+/* Fail a remote fencing operation with st_err_timeout.
+ *
+ * Installed as the op_timer callback, and also called directly when an
+ * operation has to be abandoned. Always returns FALSE so that, when run as a
+ * timer callback, the timer is not re-armed.
+ */
+static gboolean remote_op_timeout(gpointer userdata)
+{
+    remote_fencing_op_t *op = userdata;
+    crm_err("Action %s (%s) for %s timed out", op->action, op->id, op->target);
+
+    /* This is the op_timer handler, so op_timer (not query_timer) is the id
+     * that must be retired. Leaving it set made later g_source_remove() calls
+     * act on a source that no longer exists; removing it here also stops a
+     * still-armed timer from firing after a direct call.
+     */
+    if(op->op_timer) {
+        g_source_remove(op->op_timer);
+        op->op_timer = 0;
+    }
+
+    remote_op_reply_and_notify(op, NULL, st_err_timeout);
+
+    op->state = st_failed;
+
+    return FALSE;
+}
+
+static gboolean remote_op_query_timeout(gpointer data)
+{
+    remote_fencing_op_t *op = data;
+    crm_err("Query %s for %s timed out", op->id, op->target);
+    op->query_timer = 0;
+    if(op->op_timer) {
+        g_source_remove(op->op_timer);
+        op->op_timer = 0;
+    }
+    remote_op_timeout(op);
+    return FALSE;
+}
+
+void initiate_remote_stonith_op(
+    stonith_client_t *client, xmlNode *request, const char *action)
+{
+    cl_uuid_t new_uuid;
+    char uuid_str[UU_UNPARSE_SIZEOF];
+
+    int timeout = 0;
+    xmlNode *query = NULL;
+    remote_fencing_op_t *op = NULL;
+    xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_ERR);
+
+    if(remote_op_list == NULL) {
+        remote_op_list = g_hash_table_new_full(
+            g_str_hash, g_str_equal, NULL, free_remote_op);
+    }
+
+    
crm_malloc0(op, sizeof(remote_fencing_op_t));
+    crm_element_value_int(dev, "timeout", &timeout);
+
+    cl_uuid_generate(&new_uuid);
+    cl_uuid_unparse(&new_uuid, uuid_str);
+
+    op->id = crm_strdup(uuid_str);
+    g_hash_table_replace(remote_op_list, op->id, op);
+
+    op->state = st_query;
+    op->action = crm_strdup(action);
+    op->originator = crm_strdup(client->id);
+    op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
+    op->op_timer = g_timeout_add(1000*timeout, remote_op_timeout, op);
+    op->query_timer = g_timeout_add(100*timeout, remote_op_query_timeout, op);
+    op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
+    {
+        /* call_options is a long long; read the attribute into a real int
+         * and widen it, rather than writing through a casted int pointer
+         * (which fills the wrong half of the field on big-endian hosts).
+         */
+        int call_options = 0;
+        crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
+        op->call_options = call_options;
+    }
+
+    /* Validate before the action is logged or the query is built, so the
+     * early return neither prints a NULL string nor leaks the query.
+     */
+    CRM_CHECK(op->action, return);
+
+    query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0);
+    crm_xml_add(query, F_STONITH_REMOTE, op->id);
+    crm_xml_add(query, F_STONITH_TARGET, op->target);
+
+    crm_info("Initiating remote operation %s for %s: %s", op->action, op->target, op->id);
+
+    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
+
+    free_xml(query);
+}
+
+/* Ask the peer named in result to run op->action against op->target, and
+ * mark the operation as executing.
+ */
+static void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *result)
+{
+    xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0);
+    crm_xml_add(query, F_STONITH_REMOTE, op->id);
+    crm_xml_add(query, F_STONITH_TARGET, op->target);
+    crm_xml_add(query, F_STONITH_ACTION, op->action);
+
+    op->state = st_exec;
+    crm_info("Requesting that %s perform op %s %s", result->host, op->action, op->target);
+
+    send_cluster_message(result->host, crm_msg_stonith_ng, query, FALSE);
+    free_xml(query);
+}
+
+
+int process_remote_stonith_query(xmlNode *msg)
+{
+    int devices = 0;
+    const char *id = NULL;
+    remote_fencing_op_t *op = NULL;
+    st_query_result_t *result = NULL;
+    xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR);
+
+    crm_log_xml_info(msg, "QueryResult");
+
+    CRM_CHECK(dev != NULL, return st_err_internal);
+
+    id = 
crm_element_value(dev, F_STONITH_REMOTE); + CRM_CHECK(id != NULL, return st_err_internal); + + dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR); + CRM_CHECK(dev != NULL, return st_err_internal); + crm_element_value_int(dev, "st-available-devices", &devices); + + op = g_hash_table_lookup(remote_op_list, id); + if(op == NULL) { + crm_debug("Unknown or expired remote op: %s", id); + return st_err_unknown_operation; + } + + op->replies++; + crm_malloc0(result, sizeof(st_query_result_t)); + result->host = crm_element_value_copy(msg, F_ORIG); + result->devices = devices; + + /* TODO: Implement options + * A) If we have anyone that can do the job + * B) If we have someone that can do the job and some percent of the known peers + * C) If all known peers have responded + * + * Implement A first + */ + + /* Track A */ + + if(result->devices > 0) { + if(op->call_options & st_opt_allow_suicide) { + crm_info("Allowing %s to potentialy fence itself", op->target); + + } else if(safe_str_eq(result->host, op->target)) { + crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", op->target); + free_remote_query(result); + return 0; + } + + if(op->query_timer) { + g_source_remove(op->query_timer); + op->query_timer = 0; + } + + if(op->state == st_query) { + call_remote_stonith(op, result); + free_remote_query(result); + + } else if(op->state == st_exec) { + /* TODO: insert in sorted order (key = num devices) */ + crm_info("Queuing query result from %s while operation is pending", result->host); + op->query_results = g_list_append(op->query_results, result); + + } else { + crm_info("Discarding query result from %s. Operation is in state %d", + result->host, op->state); + free_remote_query(result); + } + + + } else { + crm_info("Discarding query result from %s. 
No valid devices", result->host); + free_remote_query(result); + } + + return 0; +} + +int process_remote_stonith_exec(xmlNode *msg) +{ + int rc = 0; + const char *id = NULL; + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR); + + crm_log_xml_info(msg, "ExecResult"); + + CRM_CHECK(dev != NULL, return st_err_internal); + + id = crm_element_value(dev, F_STONITH_REMOTE); + CRM_CHECK(id != NULL, return st_err_internal); + + dev = get_xpath_object("//@"F_STONITH_RC, msg, LOG_ERR); + CRM_CHECK(dev != NULL, return st_err_internal); + + op = g_hash_table_lookup(remote_op_list, id); + if(op == NULL) { + crm_debug("Unknown or expired remote op: %s", id); + return st_err_unknown_operation; + } + + crm_element_value_int(dev, F_STONITH_RC, &rc); + if(rc == stonith_ok) { + if(op->op_timer) { + g_source_remove(op->op_timer); + op->op_timer = 0; + } + remote_op_reply_and_notify(op, msg, rc); + + } else if(rc < stonith_ok) { + if(op->state == st_exec) { + st_query_result_t *result = g_list_nth_data(op->query_results, 0); + op->query_results = g_list_remove(op->query_results, result); + + if(result && result->devices > 0) { + call_remote_stonith(op, result); + + } else { + remote_op_timeout(op); + } + + if(result) { + free_remote_query(result); + } + } + } + return rc; +} diff --git a/fencing/test.c b/fencing/test.c new file mode 100644 index 0000000000..0d8e213e61 --- /dev/null +++ b/fencing/test.c @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2009 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +static struct crm_option long_options[] = { + {"verbose", 0, 0, 'V'}, + {"version", 0, 0, '$'}, + {"help", 0, 0, '?'}, + {"passive", 0, 0, 'p'}, + + {0, 0, 0, 0} +}; + +int st_opts = st_opt_sync_call; +GMainLoop *mainloop = NULL; + +static void st_callback(stonith_t *st, const char *event, xmlNode *msg) +{ + crm_log_xml_notice(msg, event); +} + +static gboolean timeout_handler(gpointer data) +{ + g_main_quit(mainloop); + return FALSE; +} + +int +main(int argc, char ** argv) +{ + int flag; + int rc = 0; + int argerr = 0; + int option_index = 0; + + stonith_t *st = NULL; + GHashTable *hash = NULL; + + gboolean passive_mode = FALSE; + + crm_log_init("stonith-test", LOG_INFO, TRUE, TRUE, argc, argv); + crm_set_options("V?$p", "mode [options]", long_options, + "Provides a summary of cluster's current state." 
+ "\n\nOutputs varying levels of detail in a number of different formats.\n"); + + while (1) { + flag = crm_get_option(argc, argv, &option_index); + if (flag == -1) + break; + + switch(flag) { + case 'V': + alter_debug(DEBUG_INC); + cl_log_enable_stderr(1); + break; + case '$': + case '?': + crm_help(flag, LSB_EXIT_OK); + break; + case 'p': + passive_mode = TRUE; + break; + default: + ++argerr; + break; + } + } + + if (optind > argc) { + ++argerr; + } + + if (argerr) { + crm_help('?', LSB_EXIT_GENERIC); + } + + hash = g_hash_table_new(g_str_hash, g_str_equal); + g_hash_table_insert(hash, crm_strdup("ipaddr"), crm_strdup("localhost")); + g_hash_table_insert(hash, crm_strdup("pcmk-portmap"), crm_strdup("some-host=pcmk-1 pcmk-3=3,4")); + g_hash_table_insert(hash, crm_strdup("login"), crm_strdup("root")); + g_hash_table_insert(hash, crm_strdup("identity_file"), crm_strdup("/root/.ssh/id_dsa")); + + crm_debug("Create"); + st = stonith_api_new(); + + rc = st->cmds->connect(st, crm_system_name, NULL, NULL); + crm_debug("Connect: %d", rc); + + rc = st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback); + + if(passive_mode) { + rc = st->cmds->register_notification(st, STONITH_OP_FENCE, st_callback); + + rc = st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); + rc = st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); + + mainloop = g_main_new(FALSE); + crm_info("Looking for notification"); + g_timeout_add(500*1000, timeout_handler, NULL); + + g_main_run(mainloop); + + } else { + rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_virsh", hash); + crm_debug("Register: %d", rc); + + rc = st->cmds->call(st, st_opts, "test-id", "list", NULL, 10); + crm_debug("List: %d", rc); + + rc = st->cmds->call(st, st_opts, "test-id", "monitor", NULL, 10); + crm_debug("Monitor: %d", rc); + + rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-2", 10); + crm_debug("Status pcmk-2: %d", rc); + + 
rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); + crm_debug("Status pcmk-1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60); + crm_debug("Fence unknown-host: %d", rc); + + rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); + crm_debug("Status pcmk-1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "pcmk-1", "off", 60); + crm_debug("Fence pcmk-1: %d", rc); + + rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); + crm_debug("Status pcmk-1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "pcmk-1", "on", 10); + crm_debug("Unfence pcmk-1: %d", rc); + + rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); + crm_debug("Status pcmk-1: %d", rc); + + rc = st->cmds->fence(st, st_opts, "some-host", "off", 10); + crm_debug("Fence alias: %d", rc); + + rc = st->cmds->call(st, st_opts, "test-id", "status", "some-host", 10); + crm_debug("Status alias: %d", rc); + + rc = st->cmds->fence(st, st_opts, "pcmk-1", "on", 10); + crm_debug("Unfence pcmk-1: %d", rc); + + rc = st->cmds->remove_device(st, st_opts, "test-id"); + crm_debug("Remove test-id: %d", rc); + } + + rc = st->cmds->disconnect(st); + crm_debug("Disconnect: %d", rc); + + crm_debug("Destroy"); + stonith_api_delete(st); + + return rc; +} diff --git a/include/Makefile.am b/include/Makefile.am index e8e16d68e4..f7f57140fc 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,26 +1,26 @@ # # Copyright (C) 2004-2009 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in config.h.in noinst_HEADERS = portability.h config.h crm_internal.h pkginclude_HEADERS = crm_config.h -SUBDIRS = crm fencing +SUBDIRS = crm .PHONY: $(ARCHIVE_VERSION) diff --git a/include/crm/ais.h b/include/crm/ais.h index a382a156ea..c5cc3d29d6 100644 --- a/include/crm/ais.h +++ b/include/crm/ais.h @@ -1,427 +1,435 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM_AIS__H #define CRM_AIS__H #include #include #include #include #include #define AIS_IPC_MESSAGE_SIZE 8192*128 #if SUPPORT_AIS # ifdef AIS_COROSYNC # include # include # include # endif # ifdef AIS_WHITETANK /* cheap hacks for building against the stable series of openais */ # include enum service_types { EVS_SERVICE = 0, CLM_SERVICE = 1, AMF_SERVICE = 2, CKPT_SERVICE = 3, EVT_SERVICE = 4, LCK_SERVICE = 5, MSG_SERVICE = 6, CFG_SERVICE = 7, CPG_SERVICE = 8 }; typedef struct { int size; __attribute__((aligned(8))) int id __attribute__((aligned(8))); SaAisErrorT error __attribute__((aligned(8))); } coroipc_response_header_t __attribute__((aligned(8))); typedef struct { int size __attribute__((aligned(8))); int id __attribute__((aligned(8))); } coroipc_request_header_t __attribute__((aligned(8))); # ifdef TRADITIONAL_AIS_IPC extern SaAisErrorT saRecvRetry (int s, void *msg, size_t len); extern SaAisErrorT saServiceConnect (int *responseOut, int *callbackOut, enum service_types service); extern SaAisErrorT saSendReceiveReply (int s, void *requestMessage, int requestLen, void *responseMessage, int responseLen); # else extern int openais_fd_get(void *ipc_context); extern int openais_dispatch_recv (void *ipc_context, void *buf, int timeout); extern SaAisErrorT openais_service_disconnect (void *ipc_context); extern SaAisErrorT openais_service_connect (enum service_types service, void **ipc_context); extern SaAisErrorT openais_msg_send_reply_receive (void *ipc_context, struct iovec *iov, int iov_len, void *res_msg, int res_len); # endif #define CS_OK SA_AIS_OK #define CS_ERR_LIBRARY SA_AIS_ERR_LIBRARY #define CS_ERR_VERSION SA_AIS_ERR_VERSION #define CS_ERR_INIT SA_AIS_ERR_INIT #define CS_ERR_TIMEOUT SA_AIS_ERR_TIMEOUT #define CS_ERR_TRY_AGAIN 
SA_AIS_ERR_TRY_AGAIN #define CS_ERR_INVALID_PARAM SA_AIS_ERR_INVALID_PARAM #define CS_ERR_NO_MEMORY SA_AIS_ERR_NO_MEMORY #define CS_ERR_BAD_HANDLE SA_AIS_ERR_BAD_HANDLE #define CS_ERR_BUSY SA_AIS_ERR_BUSY #define CS_ERR_ACCESS SA_AIS_ERR_ACCESS #define CS_ERR_NOT_EXIST SA_AIS_ERR_NOT_EXIST #define CS_ERR_NAME_TOO_LONG SA_AIS_ERR_NAME_TOO_LONG #define CS_ERR_EXIST SA_AIS_ERR_EXIST #define CS_ERR_NO_SPACE SA_AIS_ERR_NO_SPACE #define CS_ERR_INTERRUPT SA_AIS_ERR_INTERRUPT #define CS_ERR_NAME_NOT_FOUND SA_AIS_ERR_NAME_NOT_FOUND #define CS_ERR_NO_RESOURCES SA_AIS_ERR_NO_RESOURCES #define CS_ERR_NOT_SUPPORTED SA_AIS_ERR_NOT_SUPPORTED #define CS_ERR_BAD_OPERATION SA_AIS_ERR_BAD_OPERATION #define CS_ERR_FAILED_OPERATION SA_AIS_ERR_FAILED_OPERATION #define CS_ERR_MESSAGE_ERROR SA_AIS_ERR_MESSAGE_ERROR #define CS_ERR_QUEUE_FULL SA_AIS_ERR_QUEUE_FULL #define CS_ERR_QUEUE_NOT_AVAILABLE SA_AIS_ERR_QUEUE_NOT_AVAILABLE #define CS_ERR_BAD_FLAGS SA_AIS_ERR_BAD_FLAGS #define CS_ERR_TOO_BIG SA_AIS_ERR_TOO_BIG #define CS_ERR_NO_SECTIONS SA_AIS_ERR_NO_SECTIONS # endif #else typedef struct { int size __attribute__((aligned(8))); int id __attribute__((aligned(8))); } coroipc_request_header_t __attribute__((aligned(8))); typedef struct { int size; __attribute__((aligned(8))) int id __attribute__((aligned(8))); int error __attribute__((aligned(8))); } coroipc_response_header_t __attribute__((aligned(8))); #endif #define PCMK_SERVICE_ID 9 #define CRM_MESSAGE_IPC_ACK 0 #ifndef CRM_SERVICE #define CRM_SERVICE PCMK_SERVICE_ID #endif #define MAX_NAME 256 #define AIS_IPC_NAME "ais-crm-ipc" #define CRM_NODE_LOST "lost" #define CRM_NODE_MEMBER "member" #define CRM_NODE_ACTIVE CRM_NODE_MEMBER #define CRM_NODE_INACTIVE CRM_NODE_LOST #define CRM_NODE_EVICTED "evicted" typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; enum crm_ais_msg_class { crm_class_cluster = 0, crm_class_members = 1, crm_class_notify = 2, crm_class_nodeid = 3, crm_class_rmpeer = 4, crm_class_quorum = 
5, }; /* order here matters - its used to index into the crm_children array */ enum crm_ais_msg_types { crm_msg_none = 0, crm_msg_ais = 1, crm_msg_lrmd = 2, crm_msg_cib = 3, crm_msg_crmd = 4, crm_msg_attrd = 5, crm_msg_stonithd = 6, crm_msg_te = 7, crm_msg_pe = 8, + crm_msg_stonith_ng = 9, }; enum crm_proc_flag { crm_proc_none = 0x00000001, crm_proc_ais = 0x00000002, crm_proc_lrmd = 0x00000010, crm_proc_cib = 0x00000100, crm_proc_crmd = 0x00000200, crm_proc_attrd = 0x00001000, crm_proc_stonithd = 0x00002000, crm_proc_pe = 0x00010000, crm_proc_te = 0x00020000, crm_proc_mgmtd = 0x00040000, + crm_proc_stonith_ng = 0x00100000, }; typedef struct crm_peer_node_s { uint32_t id; uint64_t born; uint64_t last_seen; int32_t votes; uint32_t processes; char *uname; char *state; char *uuid; char *addr; char *version; } crm_node_t; struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[MAX_NAME]; } __attribute__((packed)); struct crm_ais_msg_s { coroipc_response_header_t header __attribute__((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__((packed)); struct crm_ais_nodeid_resp_s { coroipc_response_header_t header __attribute__((aligned(8))); uint32_t id; uint32_t counter; char uname[MAX_NAME]; char cname[MAX_NAME]; } __attribute__((packed)); struct crm_ais_quorum_resp_s { coroipc_response_header_t header __attribute__((aligned(8))); uint64_t id; uint32_t votes; uint32_t expected_votes; uint32_t quorate; } __attribute__((packed)); static inline const char *msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch(type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case 
crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; case crm_msg_stonithd: text = "stonithd"; break; + case crm_msg_stonith_ng: + text = "stonith-ng"; + break; } return text; } static inline const char *peer2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch(proc) { case crm_proc_none: text = "unknown"; break; case crm_proc_ais: text = "ais"; break; case crm_proc_cib: text = "cib"; break; case crm_proc_crmd: text = "crmd"; break; case crm_proc_pe: text = "pengine"; break; case crm_proc_te: text = "tengine"; break; case crm_proc_lrmd: text = "lrmd"; break; case crm_proc_attrd: text = "attrd"; break; case crm_proc_stonithd: text = "stonithd"; break; + case crm_proc_stonith_ng: + text = "stonith-ng"; + break; case crm_proc_mgmtd: text = "mgmtd"; break; } return text; } static inline const char *ais_dest(const struct crm_ais_host_s *host) { if(host->local) { return "local"; } else if(host->size > 0) { return host->uname; } else { return ""; } } #define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) static inline AIS_Message *ais_msg_copy(const AIS_Message *source) { AIS_Message *target = malloc(sizeof(AIS_Message) + ais_data_len(source)); memcpy(target, source, sizeof(AIS_Message)); memcpy(target->data, source->data, ais_data_len(target)); return target; } static inline const char *ais_error2text(int error) { const char *text = "unknown"; # if SUPPORT_AIS switch(error) { case CS_OK: text = "None"; break; case CS_ERR_LIBRARY: text = "Library error"; break; case CS_ERR_VERSION: text = "Version error"; break; case CS_ERR_INIT: text = "Initialization error"; break; case CS_ERR_TIMEOUT: text = "Timeout"; break; case CS_ERR_TRY_AGAIN: text = "Try again"; break; case CS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case CS_ERR_NO_MEMORY: text = "No memory"; break; case CS_ERR_BAD_HANDLE: text = "Bad handle"; break; case CS_ERR_BUSY: text = "Busy"; break; case CS_ERR_ACCESS: text = "Access error"; 
break; case CS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case CS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case CS_ERR_EXIST: text = "Exists"; break; case CS_ERR_NO_SPACE: text = "No space"; break; case CS_ERR_INTERRUPT: text = "Interrupt"; break; case CS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case CS_ERR_NO_RESOURCES: text = "No resources"; break; case CS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case CS_ERR_BAD_OPERATION: text = "Bad operation"; break; case CS_ERR_FAILED_OPERATION: text = "Failed operation"; break; case CS_ERR_MESSAGE_ERROR: text = "Message error"; break; case CS_ERR_QUEUE_FULL: text = "Queue full"; break; case CS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case CS_ERR_BAD_FLAGS: text = "Bad flags"; break; case CS_ERR_TOO_BIG: text = "To big"; break; case CS_ERR_NO_SECTIONS: text = "No sections"; break; } # endif return text; } extern enum crm_ais_msg_types crm_system_type; extern enum crm_ais_msg_types text2msg_type(const char *text); extern char *get_ais_data(const AIS_Message *msg); extern gboolean check_message_sanity(const AIS_Message *msg, const char *data); #endif diff --git a/include/crm/common/util.h b/include/crm/common/util.h index 6839126147..173d3d065e 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -1,272 +1,275 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM_COMMON_UTIL__H #define CRM_COMMON_UTIL__H #include #include #include #include #include #include #include #if SUPPORT_HEARTBEAT # include #else # define NORMALNODE "normal" # define ACTIVESTATUS "active" /* fully functional, and all links are up */ # define DEADSTATUS "dead" /* Status of non-working link or machine */ # define PINGSTATUS "ping" /* Status of a working ping node */ # define JOINSTATUS "join" /* Status when an api client joins */ # define LEAVESTATUS "leave" /* Status when an api client leaves */ # define ONLINESTATUS "online" /* Status of an online client */ # define OFFLINESTATUS "offline" /* Status of an offline client */ #endif #define DEBUG_INC SIGUSR1 #define DEBUG_DEC SIGUSR2 extern unsigned int crm_log_level; extern gboolean crm_config_error; extern gboolean crm_config_warning; #ifdef HAVE_GETOPT_H # include #else #define no_argument 0 #define required_argument 1 #endif #define pcmk_option_default 0x00000 #define pcmk_option_hidden 0x00001 #define pcmk_option_paragraph 0x00002 #define pcmk_option_example 0x00004 struct crm_option { /* Fields from 'struct option' in getopt.h */ /* name of long option */ const char *name; /* * one of no_argument, required_argument, and optional_argument: * whether option takes an argument */ int has_arg; /* if not NULL, set *flag to val when option found */ int *flag; /* if flag not NULL, value to set *flag to; else return value */ int val; /* Custom fields */ const char *desc; long flags; }; #define crm_config_err(fmt...) { crm_config_error = TRUE; crm_err(fmt); } #define crm_config_warn(fmt...) 
{ crm_config_warning = TRUE; crm_warn(fmt); } extern void crm_log_deinit(void); extern gboolean crm_log_init( const char *entity, int level, gboolean coredir, gboolean to_stderr, int argc, char **argv); /* returns the old value */ extern unsigned int set_crm_log_level(unsigned int level); extern unsigned int get_crm_log_level(void); extern char *crm_itoa(int an_int); extern char *crm_strdup_fn(const char *a, const char *file, const char *fn, int line); extern char *generate_hash_key(const char *crm_msg_reference, const char *sys); extern char *generate_hash_value(const char *src_node, const char *src_subsys); extern gboolean decodeNVpair(const char *srcstring, char separator, char **name, char **value); extern int compare_version(const char *version1, const char *version2); extern char *generateReference(const char *custom1, const char *custom2); extern void alter_debug(int nsig); extern void g_hash_destroy_str(gpointer data); extern gboolean crm_is_true(const char * s); extern int crm_str_to_boolean(const char * s, int * ret); extern long long crm_get_msec(const char * input); extern unsigned long long crm_get_interval(const char * input); extern const char *op_status2text(op_status_t status); extern char *generate_op_key( const char *rsc_id, const char *op_type, int interval); extern gboolean parse_op_key( const char *key, char **rsc_id, char **op_type, int *interval); extern char *generate_notify_key( const char *rsc_id, const char *notify_type, const char *op_type); extern char *generate_transition_magic_v202( const char *transition_key, int op_status); extern char *generate_transition_magic( const char *transition_key, int op_status, int op_rc); extern gboolean decode_transition_magic( const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc); extern char *generate_transition_key(int action, int transition_id, int target_rc, const char *node); extern gboolean decode_transition_key( const char *key, char 
**uuid, int *action, int *transition_id, int *target_rc); extern char *crm_concat(const char *prefix, const char *suffix, char join); extern gboolean decode_op_key( const char *key, char **rsc_id, char **op_type, int *interval); extern void filter_action_parameters(xmlNode *param_set, const char *version); extern void filter_reload_parameters(xmlNode *param_set, const char *restart_string); #define safe_str_eq(a, b) crm_str_eq(a, b, FALSE) extern gboolean crm_str_eq(const char *a, const char *b, gboolean use_case); extern gboolean safe_str_neq(const char *a, const char *b); extern int crm_parse_int(const char *text, const char *default_text); extern long long crm_int_helper(const char *text, char **end_text); #define crm_atoi(text, default_text) crm_parse_int(text, default_text) extern void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork); extern char *generate_series_filename( const char *directory, const char *series, int sequence, gboolean bzip); extern int get_last_sequence(const char *directory, const char *series); extern void write_last_sequence( const char *directory, const char *series, int sequence, int max); extern int crm_pid_active(long pid); extern int crm_read_pidfile(const char *filename); extern int crm_lock_pidfile(const char *filename); extern void crm_make_daemon( const char *name, gboolean daemonize, const char *pidfile); typedef struct pe_cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; gboolean (*is_valid)(const char *); const char *description_short; const char *description_long; } pe_cluster_option; extern const char *cluster_option( GHashTable* options, gboolean(*validate)(const char*), const char *name, const char *old_name, const char *def_value); extern const char *get_cluster_pref( GHashTable *options, pe_cluster_option *option_list, int len, const char *name); extern void config_metadata( 
const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option *option_list, int len); extern void verify_all_options(GHashTable *options, pe_cluster_option *option_list, int len); extern gboolean check_time(const char *value); extern gboolean check_timer(const char *value); extern gboolean check_boolean(const char *value); extern gboolean check_number(const char *value); extern int char2score(const char *score); extern char *score2char(int score); extern gboolean crm_is_writable( const char *dir, const char *file, const char *user, const char *group, gboolean need_both); extern long long crm_set_bit(const char *function, long long word, long long bit); extern long long crm_clear_bit(const char *function, long long word, long long bit); /* Bit-flag helpers. Arguments and whole expansions are parenthesized so that a compound mask such as A|B is complemented and combined as a unit; word must be a modifiable lvalue. set_bit/clear_bit go through crm_set_bit/crm_clear_bit and pass the calling function name; the _inplace variants modify word directly. */ #define set_bit(word, bit) ((word) = crm_set_bit(__PRETTY_FUNCTION__, (word), (bit))) #define clear_bit(word, bit) ((word) = crm_clear_bit(__PRETTY_FUNCTION__, (word), (bit))) #define set_bit_inplace(word, bit) ((word) |= (bit)) #define clear_bit_inplace(word, bit) ((word) &= ~(bit)) /* TRUE when none of the bits in bit are set in word */ static inline gboolean is_not_set(long long word, long long bit) { return ((word & bit) == 0); } /* TRUE only when every bit in bit is set in word */ static inline gboolean is_set(long long word, long long bit) { return ((word & bit) == bit); } /* TRUE when at least one bit in bit is set in word */ static inline gboolean is_set_any(long long word, long long bit) { return ((word & bit) != 0); } extern gboolean is_openais_cluster(void); extern gboolean is_heartbeat_cluster(void); extern xmlNode *cib_recv_remote_msg(void *session, gboolean encrypted); extern void cib_send_remote_msg(void *session, xmlNode *msg, gboolean encrypted); extern char *crm_meta_name(const char *field); extern const char *crm_meta_value(GHashTable *hash, const char *field); extern void crm_set_options(const char *short_options, const char *usage, struct crm_option *long_options, const char *app_desc); extern int crm_get_option(int argc, char **argv, int *index); extern void crm_help(char cmd, int exit_code); extern gboolean attrd_update(IPC_Channel *cluster, char command, 
const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen); extern gboolean attrd_lazy_update(char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen); extern gboolean attrd_update_no_mainloop(int *connection, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen); extern int node_score_red; extern int node_score_green; extern int node_score_yellow; extern int node_score_infinity; +#include +extern xmlNode *create_operation_update(xmlNode *parent, lrm_op_t *op, const char *caller_version, int target_rc, const char *origin); + #endif diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index 09e5bbe4b9..df5f42d55a 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -1,271 +1,274 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef XML_TAGS__H #define XML_TAGS__H #define CIB_OPTIONS_FIRST "cib-bootstrap-options" #define F_CRM_DATA "crm_xml" #define F_CRM_TASK "crm_task" #define F_CRM_HOST_TO "crm_host_to" #define F_CRM_MSG_TYPE F_SUBTYPE #define F_CRM_SYS_TO "crm_sys_to" #define F_CRM_SYS_FROM "crm_sys_from" #define F_CRM_HOST_FROM F_ORIG #define F_CRM_REFERENCE XML_ATTR_REFERENCE #define F_CRM_VERSION XML_ATTR_VERSION #define F_CRM_ORIGIN "origin" #define F_CRM_JOIN_ID "join_id" #define F_CRM_ELECTION_ID "election-id" #define F_CRM_ELECTION_OWNER "election-owner" #define F_CRM_TGRAPH "crm-tgraph" #define F_CRM_TGRAPH_INPUT "crm-tgraph-in" /*---- Common tags/attrs */ #define XML_DIFF_MARKER "__crm_diff_marker__" #define XML_ATTR_TAGNAME F_XML_TAGNAME #define XML_TAG_CIB "cib" #define XML_TAG_FAILED "failed" #define XML_ATTR_CRM_VERSION "crm_feature_set" #define XML_ATTR_DIGEST "digest" #define XML_ATTR_VALIDATION "validate-with" #define XML_ATTR_QUORUM_PANIC "no-quorum-panic" #define XML_ATTR_HAVE_QUORUM "have-quorum" #define XML_ATTR_EXPECTED_VOTES "expected-quorum-votes" #define XML_ATTR_GENERATION "epoch" #define XML_ATTR_GENERATION_ADMIN "admin_epoch" #define XML_ATTR_NUMUPDATES "num_updates" #define XML_ATTR_TIMEOUT "timeout" #define XML_ATTR_ORIGIN "crm-debug-origin" #define XML_ATTR_TSTAMP "crm-timestamp" #define XML_CIB_ATTR_WRITTEN "cib-last-written" #define XML_ATTR_VERSION "version" #define XML_ATTR_DESC "description" #define XML_ATTR_ID "id" #define XML_ATTR_IDREF "id-ref" #define XML_ATTR_ID_LONG "long-id" #define XML_ATTR_TYPE "type" #define XML_ATTR_FILTER_TYPE "type-filter" #define XML_ATTR_FILTER_ID "id-filter" #define XML_ATTR_FILTER_PRIORITY "priority-filter" #define XML_ATTR_VERBOSE "verbose" #define XML_ATTR_OP "op" #define XML_ATTR_DC "is_dc" #define 
XML_ATTR_DC_UUID "dc-uuid" #define XML_BOOLEAN_TRUE "true" #define XML_BOOLEAN_FALSE "false" #define XML_BOOLEAN_YES XML_BOOLEAN_TRUE #define XML_BOOLEAN_NO XML_BOOLEAN_FALSE #define XML_TAG_OPTIONS "options" /*---- top level tags/attrs */ #define XML_MSG_TAG "crm_message" #define XML_MSG_TAG_DATA "msg_data" #define XML_ATTR_REQUEST "request" #define XML_ATTR_RESPONSE "response" #define XML_ATTR_UNAME "uname" #define XML_ATTR_UUID "id" #define XML_ATTR_REFERENCE "reference" #define XML_FAIL_TAG_RESOURCE "failed_resource" #define XML_FAILRES_ATTR_RESID "resource_id" #define XML_FAILRES_ATTR_REASON "reason" #define XML_FAILRES_ATTR_RESSTATUS "resource_status" #define XML_CRM_TAG_PING "ping_response" #define XML_PING_ATTR_STATUS "result" #define XML_PING_ATTR_SYSFROM "crm_subsystem" #define XML_TAG_FRAGMENT "cib_fragment" #define XML_ATTR_RESULT "result" #define XML_ATTR_SECTION "section" #define XML_FAIL_TAG_CIB "failed_update" #define XML_FAILCIB_ATTR_ID "id" #define XML_FAILCIB_ATTR_OBJTYPE "object_type" #define XML_FAILCIB_ATTR_OP "operation" #define XML_FAILCIB_ATTR_REASON "reason" /*---- CIB specific tags/attrs */ #define XML_CIB_TAG_SECTION_ALL "all" #define XML_CIB_TAG_CONFIGURATION "configuration" #define XML_CIB_TAG_STATUS "status" #define XML_CIB_TAG_RESOURCES "resources" #define XML_CIB_TAG_NODES "nodes" #define XML_CIB_TAG_CONSTRAINTS "constraints" #define XML_CIB_TAG_CRMCONFIG "crm_config" #define XML_CIB_TAG_OPCONFIG "op_defaults" #define XML_CIB_TAG_RSCCONFIG "rsc_defaults" #define XML_CIB_TAG_STATE "node_state" #define XML_CIB_TAG_NODE "node" #define XML_CIB_TAG_CONSTRAINT "constraint" #define XML_CIB_TAG_NVPAIR "nvpair" #define XML_CIB_TAG_PROPSET "cluster_property_set" #define XML_TAG_ATTR_SETS "instance_attributes" #define XML_TAG_META_SETS "meta_attributes" #define XML_TAG_ATTRS "attributes" #define XML_TAG_PARAMS "parameters" #define XML_TAG_PARAM "param" +#define XML_TAG_UTILIZATION "utilization" #define XML_TAG_RESOURCE_REF "resource_ref" 
#define XML_CIB_TAG_RESOURCE "primitive" #define XML_CIB_TAG_GROUP "group" #define XML_CIB_TAG_INCARNATION "clone" #define XML_CIB_TAG_MASTER "master" #define XML_RSC_ATTR_RESTART "restart-type" #define XML_RSC_ATTR_ORDERED "ordered" #define XML_RSC_ATTR_INTERLEAVE "interleave" #define XML_RSC_ATTR_INCARNATION "clone" #define XML_RSC_ATTR_INCARNATION_MAX "clone-max" #define XML_RSC_ATTR_INCARNATION_NODEMAX "clone-node-max" #define XML_RSC_ATTR_MASTER_MAX "master-max" #define XML_RSC_ATTR_MASTER_NODEMAX "master-node-max" #define XML_RSC_ATTR_STATE "clone-state" #define XML_RSC_ATTR_MANAGED "is-managed" #define XML_RSC_ATTR_TARGET_ROLE "target-role" #define XML_RSC_ATTR_UNIQUE "globally-unique" #define XML_RSC_ATTR_NOTIFY "notify" #define XML_RSC_ATTR_STICKINESS "resource-stickiness" #define XML_RSC_ATTR_FAIL_STICKINESS "migration-threshold" #define XML_RSC_ATTR_FAIL_TIMEOUT "failure-timeout" #define XML_RSC_ATTR_MULTIPLE "multiple-active" #define XML_RSC_ATTR_PRIORITY "priority" #define XML_OP_ATTR_ON_FAIL "on-fail" #define XML_OP_ATTR_START_DELAY "start-delay" #define XML_OP_ATTR_ALLOW_MIGRATE "allow-migrate" #define XML_OP_ATTR_ORIGIN "interval-origin" #define XML_OP_ATTR_PENDING "record-pending" #define XML_CIB_TAG_LRM "lrm" #define XML_LRM_TAG_RESOURCES "lrm_resources" #define XML_LRM_TAG_RESOURCE "lrm_resource" #define XML_LRM_TAG_AGENTS "lrm_agents" #define XML_LRM_TAG_AGENT "lrm_agent" #define XML_LRM_TAG_RSC_OP "lrm_rsc_op" #define XML_AGENT_ATTR_CLASS "class" #define XML_AGENT_ATTR_PROVIDER "provider" #define XML_LRM_TAG_ATTRIBUTES "attributes" #define XML_CIB_ATTR_REPLACE "replace" #define XML_CIB_ATTR_SOURCE "source" #define XML_CIB_ATTR_HEALTH "health" #define XML_CIB_ATTR_WEIGHT "weight" #define XML_CIB_ATTR_PRIORITY "priority" #define XML_CIB_ATTR_CLEAR "clear_on" #define XML_CIB_ATTR_SOURCE "source" #define XML_CIB_ATTR_JOINSTATE "join" #define XML_CIB_ATTR_EXPSTATE "expected" #define XML_CIB_ATTR_INCCM "in_ccm" #define XML_CIB_ATTR_CRMDSTATE "crmd" 
#define XML_CIB_ATTR_HASTATE "ha" #define XML_CIB_ATTR_SHUTDOWN "shutdown" #define XML_CIB_ATTR_STONITH "stonith" #define XML_LRM_ATTR_INTERVAL "interval" #define XML_LRM_ATTR_TASK "operation" #define XML_LRM_ATTR_TASK_KEY "operation_key" #define XML_LRM_ATTR_TARGET "on_node" #define XML_LRM_ATTR_TARGET_UUID "on_node_uuid" #define XML_LRM_ATTR_RSCID "rsc-id" #define XML_LRM_ATTR_OPSTATUS "op-status" #define XML_LRM_ATTR_RC "rc-code" #define XML_LRM_ATTR_CALLID "call-id" #define XML_LRM_ATTR_OP_DIGEST "op-digest" #define XML_LRM_ATTR_OP_RESTART "op-force-restart" #define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest" #define XML_TAG_GRAPH "transition_graph" #define XML_GRAPH_TAG_RSC_OP "rsc_op" #define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event" #define XML_GRAPH_TAG_CRM_EVENT "crm_event" #define XML_TAG_RULE "rule" #define XML_RULE_ATTR_SCORE "score" #define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute" #define XML_RULE_ATTR_SCORE_MANGLED "score-attribute-mangled" #define XML_RULE_ATTR_ROLE "role" #define XML_RULE_ATTR_RESULT "result" #define XML_RULE_ATTR_BOOLEAN_OP "boolean-op" #define XML_TAG_EXPRESSION "expression" #define XML_EXPR_ATTR_ATTRIBUTE "attribute" #define XML_EXPR_ATTR_OPERATION "operation" #define XML_EXPR_ATTR_VALUE "value" #define XML_EXPR_ATTR_TYPE "type" #define XML_CONS_TAG_RSC_DEPEND "rsc_colocation" #define XML_CONS_TAG_RSC_ORDER "rsc_order" #define XML_CONS_TAG_RSC_LOCATION "rsc_location" +#define XML_CONS_TAG_RSC_SET "resource_set" #define XML_CONS_ATTR_SYMMETRICAL "symmetrical" #define XML_COLOC_ATTR_SOURCE "rsc" #define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role" #define XML_COLOC_ATTR_TARGET "with-rsc" #define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role" #define XML_COLOC_ATTR_NODE_ATTR "node-attribute" #define XML_ORDER_ATTR_FIRST "first" #define XML_ORDER_ATTR_THEN "then" #define XML_ORDER_ATTR_FIRST_ACTION "first-action" #define XML_ORDER_ATTR_THEN_ACTION "then-action" +#define XML_ORDER_ATTR_KIND "kind" #define XML_NVPAIR_ATTR_NAME "name" 
#define XML_NVPAIR_ATTR_VALUE "value" #define XML_NODE_ATTR_STATE "state" #define XML_CONFIG_ATTR_DC_DEADTIME "dc-deadtime" #define XML_CONFIG_ATTR_ELECTION_FAIL "election-timeout" #define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation" #define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval" #define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple" #define XML_ATTR_TRANSITION_MAGIC "transition-magic" #define XML_ATTR_TRANSITION_KEY "transition-key" #define XML_ATTR_TE_NOWAIT "op_no_wait" #define XML_ATTR_TE_TARGET_RC "op_target_rc" #define XML_ATTR_TE_ALLOWFAIL "op_allow_fail" #define XML_ATTR_LRM_PROBE "lrm-is-probe" #define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes" #define XML_TAG_DIFF_ADDED "diff-added" #define XML_TAG_DIFF_REMOVED "diff-removed" #include #define ID(x) crm_element_value(x, XML_ATTR_ID) #define INSTANCE(x) crm_element_value(x, XML_CIB_ATTR_INSTANCE) #define TSTAMP(x) crm_element_value(x, XML_ATTR_TSTAMP) #define TYPE(x) crm_element_name(x) #endif diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index 7e3550e6c5..4e9dd31052 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,264 +1,268 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PENGINE_STATUS__H #define PENGINE_STATUS__H #include #include #include typedef struct node_s node_t; typedef struct action_s action_t; typedef struct resource_s resource_t; typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide } no_quorum_policy_t; enum node_type { node_ping, node_member }; enum pe_restart { pe_restart_restart, pe_restart_ignore }; #define pe_flag_have_quorum 0x00000001ULL #define pe_flag_symmetric_cluster 0x00000002ULL #define pe_flag_is_managed_default 0x00000004ULL #define pe_flag_maintenance_mode 0x00000008ULL #define pe_flag_stonith_enabled 0x00000010ULL #define pe_flag_have_stonith_resource 0x00000020ULL #define pe_flag_stop_rsc_orphans 0x00000100ULL #define pe_flag_stop_action_orphans 0x00000200ULL #define pe_flag_stop_everything 0x00000400ULL #define pe_flag_start_failure_fatal 0x00001000ULL #define pe_flag_remove_after_stop 0x00002000ULL typedef struct pe_working_set_s { xmlNode *input; ha_time_t *now; /* options extracted from the input */ char *dc_uuid; node_t *dc_node; const char *stonith_action; + const char *placement_strategy; unsigned long long flags; int stonith_timeout; int default_resource_stickiness; no_quorum_policy_t no_quorum_policy; GHashTable *config_hash; GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr colocation_constraints; GListPtr actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; /* final output */ xmlNode *graph; } pe_working_set_t; struct node_shared_s { const char *id; const char *uname; gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean 
shutdown; gboolean expected_up; gboolean is_dc; int num_resources; GListPtr running_rsc; /* resource_t* */ GListPtr allocated_rsc; /* resource_t* */ GHashTable *attrs; /* char* => char* */ enum node_type type; + + GHashTable *utilization; }; struct node_s { int weight; gboolean fixed; int count; struct node_shared_s *details; }; #include #define pe_rsc_orphan 0x00000001ULL #define pe_rsc_managed 0x00000002ULL #define pe_rsc_notify 0x00000010ULL #define pe_rsc_unique 0x00000020ULL #define pe_rsc_can_migrate 0x00000040ULL #define pe_rsc_provisional 0x00000100ULL #define pe_rsc_allocating 0x00000200ULL #define pe_rsc_merging 0x00000400ULL #define pe_rsc_failed 0x00010000ULL #define pe_rsc_shutdown 0x00020000ULL #define pe_rsc_runnable 0x00040000ULL #define pe_rsc_start_pending 0x00080000ULL #define pe_rsc_starting 0x00100000ULL #define pe_rsc_stopping 0x00200000ULL struct resource_s { char *id; char *clone_name; char *long_name; xmlNode *xml; xmlNode *ops_xml; resource_t *parent; void *variant_opaque; enum pe_obj_types variant; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; int priority; int stickiness; int sort_index; int failure_timeout; int effective_priority; int migration_threshold; unsigned long long flags; GListPtr rsc_cons_lhs; /* rsc_colocation_t* */ GListPtr rsc_cons; /* rsc_colocation_t* */ GListPtr rsc_location; /* rsc_to_node_t* */ GListPtr actions; /* action_t* */ node_t *allocated_to; GListPtr running_on; /* node_t* */ GListPtr known_on; /* node_t* */ GListPtr allowed_nodes; /* node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; + GHashTable *utilization; GListPtr children; /* resource_t* */ }; struct action_s { int id; int priority; resource_t *rsc; void *rsc_opaque; node_t *node; char *task; char *uuid; xmlNode *op_entry; gboolean pseudo; gboolean runnable; gboolean optional; gboolean print_always; gboolean 
failure_is_fatal; gboolean implied_by_stonith; gboolean allow_reload_conversion; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; gboolean dumped; gboolean processed; action_t *pre_notify; action_t *pre_notified; action_t *post_notify; action_t *post_notified; int seen_count; GHashTable *meta; GHashTable *extra; GListPtr actions_before; /* action_warpper_t* */ GListPtr actions_after; /* action_warpper_t* */ }; typedef struct notify_data_s { GHashTable *keys; const char *action; action_t *pre; action_t *post; action_t *pre_done; action_t *post_done; GListPtr active; /* notify_entry_t* */ GListPtr inactive; /* notify_entry_t* */ GListPtr start; /* notify_entry_t* */ GListPtr stop; /* notify_entry_t* */ GListPtr demote; /* notify_entry_t* */ GListPtr promote; /* notify_entry_t* */ GListPtr master; /* notify_entry_t* */ GListPtr slave; /* notify_entry_t* */ } notify_data_t; gboolean cluster_status(pe_working_set_t *data_set); extern void set_working_set_defaults(pe_working_set_t *data_set); extern void cleanup_calculations(pe_working_set_t *data_set); extern resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh); extern node_t *pe_find_node(GListPtr node_list, const char *uname); extern node_t *pe_find_node_id(GListPtr node_list, const char *id); extern GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set); #endif diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h new file mode 100644 index 0000000000..34f82bb56c --- /dev/null +++ b/include/crm/stonith-ng.h @@ -0,0 +1,153 @@ +/* + * Copyright (C) 2004 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef STONITH_NG__H +#define STONITH_NG__H + +#include +#include + +enum stonith_state { + stonith_connected_command, + stonith_connected_query, + stonith_disconnected +}; + +enum stonith_call_options { + st_opt_none = 0x00000000, + st_opt_verbose = 0x00000001, + st_opt_allow_suicide = 0x00000002, + st_opt_local_first = 0x00000004, + st_opt_discard_reply = 0x00000010, + st_opt_scope_local = 0x00000100, + st_opt_sync_call = 0x00001000, +}; + +#define stonith_default_options st_opt_none /* default call options: no flag bits set (previously expanded to "= stonith_none", an undeclared name) */ + +enum stonith_errors { + stonith_ok = 0, + st_err_generic = -1, + st_err_internal = -2, + st_err_not_supported = -3, + st_err_connection = -4, + st_err_missing = -5, + st_err_exists = -6, + st_err_timeout = -7, + st_err_ipc = -8, + st_err_peer = -9, + st_err_unknown_operation = -10, + st_err_unknown_device = -11, + st_err_unknown_port = -12, + st_err_none_available = -13, + st_err_authentication = -14, + st_err_signal = -15, +}; + +#define F_STONITH_CLIENTID "st_clientid" +#define F_STONITH_CALLOPTS "st_callopt" +#define F_STONITH_CALLID "st_callid" +#define F_STONITH_CALLDATA "st_calldata" +#define F_STONITH_OPERATION "st_op" +#define F_STONITH_TARGET "st_target" +#define F_STONITH_REMOTE "st_remote_op" +#define F_STONITH_RC "st_rc" +#define F_STONITH_TIMEOUT "st_timeout" +#define F_STONITH_CALLBACK_TOKEN "st_async_id" +#define F_STONITH_CLIENTNAME "st_clientname" +#define F_STONITH_NOTIFY_TYPE "st_notify_type" +#define F_STONITH_NOTIFY_ACTIVATE "st_notify_activate" +#define F_STONITH_NOTIFY_DEACTIVATE 
"st_notify_deactivate" +#define F_STONITH_DELEGATE "st_delegate" + +#define T_STONITH_NG "stonith-ng" +#define T_STONITH_REPLY "st-reply" + +#define F_STONITH_DEVICE "st_device_id" +#define F_STONITH_ACTION "st_device_action" + + +#define T_STONITH_NOTIFY "st_notify" +#define T_STONITH_NOTIFY_DISCONNECT "st_notify_disconnect" + +#define STONITH_OP_EXEC "st_execute" +#define STONITH_OP_QUERY "st_query" +#define STONITH_OP_FENCE "st_fence" +#define STONITH_OP_DEVICE_ADD "st_device_register" +#define STONITH_OP_DEVICE_DEL "st_device_remove" +#define STONITH_OP_DEVICE_METADATA "st_device_metadata" + +#define stonith_channel "st_command" +#define stonith_channel_callback "st_callback" + +typedef struct stonith_s stonith_t; + +typedef struct stonith_api_operations_s +{ + int (*free) (stonith_t *st); + int (*connect) (stonith_t *st, const char *name, int *async_fd, int *sync_fd); + int (*disconnect)(stonith_t *st); + + int (*remove_device)( + stonith_t *st, int options, const char *name); + int (*register_device)( + stonith_t *st, int options, const char *id, + const char *namespace, const char *agent, GHashTable *parameters); + + int (*metadata)(stonith_t *st, int options, + const char *device, const char *namespace, char **output, int timeout); + int (*call)(stonith_t *st, int options, const char *id, + const char *action, const char *port, int timeout); + + int (*query)(stonith_t *st, int options, const char *node, GListPtr *devices, int timeout); + int (*fence)(stonith_t *st, int options, const char *node, const char *action, int timeout); + + int (*register_notification)( + stonith_t *st, const char *event, + void (*notify)(stonith_t *st, const char *event, xmlNode *msg)); + int (*remove_notification)(stonith_t *st, const char *event); + + int (*register_callback)( + stonith_t *st, int call_id, int timeout, gboolean only_success, + void *userdata, const char *callback_name, + void (*callback)(stonith_t *st, const xmlNode *msg, int call, int rc, xmlNode *output, void 
*userdata)); + int (*remove_callback)(stonith_t *st, int call_id, gboolean all_callbacks); + +} stonith_api_operations_t; + +struct stonith_s +{ + enum stonith_state state; + + int call_id; + int call_timeout; + void *private; + + GList *notify_list; + + stonith_api_operations_t *cmds; +}; + +/* Core functions */ +extern stonith_t *stonith_api_new(void); +extern void stonith_api_delete(stonith_t *st); + +extern const char *stonith_error2string(enum stonith_errors return_code); +extern void stonith_dump_pending_callbacks(void); + +#endif + diff --git a/include/crm/transition.h b/include/crm/transition.h index 36f1949c07..200b0113fa 100644 --- a/include/crm/transition.h +++ b/include/crm/transition.h @@ -1,157 +1,160 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
/*
 * Transition graph object.  Instances are returned by unpack_graph() and
 * passed to run_graph(), update_graph(), print_graph() and destroy_graph().
 *
 * NOTE(review): the meaning of the individual counters and timeouts is not
 * visible in this header - confirm against the implementation before relying
 * on any of them.
 */
typedef struct crm_graph_s
{
    int id;
    char *source;
    int abort_priority;

    gboolean complete;
    const char *abort_reason;
    enum transition_action completion_action;

    int num_actions;
    int num_synapses;

    int batch_limit;
    int network_delay;
    int stonith_timeout;
    int transition_timeout;

    int fired;
    int pending;
    int skipped;
    int completed;
    int incomplete;

    GListPtr synapses; /* synapse_t* */

} crm_graph_t;
/*
 * te_log_action(log_level, fmt, args...)
 *
 * Forwards the message to do_crm_log().  When TESTING is defined the same
 * message is also written to stderr, followed by a newline.
 *
 * The TESTING variant is wrapped in do { ... } while(0) so that the macro
 * plus its trailing semicolon form exactly one statement; a bare { ... }
 * block would break callers such as
 *     if(cond) te_log_action(...); else ...
 */
#ifdef TESTING
#  define te_log_action(log_level, fmt, args...) do {		\
	do_crm_log(log_level, fmt, ##args);			\
	fprintf(stderr, fmt"\n", ##args);			\
    } while(0)
#else
#  define te_log_action(log_level, fmt, args...) do_crm_log(log_level, fmt, ##args)
#endif
/*
 * Node identification message.
 *
 * pcmk_cluster_id_callback() rejects the message when 'id' differs from the
 * sending nodeid, and otherwise hands born_on, votes, processes, uname and
 * version to update_member().
 *
 * The struct is declared packed, so the order and width of the fields below
 * define its exact memory layout.
 *
 * NOTE(review): pcmk_cluster_id_swab() applies swab32() to id, pid, votes and
 * processes but leaves the 64-bit born_on untouched - confirm whether that is
 * intentional.
 */
struct crm_identify_msg_s
{
	coroipc_request_header_t header __attribute__((aligned(8)));
	uint32_t id;
	uint32_t pid;
	int32_t votes;
	uint32_t processes;
	char uname[256];
	char version[256];
	uint64_t born_on;
} __attribute__((packed));
NULL, NULL }, { 0, crm_proc_attrd, crm_flag_none, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd", NULL, NULL }, - { 0, crm_proc_stonithd, crm_flag_none, 1, 0, TRUE, "stonithd", NULL, CRM_DAEMON_DIR"/stonithd", NULL, NULL }, + { 0, crm_proc_stonithd, crm_flag_none, 0, 0, TRUE, "stonithd", NULL, "/bin/false", NULL, NULL }, { 0, crm_proc_pe, crm_flag_none, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine", NULL, NULL }, { 0, crm_proc_mgmtd, crm_flag_none, 7, 0, TRUE, "mgmtd", NULL, CRM_DAEMON_DIR"/mgmtd", NULL, NULL }, + { 0, crm_proc_stonith_ng, crm_flag_none, 1, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd", NULL, NULL }, }; void send_cluster_id(void); int send_cluster_msg_raw(const AIS_Message *ais_msg); char *pcmk_generate_membership_data(void); gboolean check_message_sanity(const AIS_Message *msg, const char *data); #ifdef AIS_COROSYNC typedef const void ais_void_ptr; int pcmk_shutdown(void); void pcmk_peer_update(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); #else typedef void ais_void_ptr; extern totempg_groups_handle openais_group_handle; int pcmk_shutdown(struct objdb_iface_ver0 *objdb); void pcmk_peer_update(enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id); #endif int pcmk_startup (struct corosync_api_v1 *corosync_api); int pcmk_config_init(struct corosync_api_v1 *corosync_api); int pcmk_ipc_exit (void *conn); int pcmk_ipc_connect (void *conn); void pcmk_ipc(void *conn, ais_void_ptr *msg); void pcmk_exec_dump(void); void pcmk_cluster_swab(void *msg); void pcmk_cluster_callback(ais_void_ptr *message, unsigned int nodeid); void 
/*
 * Table of client-request handlers.  It is exported to the host through
 * pcmk_service_handler (.lib_engine or .lib_service, depending on
 * AIS_COROSYNC).
 *
 * When AIS_WHITETANK is defined each entry additionally declares the size and
 * id of the response it sends.
 *
 * NOTE(review): the position of an entry presumably has to match the request
 * id used by clients - confirm before reordering.
 */
static struct corosync_lib_handler pcmk_lib_service[] =
{
    { /* 0 */
	.lib_handler_fn		= pcmk_ipc,
	.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
#ifdef AIS_WHITETANK
	.response_size		= sizeof (coroipc_response_header_t),
	.response_id		= CRM_MESSAGE_IPC_ACK,
#endif
    },
    { /* 1 */
	.lib_handler_fn		= pcmk_nodes,
	.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
#ifdef AIS_WHITETANK
	.response_size		= sizeof (coroipc_response_header_t),
	.response_id		= CRM_MESSAGE_IPC_ACK,
#endif
    },
    { /* 2 */
	.lib_handler_fn		= pcmk_notify,
	.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
#ifdef AIS_WHITETANK
	.response_size		= sizeof (coroipc_response_header_t),
	.response_id		= CRM_MESSAGE_IPC_ACK,
#endif
    },
    { /* 3 */
	/* The only handler that answers with a nodeid response instead of a plain ACK */
	.lib_handler_fn		= pcmk_nodeid,
	.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
#ifdef AIS_WHITETANK
	.response_size		= sizeof (struct crm_ais_nodeid_resp_s),
	.response_id		= crm_class_nodeid,
#endif
    },
    { /* 4 */
	.lib_handler_fn		= pcmk_remove_member,
	.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
#ifdef AIS_WHITETANK
	.response_size		= sizeof (coroipc_response_header_t),
	.response_id		= CRM_MESSAGE_IPC_ACK,
#endif
    },
    { /* 5 */
	.lib_handler_fn		= pcmk_quorum,
	.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
#ifdef AIS_WHITETANK
	.response_size		= sizeof (coroipc_response_header_t),
	.response_id		= CRM_MESSAGE_IPC_ACK,
#endif
    },
};
/*
 * Exports the interface for the service
 *
 * Binds the plugin's callbacks to the service-engine descriptor that
 * pcmk_get_handler_ver0() returns:
 *   - pcmk_ipc_connect / pcmk_ipc_exit  : client connection set-up / tear-down
 *   - pcmk_startup / pcmk_shutdown      : service start / stop
 *   - pcmk_config_init                  : configuration hook
 *   - pcmk_peer_update                  : membership (configuration change) events
 *   - pcmk_exec_dump                    : state dump
 *
 * The handler tables are published under different member names depending on
 * whether AIS_COROSYNC is defined; the element counts are derived from the
 * arrays themselves.
 */
struct corosync_service_engine pcmk_service_handler = {
    .name			= (unsigned char *)"Pacemaker Cluster Manager "PACKAGE_VERSION,
    .id				= PCMK_SERVICE_ID,
    .private_data_size		= 0,
    .flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
    .lib_init_fn		= pcmk_ipc_connect,
    .lib_exit_fn		= pcmk_ipc_exit,
    .exec_init_fn		= pcmk_startup,
    .exec_exit_fn		= pcmk_shutdown,
    .config_init_fn		= pcmk_config_init,
#ifdef AIS_COROSYNC
    .priority			= 50,
    .lib_engine			= pcmk_lib_service,
    .lib_engine_count		= sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler),
    .exec_engine		= pcmk_exec_service,
    .exec_engine_count		= sizeof (pcmk_exec_service) / sizeof (struct corosync_exec_handler),
#else
    .lib_service		= pcmk_lib_service,
    .lib_service_count		= sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler),
    .exec_service		= pcmk_exec_service,
    .exec_service_count		= sizeof (pcmk_exec_service) / sizeof (struct corosync_exec_handler),
#endif
    .confchg_fn			= pcmk_peer_update,
    .exec_dump_fn		= pcmk_exec_dump,
    /* Optional sync hooks, currently not provided: */
/* 	void (*sync_init) (void); */
/* 	int (*sync_process) (void); */
/* 	void (*sync_activate) (void); */
/* 	void (*sync_abort) (void); */
};
/*
 * Read the plugin's settings from the host configuration and publish them to
 * the child daemons through environment variables.
 *
 * "logging" section:
 *   debug            -> plugin_log_level (LOG_DEBUG / LOG_INFO) and HA_debug
 *   to_file/logfile  -> HA_logfile (an error is logged if no file is named)
 *   syslog_facility  -> HA_logfacility and HA_LOGFACILITY
 *
 * "service" section whose name is "pacemaker":
 *   clustername      -> local_cname / local_cname_len (default "pcmk")
 *   use_logd         -> HA_use_logd
 *   use_mgmtd        -> when false, the mgmtd child gets start_seq 0
 *
 * 'value' is reused for every option, so the order of the get_config_opt()
 * calls and of the checks that follow them matters.
 */
static void process_ais_conf(void)
{
    char *value = NULL;
    hdb_handle_t top_handle = 0;
    hdb_handle_t local_handle = 0;

    ais_info("Reading configure");
    top_handle = config_find_init(pcmk_api, "logging");
    local_handle = config_find_next(pcmk_api, "logging", top_handle);

    /* Debug logging defaults to on */
    get_config_opt(pcmk_api, local_handle, "debug", &value, "on");
    if(ais_get_boolean(value)) {
	plugin_log_level = LOG_DEBUG;
	setenv("HA_debug", "1", 1);

    } else {
	plugin_log_level = LOG_INFO;
	setenv("HA_debug", "0", 1);
    }

    get_config_opt(pcmk_api, local_handle, "to_file", &value, "off");
    if(ais_get_boolean(value)) {
	/* File logging is only tolerated in addition to syslog: complain loudly otherwise */
	get_config_opt(pcmk_api, local_handle, "to_syslog", &value, "on");
	if(ais_get_boolean(value) == FALSE) {
	    ais_err("The use of 'to_file: on' is not a replacement for 'to_syslog: on' and is not supported.");
	    ais_err("Using to_file results in most logs being lost as several of the daemons do not run as root");
	    ais_err("If you really wish to disable syslog, set 'syslog_facility: none'");
	}

	get_config_opt(pcmk_api, local_handle, "logfile", &value, NULL);

	if(value == NULL) {
	    ais_err("Logging to a file requested but no log file specified");
	} else {
	    setenv("HA_logfile", value, 1);
	}
    }

    /* The facility is exported under both spellings of the variable name */
    get_config_opt(pcmk_api, local_handle, "syslog_facility", &value, "daemon");
    setenv("HA_logfacility", value, 1);
    setenv("HA_LOGFACILITY", value, 1);

    /* NOTE(review): this passes the handle from config_find_next() rather than
     * the one returned by config_find_init() - confirm which one
     * config_find_done() expects.
     */
    config_find_done(pcmk_api, local_handle);

    /* Walk the "service" sections until the one named "pacemaker" is found.
     * If none matches, local_handle ends up 0 and the lookups below are made
     * with that handle.
     */
    top_handle = config_find_init(pcmk_api, "service");
    local_handle = config_find_next(pcmk_api, "service", top_handle);
    while(local_handle) {
	value = NULL;
	pcmk_api->object_key_get(local_handle, "name", strlen("name"), (void**)&value, NULL);
	if(ais_str_eq("pacemaker", value)) {
	    break;
	}
	local_handle = config_find_next(pcmk_api, "service", top_handle);
    }

    get_config_opt(pcmk_api, local_handle, "clustername", &local_cname, "pcmk");
    local_cname_len = strlen(local_cname);

    get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no");
    setenv("HA_use_logd", value, 1);

    get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no");
    if(ais_get_boolean(value) == FALSE) {
	int lpc = 0;
	for (; lpc < SIZEOF(pcmk_children); lpc++) {
	    if(crm_proc_mgmtd & pcmk_children[lpc].flag) {
		/* Disable mgmtd startup */
		pcmk_children[lpc].start_seq = 0;
		break;
	    }
	}
    }

    /* NOTE(review): same handle question as for the "logging" section above */
    config_find_done(pcmk_api, local_handle);
}
FALSE; } } pcmk_children[lpc].respawn_count += 1; if(pcmk_children[lpc].respawn_count > MAX_RESPAWN) { ais_err("Child respawn count exceeded by %s", pcmk_children[lpc].name); pcmk_children[lpc].respawn = FALSE; } if(pcmk_children[lpc].respawn) { ais_notice("Respawning failed child process: %s", pcmk_children[lpc].name); spawn_child(&(pcmk_children[lpc])); } else { send_cluster_id(); } } } sched_yield (); nanosleep (&waitsleep, 0); } return 0; } static uint32_t pcmk_update_nodeid(void) { int last = local_nodeid; #if AIS_COROSYNC local_nodeid = pcmk_api->totem_nodeid_get(); #else local_nodeid = totempg_my_nodeid_get(); #endif if(last != local_nodeid) { if(last == 0) { ais_info("Local node id: %u", local_nodeid); } else { char *last_s = NULL; ais_malloc0(last_s, 32); ais_warn("Detected local node id change: %u -> %u", last, local_nodeid); snprintf(last_s, 31, "%u", last); ais_remove_peer(last_s); ais_free(last_s); } update_member(local_nodeid, 0, 0, 1, 0, local_uname, CRM_NODE_MEMBER, NULL); } return local_nodeid; } int pcmk_startup(struct corosync_api_v1 *init_with) { int rc = 0; int lpc = 0; int start_seq = 1; struct utsname us; struct rlimit cores; static int max = SIZEOF(pcmk_children); struct passwd *pwentry = getpwnam(CRM_DAEMON_USER); pcmk_api = init_with; #ifdef AIS_WHITETANK log_init ("crm"); #endif process_ais_conf(); membership_list = g_hash_table_new_full( g_direct_hash, g_direct_equal, NULL, destroy_ais_node); membership_notify_list = g_hash_table_new(g_direct_hash, g_direct_equal); ipc_client_list = g_hash_table_new(g_direct_hash, g_direct_equal); setenv("HA_COMPRESSION", "bz2", 1); setenv("HA_cluster_type", "openais", 1); ais_info("CRM: Initialized"); log_printf(LOG_INFO, "Logging: Initialized %s\n", __PRETTY_FUNCTION__); rc = getrlimit(RLIMIT_CORE, &cores); if(rc < 0) { ais_perror("Cannot determine current maximum core size."); } if(cores.rlim_max <= 0) { cores.rlim_max = RLIM_INFINITY; rc = setrlimit(RLIMIT_CORE, &cores); if(rc < 0) { ais_perror("Core 
file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } } else { ais_info("Maximum core file size is: %lu", cores.rlim_max); if(system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { ais_perror("Could not enable /proc/sys/kernel/core_uses_pid"); } } AIS_CHECK(pwentry != NULL, ais_err("Cluster user %s does not exist", CRM_DAEMON_USER); return TRUE); mkdir(CRM_STATE_DIR, 0750); chown(CRM_STATE_DIR, pwentry->pw_uid, pwentry->pw_gid); mkdir(HA_STATE_DIR"/heartbeat", 0755); /* Used by RAs - Leave owned by root */ mkdir(HA_STATE_DIR"/heartbeat/rsctmp", 0755); /* Used by RAs - Leave owned by root */ rc = uname(&us); AIS_ASSERT(rc == 0); local_uname = ais_strdup(us.nodename); local_uname_len = strlen(local_uname); ais_info("Service: %d", PCMK_SERVICE_ID); ais_info("Local hostname: %s", local_uname); pcmk_update_nodeid(); pthread_create (&pcmk_wait_thread, NULL, pcmk_wait_dispatch, NULL); for (start_seq = 1; start_seq < max; start_seq++) { /* dont start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if(start_seq == pcmk_children[lpc].start_seq) { spawn_child(&(pcmk_children[lpc])); } } } return 0; } /* static void ais_print_node(const char *prefix, struct totem_ip_address *host) { int len = 0; char *buffer = NULL; ais_malloc0(buffer, INET6_ADDRSTRLEN+1); inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); len = strlen(buffer); ais_info("%s: %.*s", prefix, len, buffer); ais_free(buffer); } */ #if 0 /* copied here for reference from exec/totempg.c */ char *totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[64]; struct totem_ip_address interfaces[INTERFACE_MAX]; char **status; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; 
i++) { sprintf (one_iface, "r(%d) ip(%s), ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); } #endif static void ais_mark_unseen_peer_dead( gpointer key, gpointer value, gpointer user_data) { int *changed = user_data; crm_node_t *node = value; if(node->last_seen != membership_seq && ais_str_eq(CRM_NODE_LOST, node->state) == FALSE) { ais_info("Node %s was not seen in the previous transition", node->uname); *changed += update_member(node->id, 0, membership_seq, node->votes, node->processes, node->uname, CRM_NODE_LOST, NULL); } } void pcmk_peer_update ( enum totem_configuration_type configuration_type, #ifdef AIS_COROSYNC const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id #else unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id #endif ) { int lpc = 0; int changed = 0; int do_update = 0; AIS_ASSERT(ring_id != NULL); switch(configuration_type) { case TOTEM_CONFIGURATION_REGULAR: do_update = 1; break; case TOTEM_CONFIGURATION_TRANSITIONAL: break; } membership_seq = ring_id->seq; ais_notice("%s membership event on ring %lld: memb=%ld, new=%ld, lost=%ld", do_update?"Stable":"Transitional", ring_id->seq, (long)member_list_entries, (long)joined_list_entries, (long)left_list_entries); if(do_update == 0) { for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "new: "; uint32_t nodeid = joined_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "memb:"; uint32_t nodeid = member_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "lost:"; uint32_t 
nodeid = left_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } return; } for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "NEW: "; uint32_t nodeid = joined_list[lpc]; crm_node_t *node = NULL; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(nodeid)); if(node->addr == NULL) { const char *addr = totempg_ifaces_print(nodeid); node->addr = ais_strdup(addr); ais_debug("Node %u has address %s", nodeid, node->addr); } } plugin_has_votes = 0; for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "MEMB:"; uint32_t nodeid = member_list[lpc]; plugin_has_votes++; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "LOST:"; uint32_t nodeid = left_list[lpc]; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_LOST, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } if(changed && joined_list_entries == 0 && left_list_entries == 0) { ais_err("Something strange happened: %d", changed); changed = 0; } ais_debug_2("Reaping unseen nodes..."); g_hash_table_foreach(membership_list, ais_mark_unseen_peer_dead, &changed); if(plugin_has_votes > plugin_expected_votes) { update_expected_votes(plugin_has_votes); changed = 1; } if(member_list_entries > 1) { /* Used to set born-on in send_cluster_id()) * We need to wait until we have at least one peer since first * membership id is based on the one before we stopped and isn't reliable */ have_reliable_membership_id = TRUE; } if(changed) { ais_debug("%d nodes changed", changed); pcmk_update_nodeid(); send_member_notification(); } send_cluster_id(); } int pcmk_ipc_exit (void *conn) { int lpc = 0; const char *client = NULL; 
void *async_conn = conn; for (; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].conn == conn) { if(wait_active == FALSE) { /* Make sure the shutdown loop exits */ pcmk_children[lpc].pid = 0; } pcmk_children[lpc].conn = NULL; pcmk_children[lpc].async_conn = NULL; client = pcmk_children[lpc].name; break; } } g_hash_table_remove(membership_notify_list, async_conn); g_hash_table_remove(ipc_client_list, async_conn); do_ais_log(client?LOG_INFO:(LOG_DEBUG+1), "Client %s (conn=%p, async-conn=%p) left", client?client:"unknown-transient", conn, async_conn); return (0); } int pcmk_ipc_connect (void *conn) { /* OpenAIS hasn't finished setting up the connection at this point * Sending messages now messes up the protocol! */ return (0); } /* * Executive message handlers */ void pcmk_cluster_swab(void *msg) { AIS_Message *ais_msg = msg; ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->size = swab32 (ais_msg->size); ais_msg->is_compressed = swab32 (ais_msg->is_compressed); ais_msg->compressed_size = swab32 (ais_msg->compressed_size); ais_msg->host.id = swab32 (ais_msg->host.id); ais_msg->host.pid = swab32 (ais_msg->host.pid); ais_msg->host.type = swab32 (ais_msg->host.type); ais_msg->host.size = swab32 (ais_msg->host.size); ais_msg->host.local = swab32 (ais_msg->host.local); ais_msg->sender.id = swab32 (ais_msg->sender.id); ais_msg->sender.pid = swab32 (ais_msg->sender.pid); ais_msg->sender.type = swab32 (ais_msg->sender.type); ais_msg->sender.size = swab32 (ais_msg->sender.size); ais_msg->sender.local = swab32 (ais_msg->sender.local); } void pcmk_cluster_callback ( ais_void_ptr *message, unsigned int nodeid) { const AIS_Message *ais_msg = message; ais_debug_2("Message from node %u (%s)", nodeid, nodeid==local_nodeid?"local":"remote"); /* Shouldn't be required... 
update_member( ais_msg->sender.id, membership_seq, -1, 0, ais_msg->sender.uname, NULL); */ if(ais_msg->host.size == 0 || ais_str_eq(ais_msg->host.uname, local_uname)) { route_ais_message(ais_msg, FALSE); } else { ais_debug_3("Discarding Msg[%d] (dest=%s:%s, from=%s:%s)", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(ais_msg->host.type), ais_dest(&(ais_msg->sender)), msg_type2text(ais_msg->sender.type)); } } void pcmk_cluster_id_swab(void *msg) { struct crm_identify_msg_s *ais_msg = msg; ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->pid = swab32 (ais_msg->pid); ais_msg->votes = swab32 (ais_msg->votes); ais_msg->processes = swab32 (ais_msg->processes); } void pcmk_cluster_id_callback (ais_void_ptr *message, unsigned int nodeid) { int changed = 0; const struct crm_identify_msg_s *msg = message; if(nodeid != msg->id) { ais_err("Invalid message: Node %u claimed to be node %d", nodeid, msg->id); return; } ais_debug("Node update: %s (%s)", msg->uname, msg->version); changed = update_member( nodeid, msg->born_on, membership_seq, msg->votes, msg->processes, msg->uname, NULL, msg->version); if(changed) { send_member_notification(); } } struct res_overlay { coroipc_response_header_t header __attribute((aligned(8))); char buf[4096]; }; struct res_overlay *res_overlay = NULL; static void send_ipc_ack(void *conn) { if(res_overlay == NULL) { ais_malloc0(res_overlay, sizeof(struct res_overlay)); } res_overlay->header.id = CRM_MESSAGE_IPC_ACK; res_overlay->header.size = sizeof (coroipc_response_header_t); res_overlay->header.error = CS_OK; #ifdef AIS_COROSYNC pcmk_api->ipc_response_send (conn, res_overlay, res_overlay->header.size); #else openais_response_send (conn, res_overlay, res_overlay->header.size); #endif } /* local callbacks */ void pcmk_ipc(void *conn, ais_void_ptr *msg) { AIS_Message *mutable; int type = 0, size = 0; gboolean transient = TRUE; const AIS_Message *ais_msg = (const AIS_Message*)msg; void *async_conn = 
conn; ais_debug_2("Message from client %p", conn); if(check_message_sanity(msg, ((const AIS_Message*)msg)->data) == FALSE) { /* The message is corrupted - ignore */ send_ipc_ack(conn); msg = NULL; return; } /* Make a copy of the message here and ACK it * The message is only valid until a response is sent * but the response must also be sent _before_ we send anything else */ mutable = ais_msg_copy(ais_msg); AIS_ASSERT(check_message_sanity(mutable, mutable->data)); size = mutable->header.size; /* ais_malloc0(ais_msg, size); */ /* memcpy(ais_msg, msg, size); */ type = mutable->sender.type; ais_debug_3("type: %d local: %d conn: %p host type: %d ais: %d sender pid: %d child pid: %d size: %d", type, mutable->host.local, pcmk_children[type].conn, mutable->host.type, crm_msg_ais, mutable->sender.pid, pcmk_children[type].pid, ((int)SIZEOF(pcmk_children))); if(type > crm_msg_none && type < SIZEOF(pcmk_children)) { /* known child process */ transient = FALSE; } /* If this check fails, the order of pcmk_children probably * doesn't match that of the crm_ais_msg_types enum */ AIS_CHECK(transient || mutable->sender.pid == pcmk_children[type].pid, ais_err("Sender: %d, child[%d]: %d", mutable->sender.pid, type, pcmk_children[type].pid); return); if(transient == FALSE && type > crm_msg_none && mutable->host.local && pcmk_children[type].conn == NULL && mutable->host.type == crm_msg_ais) { AIS_CHECK(mutable->sender.type != mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); ais_info("Recorded connection %p for %s/%d", conn, pcmk_children[type].name, pcmk_children[type].pid); pcmk_children[type].conn = conn; pcmk_children[type].async_conn = async_conn; /* Make sure they have the latest membership */ if(pcmk_children[type].flags & crm_flag_members) { char *update = pcmk_generate_membership_data(); g_hash_table_replace(membership_notify_list, async_conn, async_conn); ais_info("Sending membership update "U64T" to %s", membership_seq, 
pcmk_children[type].name); send_client_msg(async_conn, crm_class_members, crm_msg_none,update); } } else if(transient) { AIS_CHECK(mutable->sender.type == mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); g_hash_table_replace(ipc_client_list, async_conn, GUINT_TO_POINTER(mutable->sender.pid)); } mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); route_ais_message(mutable, TRUE); send_ipc_ack(conn); msg = NULL; ais_free(mutable); } int pcmk_shutdown ( #ifdef AIS_COROSYNC void #else struct objdb_iface_ver0 *objdb #endif ) { int lpc = 0; static int phase = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); if(phase == 0) { ais_notice("Shuting down Pacemaker"); phase = max; } wait_active = FALSE; /* stop the wait loop */ for (; phase > 0; phase--) { /* dont stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { if(phase != pcmk_children[lpc].start_seq) { continue; } #ifdef AIS_WHITETANK retry: #endif if(pcmk_children[lpc].pid) { pid_t pid = 0; int status = 0; time_t now = time(NULL); if(pcmk_children[lpc].respawn) { next_log = now + 30; pcmk_children[lpc].respawn = FALSE; stop_child(&(pcmk_children[lpc]), SIGTERM); } pid = wait4(pcmk_children[lpc].pid, &status, WNOHANG, NULL); if(pid < 0) { ais_perror("Call to wait4(%s/%d) failed - treating it as stopped", pcmk_children[lpc].name, pcmk_children[lpc].pid); } else if(pid == 0) { if(now >= next_log) { next_log = now + 30; ais_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...", pcmk_children[lpc].name, pcmk_children[lpc].pid, pcmk_children[lpc].start_seq); } #ifdef AIS_WHITETANK { struct timespec waitsleep = { .tv_sec = 1, .tv_nsec = 0 }; sched_yield (); nanosleep (&waitsleep, 0); goto retry; } #else /* Return control to corosync */ return -1; #endif } } /* cleanup */ ais_notice("%s confirmed 
stopped", pcmk_children[lpc].name); pcmk_children[lpc].async_conn = NULL; pcmk_children[lpc].conn = NULL; pcmk_children[lpc].pid = 0; } } send_cluster_id(); ais_notice("Shutdown complete"); /* TODO: Add back the logsys flush call once its written */ #ifdef AIS_WHITETANK /* Bug bnc#482847, bnc#482905 * * All cluster services are now down, we could allow OpenAIS to continue * unloading plugins, but its kinda new at that and there are a bunch of * race conditions that get exercised. * * Take the easy way out for now (on whitetank) and eventually fix for * CoroSync which is where everyone wants to be eventually anyway */ ais_notice("Forcing clean exit of OpenAIS"); exit(0); #endif return 0; } struct member_loop_data { char *string; }; void member_loop_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct member_loop_data *data = user_data; ais_debug_2("Dumping node %u", node->id); data->string = append_member(data->string, node); } char *pcmk_generate_membership_data(void) { int size = 0; struct member_loop_data data; size = 256; ais_malloc0(data.string, size); snprintf(data.string, size, "", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_loop_fn, &data); size = strlen(data.string); data.string = realloc(data.string, size + 9) ;/* 9 = + nul */ sprintf(data.string + size, ""); return data.string; } void pcmk_nodes(void *conn, ais_void_ptr *msg) { char *data = pcmk_generate_membership_data(); void *async_conn = conn; /* send the ACK before we send any other messages * - but after we no longer need to access the message */ send_ipc_ack(conn); msg = NULL; if(async_conn) { send_client_msg(async_conn, crm_class_members, crm_msg_none, data); } ais_free(data); } void pcmk_remove_member(void *conn, ais_void_ptr *msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; if(data != NULL) { char *bcast = 
ais_concat("remove-peer", data, ':'); send_cluster_msg(crm_msg_ais, NULL, bcast); ais_info("Sent: %s", bcast); ais_free(bcast); } ais_free(data); } static void send_quorum_details(void *conn) { int size = 256; char *data = NULL; ais_malloc0(data, size); snprintf(data, size, "", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); send_client_msg(conn, crm_class_quorum, crm_msg_none, data); ais_free(data); } void pcmk_quorum(void *conn, ais_void_ptr *msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; if(data != NULL) { int value = 0; value = ais_get_int(data, NULL); update_expected_votes(value); } send_quorum_details(conn); ais_free(data); } void pcmk_notify(void *conn, ais_void_ptr *msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); void *async_conn = conn; int enable = 0; int sender = ais_msg->sender.pid; send_ipc_ack(conn); msg = NULL; if(ais_str_eq("true", data)) { enable = 1; } ais_info("%s node notifications for child %d (%p)", enable?"Enabling":"Disabling", sender, async_conn); if(enable) { g_hash_table_replace(membership_notify_list, async_conn, async_conn); } else { g_hash_table_remove(membership_notify_list, async_conn); } ais_free(data); } void pcmk_nodeid(void *conn, ais_void_ptr *msg) { static int counter = 0; struct crm_ais_nodeid_resp_s resp; ais_debug_2("Sending local nodeid: %d to %p[%d]", local_nodeid, conn, counter); resp.header.id = crm_class_nodeid; resp.header.size = sizeof (struct crm_ais_nodeid_resp_s); resp.header.error = CS_OK; resp.id = local_nodeid; resp.counter = counter++; memset(resp.uname, 0, MAX_NAME); memcpy(resp.uname, local_uname, local_uname_len); memset(resp.cname, 0, MAX_NAME); memcpy(resp.cname, local_cname, local_cname_len); #ifdef AIS_COROSYNC pcmk_api->ipc_response_send (conn, &resp, resp.header.size); #else openais_response_send (conn, &resp, resp.header.size); #endif } static gboolean 
ghash_send_update(gpointer key, gpointer value, gpointer data) { if(send_client_msg(value, crm_class_members, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void send_member_notification(void) { char *update = pcmk_generate_membership_data(); ais_info("Sending membership update "U64T" to %d children", membership_seq, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_update, update); ais_free(update); } gboolean check_message_sanity(const AIS_Message *msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { ais_err("Message with no size"); sane = FALSE; } if(sane && msg->header.error != CS_OK) { ais_err("Message header contains an error: %d", msg->header.error); sane = FALSE; } AIS_CHECK(msg->header.size > sizeof(AIS_Message), ais_err("Message %d size too small: %d < %zu", msg->header.id, msg->header.size, sizeof(AIS_Message)); return FALSE); if(sane && ais_data_len(msg) != tmp_size) { ais_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if(sane && ais_data_len(msg) == 0) { ais_err("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; ais_err("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } ais_debug_2("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { AIS_CHECK(sane, ais_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, 
ais_data_len(msg), msg->header.size)); } else if(repaired) { ais_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { ais_debug_3("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } static int delivered_transient = 0; static void deliver_transient_msg(gpointer key, gpointer value, gpointer user_data) { int pid = GPOINTER_TO_INT(value); AIS_Message *mutable = user_data; if(pid == mutable->host.type) { int rc = send_client_ipc(key, mutable); delivered_transient++; ais_info("Sent message to %s.%d (rc=%d)", ais_dest(&(mutable->host)), pid, rc); if(rc != 0) { ais_warn("Sending message to %s.%d failed (rc=%d)", ais_dest(&(mutable->host)), pid, rc); log_ais_message(LOG_DEBUG, mutable); } } } gboolean route_ais_message(const AIS_Message *msg, gboolean local_origin) { int rc = 0; int dest = msg->host.type; const char *reason = "unknown"; AIS_Message *mutable = ais_msg_copy(msg); static int service_id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); ais_debug_3("Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d)", mutable->id, ais_dest(&(mutable->host)), msg_type2text(dest), ais_dest(&(mutable->sender)), msg_type2text(mutable->sender.type), mutable->sender.pid, local_origin?"false":"true", ais_data_len((mutable))); if(local_origin == FALSE) { if(mutable->host.size == 0 || ais_str_eq(local_uname, mutable->host.uname)) { mutable->host.local = TRUE; } } if(check_message_sanity(mutable, mutable->data) == FALSE) { /* Dont send this message to anyone */ rc = 1; goto bail; } if(mutable->host.local) { void *conn = NULL; 
const char *lookup = NULL; if(dest == crm_msg_ais) { process_ais_message(mutable); goto bail; } else if(dest == crm_msg_lrmd) { /* lrmd messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest == crm_msg_te) { /* te messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest >= SIZEOF(pcmk_children)) { /* Transient client */ delivered_transient = 0; g_hash_table_foreach(ipc_client_list, deliver_transient_msg, mutable); if(delivered_transient) { ais_debug_2("Sent message to %d transient clients: %d", delivered_transient, dest); goto bail; } else { /* try the crmd */ ais_debug_2("Sending message to transient client %d via crmd", dest); dest = crm_msg_crmd; } } else if(dest == 0) { ais_err("Invalid destination: %d", dest); log_ais_message(LOG_ERR, mutable); log_printf(LOG_ERR, "%s", get_ais_data(mutable)); rc = 1; goto bail; } lookup = msg_type2text(dest); conn = pcmk_children[dest].async_conn; /* the cluster fails in weird and wonderfully obscure ways when this is not true */ AIS_ASSERT(ais_str_eq(lookup, pcmk_children[dest].name)); if(mutable->header.id == service_id) { mutable->header.id = 0; /* reset this back to zero for IPC messages */ } else if(mutable->header.id != 0) { ais_err("reset header id back to zero from %d", mutable->header.id); mutable->header.id = 0; /* reset this back to zero for IPC messages */ } + reason = "ipc delivery failed"; rc = send_client_ipc(conn, mutable); } else if(local_origin) { /* forward to other hosts */ ais_debug_3("Forwarding to cluster"); reason = "cluster delivery failed"; rc = send_cluster_msg_raw(mutable); } if(rc != 0) { ais_warn("Sending message to %s.%s failed: %s (rc=%d)", ais_dest(&(mutable->host)), msg_type2text(dest), reason, rc); log_ais_message(LOG_DEBUG, mutable); } bail: ais_free(mutable); return rc==0?TRUE:FALSE; } int send_cluster_msg_raw(const AIS_Message *ais_msg) { int rc = 0; struct iovec iovec; static uint32_t msg_id = 0; AIS_Message *mutable = ais_msg_copy(ais_msg); 
AIS_ASSERT(local_nodeid != 0); AIS_ASSERT(ais_msg->header.size == (sizeof(AIS_Message) + ais_data_len(ais_msg))); if(mutable->id == 0) { msg_id++; AIS_CHECK(msg_id != 0 /* detect wrap-around */, msg_id++; ais_err("Message ID wrapped around")); mutable->id = msg_id; } mutable->header.error = CS_OK; mutable->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); iovec.iov_base = (char *)mutable; iovec.iov_len = mutable->header.size; ais_debug_3("Sending message (size=%u)", (unsigned int)iovec.iov_len); #if AIS_COROSYNC rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); #else rc = totempg_groups_mcast_joined(openais_group_handle, &iovec, 1, TOTEMPG_SAFE); #endif if(rc == 0 && mutable->is_compressed == FALSE) { ais_debug_2("Message sent: %.80s", mutable->data); } AIS_CHECK(rc == 0, ais_err("Message not sent (%d): %.120s", rc, mutable->data)); ais_free(mutable); return rc; } #define min(x,y) (x)<(y)?(x):(y) void send_cluster_id(void) { int rc = 0; int lpc = 0; int len = 0; struct iovec iovec; struct crm_identify_msg_s *msg = NULL; AIS_ASSERT(local_nodeid != 0); if(local_born_on == 0 && have_reliable_membership_id) { local_born_on = membership_seq; } ais_malloc0(msg, sizeof(struct crm_identify_msg_s)); msg->header.size = sizeof(struct crm_identify_msg_s); msg->id = local_nodeid; /* msg->header.error = CS_OK; */ msg->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 1); len = min(local_uname_len, MAX_NAME-1); memset(msg->uname, 0, MAX_NAME); memcpy(msg->uname, local_uname, len); len = min(strlen(VERSION), MAX_NAME-1); memset(msg->version, 0, MAX_NAME); memcpy(msg->version, VERSION, len); msg->votes = 1; msg->pid = getpid(); msg->processes = crm_proc_ais; msg->born_on = local_born_on; for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].pid != 0) { msg->processes |= 
pcmk_children[lpc].flag; } } ais_debug("Local update: id=%u, born="U64T", seq="U64T"", local_nodeid, local_born_on, membership_seq); update_member( local_nodeid, local_born_on, membership_seq, msg->votes, msg->processes, NULL, NULL, VERSION); iovec.iov_base = (char *)msg; iovec.iov_len = msg->header.size; #if AIS_COROSYNC rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); #else rc = totempg_groups_mcast_joined(openais_group_handle, &iovec, 1, TOTEMPG_SAFE); #endif AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(msg); } static gboolean ghash_send_removal(gpointer key, gpointer value, gpointer data) { send_quorum_details(value); if(send_client_msg(value, crm_class_rmpeer, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void ais_remove_peer(char *node_id) { uint32_t id = ais_get_int(node_id, NULL); crm_node_t *node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if(node == NULL) { ais_info("Peer %u is unknown", id); } else if(ais_str_eq(CRM_NODE_MEMBER, node->state)) { ais_warn("Peer %u/%s is still active", id, node->uname); } else if(g_hash_table_remove(membership_list, GUINT_TO_POINTER(id))) { plugin_expected_votes--; ais_notice("Removed dead peer %u from the membership list", id); ais_info("Sending removal of %u to %d children", id, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_removal, node_id); } else { ais_warn("Peer %u/%s was not removed", id, node->uname); } } gboolean process_ais_message(const AIS_Message *msg) { int len = ais_data_len(msg); char *data = get_ais_data(msg); do_ais_log(LOG_DEBUG, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname==local_uname?"false":"true", ais_data_len(msg), data); if(data && len > 12 && strncmp("remove-peer:", data, 12) == 0) { char 
*node = data+12; ais_remove_peer(node); } ais_free(data); return TRUE; } static void member_dump_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; ais_info(" node id:%u, uname=%s state=%s processes=%.16x born="U64T" seen="U64T" addr=%s version=%s", node->id, node->uname?node->uname:"-unknown-", node->state, node->processes, node->born, node->last_seen, node->addr?node->addr:"-unknown-", node->version?node->version:"-unknown-"); } void pcmk_exec_dump(void) { /* Called after SIG_USR2 */ process_ais_conf(); ais_info("Local id: %u, uname: %s, born: "U64T, local_nodeid, local_uname, local_born_on); ais_info("Membership id: "U64T", quorate: %s, expected: %u, actual: %u", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_dump_fn, NULL); } diff --git a/lib/common/ais.c b/lib/common/ais.c index de5c50feae..df4b25fac6 100644 --- a/lib/common/ais.c +++ b/lib/common/ais.c @@ -1,750 +1,752 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include "stack.h" #ifdef AIS_COROSYNC # include #endif enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if(safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if(safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if(safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if(safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if(safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if(safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if(safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; + } else if(safe_str_eq(text, "stonith-ng")) { + type = crm_msg_stonith_ng; } else if(safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. 
Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if(scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } char *get_ais_data(const AIS_Message *msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if(msg->is_compressed == FALSE) { crm_debug_2("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_debug_2("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, (char*)msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc = BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_AIS int ais_fd_sync = -1; int ais_fd_async = -1; /* never send messages via this channel */ void *ais_ipc_ctx = NULL; #ifdef AIS_COROSYNC # ifndef TRADITIONAL_AIS_IPC hdb_handle_t ais_ipc_handle = 0; # endif #endif GFDSource *ais_source = NULL; GFDSource *ais_source_sync = NULL; static char *ais_cluster_name = NULL; gboolean get_ais_nodeid(uint32_t *id, char **uname) { struct iovec iov; int retries = 0; int rc = CS_OK; coroipc_response_header_t header; struct crm_ais_nodeid_resp_s answer; header.error = CS_OK; header.id = crm_class_nodeid; header.size = sizeof(coroipc_response_header_t); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(uname != NULL, return FALSE); iov.iov_base = &header; iov.iov_len = header.size; retry: errno = 0; #ifdef TRADITIONAL_AIS_IPC rc = saSendReceiveReply(ais_fd_sync, &header, header.size, &answer, sizeof (struct crm_ais_nodeid_resp_s)); #else # ifdef AIS_WHITETANK rc = openais_msg_send_reply_receive( ais_ipc_ctx, &iov, 1, &answer, sizeof (answer)); # else rc = coroipcc_msg_send_reply_receive( ais_ipc_handle, &iov, 1, &answer, sizeof (answer)); # endif #endif if(rc == CS_OK) { CRM_CHECK(answer.header.size == sizeof (struct crm_ais_nodeid_resp_s), crm_err("Odd message: id=%d, size=%d, error=%d", answer.header.id, answer.header.size, 
answer.header.error)); CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id)); } if(rc == CS_ERR_TRY_AGAIN && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } if(rc != CS_OK) { crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } else if(answer.header.error != CS_OK) { crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); *id = answer.id; *uname = crm_strdup(answer.uname); ais_cluster_name = crm_strdup(answer.cname); return TRUE; } gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if(ais_cluster_name) { *cname = crm_strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; int retries = 0; int rc = CS_OK; int buf_len = sizeof(coroipc_response_header_t); char *buf = NULL; struct iovec iov; coroipc_response_header_t *header; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if(data == NULL) { data = ""; } if(local_pid == 0) { local_pid = getpid(); } if(sender == crm_msg_none) { sender = local_pid; } crm_malloc0(ais_msg, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if(node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { 
ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = 0; memset(ais_msg->sender.uname, 0, MAX_NAME); ais_msg->sender.id = 0; ais_msg->size = 1 + strlen(data); if(ais_msg->size < CRM_BZ2_THRESHOLD) { failback: crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = crm_strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_debug_5("Compressing message payload"); crm_malloc(compressed, len); rc = BZ2_bzBuffToBuffCompress( compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); crm_free(uncompressed); if(rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); goto failback; } crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; crm_free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_debug_2("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_debug_3("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed?" 
compressed":"", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; retry: errno = 0; crm_realloc(buf, buf_len); #ifdef TRADITIONAL_AIS_IPC rc = saSendReceiveReply(ais_fd_sync, ais_msg, ais_msg->header.size, buf, buf_len); #else # ifdef AIS_WHITETANK rc = openais_msg_send_reply_receive(ais_ipc_ctx, &iov, 1, buf, buf_len); # else rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); # endif #endif header = (coroipc_response_header_t *)buf; if(rc == CS_ERR_TRY_AGAIN && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } else if(rc == CS_OK) { CRM_CHECK_AND_STORE(header->size == sizeof (coroipc_response_header_t), crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", header->id, header->size, class, header->error)); if(buf_len < header->size) { crm_err("Increasing buffer length to %d and retrying", header->size); buf_len = header->size + 1; goto retry; } else if(header->id == crm_class_nodeid && header->size == sizeof (struct crm_ais_nodeid_resp_s)){ struct crm_ais_nodeid_resp_s *answer = (struct crm_ais_nodeid_resp_s *)header; crm_err("Server details: id=%u uname=%s counter=%u", answer->id, answer->uname, answer->counter); } else { CRM_CHECK_AND_STORE(header->id == CRM_MESSAGE_IPC_ACK, crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id)); CRM_CHECK(header->error == CS_OK, rc = header->error); } } if(rc != CS_OK) { crm_perror(LOG_ERR,"Sending message %d: FAILED (rc=%d): %s", ais_msg->id, rc, ais_error2text(rc)); ais_fd_async = -1; } else { crm_debug_4("Message %d: sent", ais_msg->id); } crm_free(buf); crm_free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode *msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; 
if(ais_fd_async < 0 || ais_source == NULL) { crm_err("Not connected to AIS"); return FALSE; } data = dump_xml_unformatted(msg); rc = send_ais_text(0, data, local, node, dest); crm_free(data); return rc; } void terminate_ais_connection(void) { #ifndef TRADITIONAL_AIS_IPC if(ais_ipc_ctx) { # ifdef AIS_WHITETANK openais_service_disconnect(ais_ipc_ctx); # else coroipcc_service_disconnect(ais_ipc_handle); # endif } #else if(ais_fd_sync > 0) { close(ais_fd_sync); } if(ais_fd_async > 0) { close(ais_fd_async); } #endif crm_notice("Disconnected from AIS"); /* G_main_del_fd(ais_source); */ /* G_main_del_fd(ais_source_sync); */ } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; gboolean ais_dispatch(int sender, gpointer user_data) { char *data = NULL; char *buffer = NULL; char *uncompressed = NULL; int rc = CS_OK; xmlNode *xml = NULL; AIS_Message *msg = NULL; gboolean (*dispatch)(AIS_Message*,char*,int) = user_data; #ifdef TRADITIONAL_AIS_IPC coroipc_response_header_t *header = NULL; static int header_len = sizeof(coroipc_response_header_t); crm_malloc0(header, header_len); buffer = (char*)header; errno = 0; rc = saRecvRetry(sender, header, header_len); if (rc != CS_OK) { crm_perror(LOG_ERR, "Receiving message header failed: (%d/%d) %s", rc, errno, ais_error2text(rc)); goto bail; } else if(header->size == header_len) { crm_err("Empty message: id=%d, size=%d, error=%d, header_len=%d", header->id, header->size, header->error, header_len); goto done; } else if(header->size == 0 || header->size < header_len) { crm_err("Mangled header: size=%d, header=%d, error=%d", header->size, header_len, header->error); goto done; } else if(header->error != CS_OK) { crm_err("Header contined error: %d", header->error); } crm_debug_2("Looking for %d (%d - %d) more bytes", header->size - header_len, header->size, header_len); crm_realloc(header, header->size); /* Use a char* so we can store the remainder into an offset */ buffer = (char*)header; errno = 0; rc = 
saRecvRetry(sender, buffer+header_len, header->size - header_len); #else # ifdef AIS_WHITETANK crm_malloc0(buffer, 1000000); rc = openais_dispatch_recv (ais_ipc_ctx, buffer, 0); # else rc = coroipcc_dispatch_get (ais_ipc_handle, (void**)&buffer, 0); # endif #endif if (rc == 0) { /* Zero is a legal "no message afterall" value */ goto done; } else if (rc != CS_OK) { crm_perror(LOG_ERR,"Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); goto bail; } msg = (AIS_Message*)buffer; crm_debug_3("Got new%s message (size=%d, %d, %d)", msg->is_compressed?" compressed":"", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if(msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if(check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_debug_5("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, data, msg->compressed_size, 1, 0); if(rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if(check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if(safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); goto done; } if(msg->header.id != crm_class_members) { crm_update_peer(msg->sender.id, 0,0,0,0, msg->sender.uname, msg->sender.uname, NULL, NULL); } if(msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); reap_crm_member(id); goto done; } else if(msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { const char *value = NULL; gboolean quorate = FALSE; xml = string2xml(data); if(xml == NULL) { crm_err("Invalid membership update: %s", data); goto badmsg; } value = crm_element_value(xml, "quorate"); CRM_CHECK(value != NULL, 
crm_log_xml_err(xml, "No quorum value:"); goto badmsg); if(crm_is_true(value)) { quorate = TRUE; } value = crm_element_value(xml, "id"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg); crm_peer_seq = crm_int_helper(value, NULL); if(quorate != crm_have_quorum) { crm_notice("Membership %s: quorum %s", value, quorate?"acquired":"lost"); crm_have_quorum = quorate; } else { crm_info("Membership %s: quorum %s", value, quorate?"retained":"still lost"); } xml_child_iter(xml, node, crm_update_ais_node(node, crm_peer_seq)); } if(dispatch != NULL) { dispatch(msg, data, sender); } done: crm_free(uncompressed); free_xml(xml); #ifdef AIS_COROSYNC # ifndef TRADITIONAL_AIS_IPC coroipcc_dispatch_put (ais_ipc_handle); buffer = NULL; # endif #endif crm_free(buffer); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; bail: crm_err("AIS connection failed"); #ifdef AIS_COROSYNC # ifndef TRADITIONAL_AIS_IPC buffer = NULL; # endif #endif crm_free(buffer); return FALSE; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } gboolean init_ais_connection( gboolean (*dispatch)(AIS_Message*,char*,int), void (*destroy)(gpointer), char **our_uuid, char **our_uname, int *nodeid) { int pid = 0; int retries = 0; int rc = CS_OK; char *pid_s = NULL; struct utsname name; uint32_t local_nodeid = 0; char *local_uname = NULL; retry: crm_info("Creating connection to our AIS plugin"); #ifdef TRADITIONAL_AIS_IPC rc = saServiceConnect (&ais_fd_sync, &ais_fd_async, PCMK_SERVICE_ID); #else # ifdef AIS_WHITETANK rc = openais_service_connect(PCMK_SERVICE_ID, &ais_ipc_ctx); if(ais_ipc_ctx) { ais_fd_async = 
openais_fd_get(ais_ipc_ctx); } # else rc = coroipcc_service_connect( COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, &ais_ipc_handle); if(ais_ipc_handle) { coroipcc_fd_get(ais_ipc_handle, &ais_fd_async); } # endif #endif if(ais_fd_async <= 0 && rc == CS_OK) { crm_err("No context created, but connection reported 'ok'"); rc = CS_ERR_LIBRARY; } if (rc != CS_OK) { crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, ais_error2text(rc), rc); } switch(rc) { case CS_OK: break; case CS_ERR_TRY_AGAIN: if(retries < 30) { sleep(1); retries++; goto retry; } crm_err("Retry count exceeded"); return FALSE; default: return FALSE; } if(destroy == NULL) { destroy = ais_destroy; } crm_info("AIS connection established"); pid = getpid(); pid_s = crm_itoa(pid); send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); crm_peer_init(); get_ais_nodeid(&local_nodeid, &local_uname); if(uname(&name) < 0) { crm_perror(LOG_ERR,"uname(2) call failed"); exit(100); } if(safe_str_neq(name.nodename, local_uname)) { crm_crit("Node name mismatch! 
OpenAIS supplied %s, our lookup returned %s", local_uname, name.nodename); crm_notice("Node name mismatches usually occur when assigned automatically by DHCP servers"); crm_notice("If this node was part of the cluster with a different name," " you will need to remove the old entry with crm_node --remove"); } if(our_uuid != NULL) { *our_uuid = crm_strdup(local_uname); } if(our_uname != NULL) { *our_uname = local_uname; } if(nodeid != NULL) { *nodeid = local_nodeid; } if(local_nodeid != 0) { /* Ensure the local node always exists */ crm_update_peer(local_nodeid, 0, 0, 0, 0, local_uname, local_uname, NULL, NULL); } if(dispatch) { ais_source = G_main_add_fd( G_PRIORITY_HIGH, ais_fd_async, FALSE, ais_dispatch, dispatch, destroy); } return TRUE; } gboolean check_message_sanity(const AIS_Message *msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if(sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if(sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if(sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), 
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if(repaired) { crm_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_debug_3("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif diff --git a/lib/common/utils.c b/lib/common/utils.c index 49d694836c..ef021258e0 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1,2184 +1,2342 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_HB_CONFIG_H #include /* for HB_COREDIR */ #endif #if HAVE_GLUE_CONFIG_H #include /* for HB_COREDIR */ #endif #ifndef MAXLINE # define MAXLINE 512 #endif #ifdef HAVE_GETOPT_H # include #endif static uint ref_counter = 0; unsigned int crm_log_level = LOG_INFO; gboolean crm_config_error = FALSE; gboolean crm_config_warning = FALSE; const char *crm_system_name = "unknown"; int node_score_red = 0; int node_score_green = 0; int node_score_yellow = 0; int node_score_infinity = INFINITY; void crm_set_env_options(void); gboolean check_time(const char *value) { if(crm_get_msec(value) < 5000) { return FALSE; } return TRUE; } gboolean check_timer(const char *value) { if(crm_get_msec(value) < 0) { return FALSE; } return TRUE; } gboolean check_boolean(const char *value) { int tmp = FALSE; if(crm_str_to_boolean(value, &tmp) != 1) { return FALSE; } return TRUE; } gboolean check_number(const char *value) { errno = 0; if(value == NULL) { return FALSE; } else if(safe_str_eq(value, MINUS_INFINITY_S)) { } else if(safe_str_eq(value, INFINITY_S)) { } else { crm_int_helper(value, NULL); } if(errno != 0) { return FALSE; } return TRUE; } int char2score(const char *score) { int score_f = 0; if(score == NULL) { } else if(safe_str_eq(score, MINUS_INFINITY_S)) { score_f = -node_score_infinity; } else if(safe_str_eq(score, INFINITY_S)) { score_f = node_score_infinity; } else if(safe_str_eq(score, "+"INFINITY_S)) { score_f = node_score_infinity; } else if(safe_str_eq(score, "red")) { score_f = node_score_red; } else if(safe_str_eq(score, "yellow")) { score_f 
= node_score_yellow; } else if(safe_str_eq(score, "green")) { score_f = node_score_green; } else { score_f = crm_parse_int(score, NULL); if(score_f > 0 && score_f > node_score_infinity) { score_f = node_score_infinity; } else if(score_f < 0 && score_f < -node_score_infinity) { score_f = -node_score_infinity; } } return score_f; } char * score2char(int score) { if(score >= node_score_infinity) { return crm_strdup(INFINITY_S); } else if(score <= -node_score_infinity) { return crm_strdup("-"INFINITY_S); } return crm_itoa(score); } const char * cluster_option(GHashTable* options, gboolean(*validate)(const char*), const char *name, const char *old_name, const char *def_value) { const char *value = NULL; CRM_ASSERT(name != NULL); if(options != NULL) { value = g_hash_table_lookup(options, name); } if(value == NULL && old_name && options != NULL) { value = g_hash_table_lookup(options, old_name); if(value != NULL) { crm_config_warn("Using deprecated name '%s' for" " cluster option '%s'", old_name, name); g_hash_table_insert( options, crm_strdup(name), crm_strdup(value)); value = g_hash_table_lookup(options, old_name); } } if(value == NULL) { crm_debug("Using default value '%s' for cluster option '%s'", def_value, name); if(options == NULL) { return def_value; } g_hash_table_insert( options, crm_strdup(name), crm_strdup(def_value)); value = g_hash_table_lookup(options, name); } if(validate && validate(value) == FALSE) { crm_config_err("Value '%s' for cluster option '%s' is invalid." 
" Defaulting to %s", value, name, def_value); g_hash_table_replace(options, crm_strdup(name), crm_strdup(def_value)); value = g_hash_table_lookup(options, name); } return value; } const char * get_cluster_pref(GHashTable *options, pe_cluster_option *option_list, int len, const char *name) { int lpc = 0; const char *value = NULL; gboolean found = FALSE; for(lpc = 0; lpc < len; lpc++) { if(safe_str_eq(name, option_list[lpc].name)) { found = TRUE; value = cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } CRM_CHECK(found, crm_err("No option named: %s", name)); CRM_ASSERT(value != NULL); return value; } void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option *option_list, int len) { int lpc = 0; fprintf(stdout, "" "\n" "\n" " %s\n" " %s\n" " %s\n" " \n", name, version, desc_long, desc_short); for(lpc = 0; lpc < len; lpc++) { if(option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) { continue; } fprintf(stdout, " \n" " %s\n" " \n" " %s%s%s\n" " \n", option_list[lpc].name, option_list[lpc].description_short, option_list[lpc].type, option_list[lpc].default_value, option_list[lpc].description_long?option_list[lpc].description_long:option_list[lpc].description_short, option_list[lpc].values?" 
Allowed values: ":"", option_list[lpc].values?option_list[lpc].values:""); } fprintf(stdout, " \n\n"); } void verify_all_options(GHashTable *options, pe_cluster_option *option_list, int len) { int lpc = 0; for(lpc = 0; lpc < len; lpc++) { cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } char * generateReference(const char *custom1, const char *custom2) { const char *local_cust1 = custom1; const char *local_cust2 = custom2; int reference_len = 4; char *since_epoch = NULL; reference_len += 20; /* too big */ reference_len += 40; /* too big */ if(local_cust1 == NULL) { local_cust1 = "_empty_"; } reference_len += strlen(local_cust1); if(local_cust2 == NULL) { local_cust2 = "_empty_"; } reference_len += strlen(local_cust2); crm_malloc0(since_epoch, reference_len); if(since_epoch != NULL) { sprintf(since_epoch, "%s-%s-%ld-%u", local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++); } return since_epoch; } gboolean decodeNVpair(const char *srcstring, char separator, char **name, char **value) { int lpc = 0; int len = 0; const char *temp = NULL; CRM_ASSERT(name != NULL && value != NULL); *name = NULL; *value = NULL; crm_debug_4("Attempting to decode: [%s]", srcstring); if (srcstring != NULL) { len = strlen(srcstring); while(lpc <= len) { if (srcstring[lpc] == separator) { crm_malloc0(*name, lpc+1); if(*name == NULL) { break; /* and return FALSE */ } strncpy(*name, srcstring, lpc); (*name)[lpc] = '\0'; /* this sucks but as the strtok manpage says.. 
* it *is* a bug */ len = len-lpc; len--; if(len <= 0) { *value = NULL; } else { crm_malloc0(*value, len+1); if(*value == NULL) { crm_free(*name); break; /* and return FALSE */ } temp = srcstring+lpc+1; strncpy(*value, temp, len); (*value)[len] = '\0'; } return TRUE; } lpc++; } } if(*name != NULL) { crm_free(*name); } *name = NULL; *value = NULL; return FALSE; } char * crm_concat(const char *prefix, const char *suffix, char join) { int len = 0; char *new_str = NULL; CRM_ASSERT(prefix != NULL); CRM_ASSERT(suffix != NULL); len = strlen(prefix) + strlen(suffix) + 2; crm_malloc0(new_str, (len)); sprintf(new_str, "%s%c%s", prefix, join, suffix); new_str[len-1] = 0; return new_str; } char * generate_hash_key(const char *crm_msg_reference, const char *sys) { char *hash_key = crm_concat(sys?sys:"none", crm_msg_reference, '_'); crm_debug_3("created hash key: (%s)", hash_key); return hash_key; } char * generate_hash_value(const char *src_node, const char *src_subsys) { char *hash_value = NULL; if (src_node == NULL || src_subsys == NULL) { return NULL; } if (strcasecmp(CRM_SYSTEM_DC, src_subsys) == 0) { hash_value = crm_strdup(src_subsys); CRM_ASSERT(hash_value); return hash_value; } hash_value = crm_concat(src_node, src_subsys, '_'); crm_info("created hash value: (%s)", hash_value); return hash_value; } char * crm_itoa(int an_int) { int len = 32; char *buffer = NULL; crm_malloc0(buffer, (len+1)); if(buffer != NULL) { snprintf(buffer, len, "%d", an_int); } return buffer; } extern int LogToLoggingDaemon(int priority, const char * buf, int bstrlen, gboolean use_pri_str); #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER GLogFunc glib_log_default; static void crm_glib_handler(const gchar *log_domain, GLogLevelFlags flags, const gchar *message, gpointer user_data) { int log_level = LOG_WARNING; GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK); switch(msg_level) { case G_LOG_LEVEL_CRITICAL: /* log and record how we got here */ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, message, 
TRUE, TRUE); return; case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break; case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break; case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break; case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break; case G_LOG_LEVEL_WARNING: case G_LOG_FLAG_RECURSION: case G_LOG_FLAG_FATAL: case G_LOG_LEVEL_MASK: log_level = LOG_WARNING; break; } do_crm_log(log_level, "%s: %s", log_domain, message); } #endif void crm_log_deinit(void) { #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER g_log_set_default_handler(glib_log_default, NULL); #endif } gboolean crm_log_init( const char *entity, int level, gboolean coredir, gboolean to_stderr, int argc, char **argv) { /* Redirect messages from glib functions to our handler */ /* cl_malloc_forced_for_glib(); */ #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL); #endif /* and for good measure... - this enum is a bit field (!) */ g_log_set_always_fatal((GLogLevelFlags)0); /*value out of range*/ crm_system_name = entity; setenv("PCMK_service", crm_system_name, 1); cl_log_set_entity(entity); if(argc == 0) { /* Nuke any syslog activity */ unsetenv("HA_logfacility"); } else if(getenv("HA_logfacility") == NULL) { /* Set a default */ cl_log_set_facility(HA_LOG_FACILITY); } /* else: picked up by crm_set_env_options() */ if(coredir) { int user = getuid(); struct passwd *pwent = NULL; const char *base = HA_COREDIR; pwent = getpwuid(user); if (chdir(base) < 0) { crm_perror(LOG_ERR, "Cannot change active directory to %s", base); } else if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get name for uid: %d", user); } else if (chdir(pwent->pw_name) < 0) { crm_perror(LOG_ERR, "Cannot change active directory to %s/%s", base, pwent->pw_name); } else { crm_info("Changed active directory to %s/%s", base, pwent->pw_name); } } set_crm_log_level(level); crm_set_env_options(); cl_log_args(argc, argv); cl_log_enable_stderr(to_stderr); crm_signal(DEBUG_INC, alter_debug); 
crm_signal(DEBUG_DEC, alter_debug); return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; while(crm_log_level < 100 && crm_log_level < level) { alter_debug(DEBUG_INC); } while(crm_log_level > 0 && crm_log_level > level) { alter_debug(DEBUG_DEC); } return old; } unsigned int get_crm_log_level(void) { return crm_log_level; } static int crm_version_helper(const char *text, char **end_text) { int atoi_result = -1; CRM_ASSERT(end_text != NULL); errno = 0; if(text != NULL && text[0] != 0) { atoi_result = (int)strtol(text, end_text, 10); if(errno == EINVAL) { crm_err("Conversion of '%s' %c failed", text, text[0]); atoi_result = -1; } } return atoi_result; } /* * version1 < version2 : -1 * version1 = version2 : 0 * version1 > version2 : 1 */ int compare_version(const char *version1, const char *version2) { int rc = 0; int lpc = 0; char *ver1_copy = NULL, *ver2_copy = NULL; char *rest1 = NULL, *rest2 = NULL; if(version1 == NULL && version2 == NULL) { return 0; } else if(version1 == NULL) { return -1; } else if(version2 == NULL) { return 1; } ver1_copy = crm_strdup(version1); ver2_copy = crm_strdup(version2); rest1 = ver1_copy; rest2 = ver2_copy; while(1) { int digit1 = 0; int digit2 = 0; lpc++; if(rest1 == rest2) { break; } if(rest1 != NULL) { digit1 = crm_version_helper(rest1, &rest1); } if(rest2 != NULL) { digit2 = crm_version_helper(rest2, &rest2); } if(digit1 < digit2){ rc = -1; crm_debug_5("%d < %d", digit1, digit2); break; } else if (digit1 > digit2){ rc = 1; crm_debug_5("%d > %d", digit1, digit2); break; } if(rest1 != NULL && rest1[0] == '.') { rest1++; } if(rest1 != NULL && rest1[0] == 0) { rest1 = NULL; } if(rest2 != NULL && rest2[0] == '.') { rest2++; } if(rest2 != NULL && rest2[0] == 0) { rest2 = NULL; } } crm_free(ver1_copy); crm_free(ver2_copy); if(rc == 0) { crm_debug_3("%s == %s (%d)", version1, version2, lpc); } else if(rc < 0) { crm_debug_3("%s < %s (%d)", version1, version2, lpc); 
} else if(rc > 0) { crm_debug_3("%s > %s (%d)", version1, version2, lpc); } return rc; } gboolean do_stderr = FALSE; void alter_debug(int nsig) { crm_signal(DEBUG_INC, alter_debug); crm_signal(DEBUG_DEC, alter_debug); switch(nsig) { case DEBUG_INC: if (crm_log_level < 100) { crm_log_level++; } break; case DEBUG_DEC: if (crm_log_level > 0) { crm_log_level--; } break; default: fprintf(stderr, "Unknown signal %d\n", nsig); cl_log(LOG_ERR, "Unknown signal %d", nsig); break; } } void g_hash_destroy_str(gpointer data) { crm_free(data); } #include /* #include */ /* #include */ long long crm_int_helper(const char *text, char **end_text) { long long result = -1; char *local_end_text = NULL; errno = 0; if(text != NULL) { #ifdef ANSI_ONLY if(end_text != NULL) { result = strtol(text, end_text, 10); } else { result = strtol(text, &local_end_text, 10); } #else if(end_text != NULL) { result = strtoll(text, end_text, 10); } else { result = strtoll(text, &local_end_text, 10); } #endif /* CRM_CHECK(errno != EINVAL); */ if(errno == EINVAL) { crm_err("Conversion of %s failed", text); result = -1; } else if(errno == ERANGE) { crm_err("Conversion of %s was clipped: %lld", text, result); } else if(errno != 0) { crm_perror(LOG_ERR,"Conversion of %s failed:", text); } if(local_end_text != NULL && local_end_text[0] != '\0') { crm_err("Characters left over after parsing '%s': '%s'", text, local_end_text); } } return result; } int crm_parse_int(const char *text, const char *default_text) { int atoi_result = -1; if(text != NULL) { atoi_result = crm_int_helper(text, NULL); if(errno == 0) { return atoi_result; } } if(default_text != NULL) { atoi_result = crm_int_helper(default_text, NULL); if(errno == 0) { return atoi_result; } } else { crm_err("No default conversion value supplied"); } return -1; } gboolean safe_str_neq(const char *a, const char *b) { if(a == b) { return FALSE; } else if(a==NULL || b==NULL) { return TRUE; } else if(strcasecmp(a, b) == 0) { return FALSE; } return TRUE; } char * 
crm_strdup_fn(const char *src, const char *file, const char *fn, int line) { char *dup = NULL; CRM_CHECK(src != NULL, return NULL); crm_malloc0(dup, strlen(src) + 1); return strcpy(dup, src); } #define ENV_PREFIX "HA_" void crm_set_env_options(void) { cl_inherit_logging_environment(500); cl_log_set_logd_channel_source(NULL, NULL); if(debug_level > 0 && (debug_level+LOG_INFO) > (int)crm_log_level) { set_crm_log_level(LOG_INFO + debug_level); } } gboolean crm_is_true(const char * s) { gboolean ret = FALSE; if(s != NULL) { crm_str_to_boolean(s, &ret); } return ret; } int crm_str_to_boolean(const char * s, int * ret) { if(s == NULL) { return -1; } else if (strcasecmp(s, "true") == 0 || strcasecmp(s, "on") == 0 || strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0){ *ret = TRUE; return 1; } else if (strcasecmp(s, "false") == 0 || strcasecmp(s, "off") == 0 || strcasecmp(s, "no") == 0 || strcasecmp(s, "n") == 0 || strcasecmp(s, "0") == 0){ *ret = FALSE; return 1; } return -1; } #ifndef NUMCHARS # define NUMCHARS "0123456789." 
#endif #ifndef WHITESPACE # define WHITESPACE " \t\n\r\f" #endif unsigned long long crm_get_interval(const char * input) { ha_time_t *interval = NULL; char *input_copy = crm_strdup(input); char *input_copy_mutable = input_copy; unsigned long long msec = 0; if(input == NULL) { return 0; } else if(input[0] != 'P') { crm_free(input_copy); return crm_get_msec(input); } interval = parse_time_duration(&input_copy_mutable); msec = date_in_seconds(interval); free_ha_date(interval); crm_free(input_copy); return msec * 1000; } long long crm_get_msec(const char * input) { const char *cp = input; const char *units; long long multiplier = 1000; long long divisor = 1; long long msec = -1; char *end_text = NULL; /* double dret; */ if(input == NULL) { return msec; } cp += strspn(cp, WHITESPACE); units = cp + strspn(cp, NUMCHARS); units += strspn(units, WHITESPACE); if (strchr(NUMCHARS, *cp) == NULL) { return msec; } if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) { multiplier = 1; divisor = 1; } else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) { multiplier = 1; divisor = 1000; } else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) { multiplier = 1000; divisor = 1; } else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) { multiplier = 60*1000; divisor = 1; } else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) { multiplier = 60*60*1000; divisor = 1; } else if (*units != EOS && *units != '\n' && *units != '\r') { return msec; } msec = crm_int_helper(cp, &end_text); msec *= multiplier; msec /= divisor; /* dret += 0.5; */ /* msec = (long long)dret; */ return msec; } const char * op_status2text(op_status_t status) { switch(status) { case LRM_OP_PENDING: return "pending"; break; case LRM_OP_DONE: return "complete"; break; case LRM_OP_ERROR: return "Error"; break; case LRM_OP_TIMEOUT: return "Timed Out"; break; case LRM_OP_NOTSUPPORTED: return "NOT 
SUPPORTED"; break; case LRM_OP_CANCELLED: return "Cancelled"; break; } crm_err("Unknown status: %d", status); return "UNKNOWN!"; } char * generate_op_key(const char *rsc_id, const char *op_type, int interval) { int len = 35; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); len += strlen(op_type); len += strlen(rsc_id); crm_malloc0(op_id, len); CRM_CHECK(op_id != NULL, return NULL); sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval); return op_id; } gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) { char *mutable_key = NULL; char *mutable_key_ptr = NULL; int len = 0, offset = 0, ch = 0; CRM_CHECK(key != NULL, return FALSE); *interval = 0; len = strlen(key); offset = len-1; crm_debug_3("Source: %s", key); while(offset > 0 && isdigit(key[offset])) { int digits = len-offset; ch = key[offset] - '0'; CRM_CHECK(ch < 10, return FALSE); CRM_CHECK(ch >= 0, return FALSE); while(digits > 1) { digits--; ch = ch * 10; } *interval += ch; offset--; } crm_debug_3(" Interval: %d", *interval); CRM_CHECK(key[offset] == '_', return FALSE); mutable_key = crm_strdup(key); mutable_key_ptr = mutable_key_ptr; mutable_key[offset] = 0; offset--; while(offset > 0 && key[offset] != '_') { offset--; } CRM_CHECK(key[offset] == '_', crm_free(mutable_key); return FALSE); mutable_key_ptr = mutable_key+offset+1; crm_debug_3(" Action: %s", mutable_key_ptr); *op_type = crm_strdup(mutable_key_ptr); mutable_key[offset] = 0; offset--; CRM_CHECK(mutable_key != mutable_key_ptr, crm_free(mutable_key); return FALSE); crm_debug_3(" Resource: %s", mutable_key); *rsc_id = crm_strdup(mutable_key); crm_free(mutable_key); return TRUE; } char * generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type) { int len = 12; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); CRM_CHECK(notify_type != NULL, return NULL); len += strlen(op_type); len += 
strlen(rsc_id); len += strlen(notify_type); crm_malloc0(op_id, len); if(op_id != NULL) { sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type); } return op_id; } char * generate_transition_magic_v202(const char *transition_key, int op_status) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%s", op_status,transition_key); } return fail_state; } char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%d;%s", op_status, op_rc, transition_key); } return fail_state; } gboolean decode_transition_magic( const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc) { int res = 0; char *key = NULL; gboolean result = TRUE; CRM_CHECK(magic != NULL, return FALSE); CRM_CHECK(op_rc != NULL, return FALSE); CRM_CHECK(op_status != NULL, return FALSE); crm_malloc0(key, strlen(magic)); res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); if(res != 3) { crm_crit("Only found %d items in: %s", res, magic); result = FALSE; goto bail; } CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), result = FALSE; goto bail; ); bail: crm_free(key); return result; } char * generate_transition_key(int transition_id, int action_id, int target_rc, const char *node) { int len = 40; char *fail_state = NULL; CRM_CHECK(node != NULL, return NULL); len += strlen(node); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%d:%d:%s", action_id, transition_id, target_rc, node); } return fail_state; } gboolean decode_transition_key( const char *key, char **uuid, int *transition_id, int *action_id, int 
*target_rc) { int res = 0; gboolean done = FALSE; CRM_CHECK(uuid != NULL, return FALSE); CRM_CHECK(target_rc != NULL, return FALSE); CRM_CHECK(action_id != NULL, return FALSE); CRM_CHECK(transition_id != NULL, return FALSE); crm_malloc0(*uuid, strlen(key)); res = sscanf(key, "%d:%d:%d:%s", action_id, transition_id, target_rc, *uuid); switch(res) { case 4: /* Post Pacemaker 0.6 */ done = TRUE; break; case 3: case 2: /* this can be tricky - the UUID might start with an integer */ /* Until Pacemaker 0.6 */ done = TRUE; *target_rc = -1; res = sscanf(key, "%d:%d:%s", action_id, transition_id, *uuid); if(res == 2) { *action_id = -1; res = sscanf(key, "%d:%s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); } else if(res != 3) { CRM_CHECK(res == 3, done = FALSE); } break; case 1: /* Prior to Heartbeat 2.0.8 */ done = TRUE; *action_id = -1; *target_rc = -1; res = sscanf(key, "%d:%s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); break; default: crm_crit("Unhandled sscanf result (%d) for %s", res, key); } if(strlen(*uuid) != 36) { crm_warn("Bad UUID (%s) in sscanf result (%d) for %s", *uuid, res, key); } if(done == FALSE) { crm_err("Cannot decode '%s' rc=%d", key, res); crm_free(*uuid); *uuid = NULL; *target_rc = -1; *action_id = -1; *transition_id = -1; } return done; } void filter_action_parameters(xmlNode *param_set, const char *version) { char *key = NULL; char *timeout = NULL; char *interval = NULL; #if CRM_DEPRECATED_SINCE_2_0_5 const char *filter_205[] = { XML_ATTR_TE_TARGET_RC, XML_ATTR_LRM_PROBE, XML_RSC_ATTR_START, XML_RSC_ATTR_NOTIFY, XML_RSC_ATTR_UNIQUE, XML_RSC_ATTR_MANAGED, XML_RSC_ATTR_PRIORITY, XML_RSC_ATTR_MULTIPLE, XML_RSC_ATTR_STICKINESS, XML_RSC_ATTR_FAIL_STICKINESS, XML_RSC_ATTR_TARGET_ROLE, /* ignore clone fields */ XML_RSC_ATTR_INCARNATION, XML_RSC_ATTR_INCARNATION_MAX, XML_RSC_ATTR_INCARNATION_NODEMAX, XML_RSC_ATTR_MASTER_MAX, XML_RSC_ATTR_MASTER_NODEMAX, /* old field names */ "role", "crm_role", "te-target-rc", /* ignore notify 
fields */ "notify_stop_resource", "notify_stop_uname", "notify_start_resource", "notify_start_uname", "notify_active_resource", "notify_active_uname", "notify_inactive_resource", "notify_inactive_uname", "notify_promote_resource", "notify_promote_uname", "notify_demote_resource", "notify_demote_uname", "notify_master_resource", "notify_master_uname", "notify_slave_resource", "notify_slave_uname" }; #endif const char *attr_filter[] = { XML_ATTR_ID, XML_ATTR_CRM_VERSION, XML_LRM_ATTR_OP_DIGEST, }; gboolean do_delete = FALSE; int lpc = 0; static int meta_len = 0; if(meta_len == 0) { meta_len = strlen(CRM_META); } if(param_set == NULL) { return; } #if CRM_DEPRECATED_SINCE_2_0_5 if(version == NULL || compare_version("1.0.5", version) > 0) { for(lpc = 0; lpc < DIMOF(filter_205); lpc++) { xml_remove_prop(param_set, filter_205[lpc]); } } #endif for(lpc = 0; lpc < DIMOF(attr_filter); lpc++) { xml_remove_prop(param_set, attr_filter[lpc]); } key = crm_meta_name(XML_LRM_ATTR_INTERVAL); interval = crm_element_value_copy(param_set, key); crm_free(key); key = crm_meta_name(XML_ATTR_TIMEOUT); timeout = crm_element_value_copy(param_set, key); xml_prop_iter(param_set, prop_name, prop_value, do_delete = FALSE; if(strncasecmp(prop_name, CRM_META, meta_len) == 0) { do_delete = TRUE; } if(do_delete) { xml_remove_prop(param_set, prop_name); } ); if(crm_get_msec(interval) > 0 && compare_version(version, "1.0.8") > 0) { /* Re-instate the operation's timeout value */ if(timeout != NULL) { crm_xml_add(param_set, key, timeout); } } crm_free(interval); crm_free(timeout); crm_free(key); } void filter_reload_parameters(xmlNode *param_set, const char *restart_string) { int len = 0; char *name = NULL; char *match = NULL; if(param_set == NULL) { return; } xml_prop_iter(param_set, prop_name, prop_value, name = NULL; len = strlen(prop_name) + 3; crm_malloc0(name, len); sprintf(name, " %s ", prop_name); name[len-1] = 0; match = strstr(restart_string, name); if(match == NULL) { crm_debug_3("%s not 
found in %s", prop_name, restart_string); xml_remove_prop(param_set, prop_name); } crm_free(name); ); } void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; if(do_core == FALSE) { do_crm_log(LOG_ERR, "%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(do_fork) { pid=fork(); } else { do_crm_log(LOG_ERR, "%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } switch(pid) { case -1: do_crm_log(LOG_CRIT, "%s: Cannot create core for non-fatal assert at %s:%d : %s", function, file, line, assert_condition); return; default: /* Parent */ do_crm_log(LOG_ERR, "%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); do { rc = waitpid(pid, &status, 0); if(rc < 0 && errno != EINTR) { crm_perror(LOG_ERR,"%s: Cannot wait on forked child %d", function, pid); } } while(rc < 0 && errno == EINTR); return; case 0: /* Child */ abort(); break; } } char * generate_series_filename( const char *directory, const char *series, int sequence, gboolean bzip) { int len = 40; char *filename = NULL; const char *ext = "raw"; CRM_CHECK(directory != NULL, return NULL); CRM_CHECK(series != NULL, return NULL); len += strlen(directory); len += strlen(series); crm_malloc0(filename, len); CRM_CHECK(filename != NULL, return NULL); if(bzip) { ext = "bz2"; } sprintf(filename, "%s/%s-%d.%s", directory, series, sequence, ext); return filename; } int get_last_sequence(const char *directory, const char *series) { FILE *file_strm = NULL; int start = 0, length = 0, read_len = 0; char *series_file = NULL; char *buffer = NULL; int seq = 0; int len = 36; CRM_CHECK(directory != NULL, return 0); CRM_CHECK(series != NULL, return 0); len += strlen(directory); len += strlen(series); crm_malloc0(series_file, len); CRM_CHECK(series_file != NULL, return 0); sprintf(series_file, 
"%s/%s.last", directory, series); file_strm = fopen(series_file, "r"); if(file_strm == NULL) { crm_debug("Series file %s does not exist", series_file); crm_free(series_file); return 0; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, start); CRM_ASSERT(start == ftell(file_strm)); crm_debug_3("Reading %d bytes from file", length); crm_malloc0(buffer, (length+1)); read_len = fread(buffer, 1, length, file_strm); if(read_len != length) { crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len); crm_free(buffer); buffer = NULL; } else if(length <= 0) { crm_info("%s was not valid", series_file); crm_free(buffer); buffer = NULL; } crm_free(series_file); seq = crm_parse_int(buffer, "0"); crm_free(buffer); fclose(file_strm); return seq; } void write_last_sequence( const char *directory, const char *series, int sequence, int max) { int rc = 0; int len = 36; FILE *file_strm = NULL; char *series_file = NULL; CRM_CHECK(directory != NULL, return); CRM_CHECK(series != NULL, return); if(max == 0) { return; } while(max > 0 && sequence > max) { sequence -= max; } len += strlen(directory); len += strlen(series); crm_malloc0(series_file, len); sprintf(series_file, "%s/%s.last", directory, series); file_strm = fopen(series_file, "w"); if(file_strm == NULL) { crm_err("Cannout open series file %s for writing", series_file); goto bail; } rc = fprintf(file_strm, "%d", sequence); if(rc < 0) { crm_perror(LOG_ERR,"Cannot write to series file %s", series_file); } bail: if(file_strm != NULL) { fflush(file_strm); fclose(file_strm); } crm_free(series_file); } #define LOCKSTRLEN 11 int crm_pid_active(long pid) { int rc = 0; int running = 0; char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; if(pid <= 0) { return -1; } else if (kill(pid, 0) < 0 && errno == ESRCH) { return 0; } #ifndef HAVE_PROC_PID return 1; #endif /* check to make sure pid hasn't been reused by another process 
*/ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid); rc = readlink(proc_path, exe_path, PATH_MAX-1); if(rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); goto bail; } exe_path[rc] = 0; snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid()); rc = readlink(proc_path, myexe_path, PATH_MAX-1); if(rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); goto bail; } myexe_path[rc] = 0; if(strcmp(exe_path, myexe_path) == 0) { running = 1; } bail: return running; } int crm_read_pidfile(const char *filename) { int fd; long pid = -1; char buf[LOCKSTRLEN+1]; if ((fd = open(filename, O_RDONLY)) < 0) { goto bail; } if (read(fd, buf, sizeof(buf)) < 1) { goto bail; } if (sscanf(buf, "%lu", &pid) > 0) { if (pid <= 0){ pid = -LSB_STATUS_STOPPED; } } bail: close(fd); return pid; } int crm_lock_pidfile(const char *filename) { struct stat sbuf; int fd = 0, rc = 0; long pid = 0, mypid = 0; char lf_name[256], tf_name[256], buf[LOCKSTRLEN+1]; mypid = (unsigned long) getpid(); snprintf(lf_name, sizeof(lf_name), "%s",filename); snprintf(tf_name, sizeof(tf_name), "%s.%lu", filename, mypid); if ((fd = open(lf_name, O_RDONLY)) >= 0) { if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) { sleep(1); /* if someone was about to create one, * give'm a sec to do so * Though if they follow our protocol, * this won't happen. They should really * put the pid in, then link, not the * other way around. */ } if (read(fd, buf, sizeof(buf)) > 0) { if (sscanf(buf, "%lu", &pid) > 0) { if (pid > 1 && pid != getpid() && crm_pid_active(pid)) { /* locked by existing process - give up */ close(fd); return -1; } } } unlink(lf_name); close(fd); } if ((fd = open(tf_name, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { /* Hmmh, why did we fail? 
Anyway, nothing we can do about it */ return -3; } /* Slight overkill with the %*d format ;-) */ snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN-1, mypid); if (write(fd, buf, LOCKSTRLEN) != LOCKSTRLEN) { /* Again, nothing we can do about this */ rc = -3; close(fd); goto out; } close(fd); switch (link(tf_name, lf_name)) { case 0: if (stat(tf_name, &sbuf) < 0) { /* something weird happened */ rc = -3; } else if (sbuf.st_nlink < 2) { /* somehow, it didn't get through - NFS trouble? */ rc = -2; } else { rc = 0; } break; case EEXIST: rc = -1; break; default: rc = -3; } out: unlink(tf_name); return rc; } void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) { long pid; const char *devnull = "/dev/null"; if(daemonize == FALSE) { return; } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", name); crm_perror(LOG_ERR,"fork"); exit(LSB_EXIT_GENERIC); } else if (pid > 0) { exit(LSB_EXIT_OK); } if (crm_lock_pidfile(pidfile) < 0 ) { pid = crm_read_pidfile(pidfile); if(crm_pid_active(pid) > 0) { crm_warn("%s: already running [pid %ld] (%s).\n", name, pid, pidfile); exit(LSB_EXIT_OK); } } umask(022); close(STDIN_FILENO); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ close(STDOUT_FILENO); (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ close(STDERR_FILENO); (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ } gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group, gboolean need_both) { int s_res = -1; struct stat buf; char *full_file = NULL; const char *target = NULL; gboolean pass = TRUE; gboolean readwritable = FALSE; CRM_ASSERT(dir != NULL); if(file != NULL) { full_file = crm_concat(dir, file, '/'); target = full_file; s_res = stat(full_file, &buf); if( s_res == 0 && S_ISREG(buf.st_mode) == FALSE ) { crm_err("%s must be a regular file", target); pass = FALSE; goto out; } } if (s_res != 0) { target = dir; s_res = stat(dir, &buf); if(s_res != 0) { crm_err("%s must exist and be a 
directory", dir); pass = FALSE; goto out; } else if( S_ISDIR(buf.st_mode) == FALSE ) { crm_err("%s must be a directory", dir); pass = FALSE; } } if(user) { struct passwd *sys_user = NULL; sys_user = getpwnam(user); readwritable = (sys_user != NULL && buf.st_uid == sys_user->pw_uid && (buf.st_mode & (S_IRUSR|S_IWUSR))); if(readwritable == FALSE) { crm_err("%s must be owned and r/w by user %s", target, user); if(need_both) { pass = FALSE; } } } if(group) { struct group *sys_grp = getgrnam(group); readwritable = ( sys_grp != NULL && buf.st_gid == sys_grp->gr_gid && (buf.st_mode & (S_IRGRP|S_IWGRP))); if(readwritable == FALSE) { if(need_both || user == NULL) { pass = FALSE; crm_err("%s must be owned and r/w by group %s", target, group); } else { crm_warn("%s should be owned and r/w by group %s", target, group); } } } out: crm_free(full_file); return pass; } static unsigned long long crm_bit_filter = 0; /* 0x00000002ULL; */ static unsigned int bit_log_level = LOG_DEBUG_5; long long crm_clear_bit(const char *function, long long word, long long bit) { unsigned int level = bit_log_level; if(bit & crm_bit_filter) { level = LOG_ERR; } do_crm_log_unlikely(level, "Bit 0x%.16llx cleared by %s", bit, function); word &= ~bit; return word; } long long crm_set_bit(const char *function, long long word, long long bit) { unsigned int level = bit_log_level; if(bit & crm_bit_filter) { level = LOG_ERR; } do_crm_log_unlikely(level, "Bit 0x%.16llx set by %s", bit, function); word |= bit; return word; } static const char *cluster_type = NULL; gboolean is_openais_cluster(void) { if(cluster_type == NULL) { cluster_type = getenv("HA_cluster_type"); if(cluster_type == NULL) { cluster_type = "Heartbeat"; } } if(safe_str_eq("openais", cluster_type)) { #if SUPPORT_AIS return TRUE; #else crm_crit("The installation of Pacemaker only supports Heartbeat" " but you're trying to run it on %s. 
Terminating.", cluster_type); exit(100); #endif } return FALSE; } gboolean is_heartbeat_cluster(void) { #if SUPPORT_HEARTBEAT return !is_openais_cluster(); #else if(is_openais_cluster() == FALSE) { crm_crit("The installation of Pacemaker only supports OpenAIS" " but you're trying to run it on %s. Terminating.", cluster_type); exit(100); } return FALSE; #endif } gboolean crm_str_eq(const char *a, const char *b, gboolean use_case) { if(a == b) { return TRUE; } else if(a == NULL || b == NULL) { /* shouldn't be comparing NULLs */ return FALSE; } else if(use_case && a[0] != b[0]) { return FALSE; } else if(strcasecmp(a, b) == 0) { return TRUE; } return FALSE; } char *crm_meta_name(const char *field) { int lpc = 0; int max = 0; char *crm_name = NULL; CRM_CHECK(field != NULL, return NULL); crm_name = crm_concat(CRM_META, field, '_'); /* Massage the names so they can be used as shell variables */ max = strlen(crm_name); for(; lpc < max; lpc++) { switch(crm_name[lpc]) { case '-': crm_name[lpc] = '_'; break; } } return crm_name; } const char *crm_meta_value(GHashTable *hash, const char *field) { char *key = NULL; const char *value = NULL; key = crm_meta_name(field); if(key) { value = g_hash_table_lookup(hash, key); crm_free(key); } return value; } static struct crm_option *crm_long_options = NULL; static const char *crm_app_description = NULL; static const char *crm_short_options = NULL; static const char *crm_app_usage = NULL; static struct option *crm_create_long_opts(struct crm_option *long_options) { struct option *long_opts = NULL; #ifdef HAVE_GETOPT_H int index = 0, lpc = 0; /* * A previous, possibly poor, choice of '?' as the short form of --help * means that getopt_long() returns '?' 
for both --help and for "unknown option" * * This dummy entry allows us to differentiate between the two in crm_get_option() * and exit with the correct error code */ crm_realloc(long_opts, (index+1) * sizeof(struct option)); long_opts[index].name = "__dummmy__"; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = '_'; index++; for(lpc = 0; long_options[lpc].name != NULL; lpc++) { if(long_options[lpc].name[0] == '-') { continue; } crm_realloc(long_opts, (index+1) * sizeof(struct option)); /*fprintf(stderr, "Creating %d %s = %c\n", index, * long_options[lpc].name, long_options[lpc].val); */ long_opts[index].name = long_options[lpc].name; long_opts[index].has_arg = long_options[lpc].has_arg; long_opts[index].flag = long_options[lpc].flag; long_opts[index].val = long_options[lpc].val; index++; } /* Now create the list terminator */ crm_realloc(long_opts, (index+1) * sizeof(struct option)); long_opts[index].name = NULL; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = 0; #endif return long_opts; } void crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options, const char *app_desc) { if(short_options) { crm_short_options = short_options; } if(long_options) { crm_long_options = long_options; } if(app_desc) { crm_app_description = app_desc; } if(app_usage) { crm_app_usage = app_usage; } } int crm_get_option(int argc, char **argv, int *index) { #ifdef HAVE_GETOPT_H static struct option *long_opts = NULL; if(long_opts == NULL && crm_long_options) { long_opts = crm_create_long_opts(crm_long_options); } if(long_opts) { int flag = getopt_long(argc, argv, crm_short_options, long_opts, index); switch(flag) { case 0: return long_opts[*index].val; case -1: /* End of option processing */ break; case ':': crm_debug_2("Missing argument"); crm_help('?', 1); break; case '?': crm_help('?', *index?0:1); break; } return flag; } #endif if(crm_short_options) { return getopt(argc, argv, 
crm_short_options); } return -1; } void crm_help(char cmd, int exit_code) { int i = 0; FILE *stream = (exit_code ? stderr : stdout); if(cmd == 'v' || cmd == '$') { fprintf(stream, "%s %s for %s (Build: %s)\n", crm_system_name, VERSION, #if !SUPPORT_HEARTBEAT "OpenAIS", #elif !SUPPORT_AIS "Heartbeat", #else "OpenAIS and Heartbeat", #endif BUILD_VERSION); fprintf(stream, "\nWritten by Andrew Beekhof\n"); goto out; } fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description); if(crm_app_usage) { fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage); } if(crm_long_options) { fprintf(stream, "Options:\n"); for(i = 0; crm_long_options[i].name != NULL; i++) { if(crm_long_options[i].flags & pcmk_option_hidden) { } else if(crm_long_options[i].flags & pcmk_option_paragraph) { fprintf(stream, "%s\n\n", crm_long_options[i].desc); } else if(crm_long_options[i].flags & pcmk_option_example) { fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc); } else if(crm_long_options[i].val == '-' && crm_long_options[i].desc) { fprintf(stream, "%s\n", crm_long_options[i].desc); } else { fprintf(stream, " -%c, --%s%c%s\t%s\n", crm_long_options[i].val, crm_long_options[i].name, crm_long_options[i].has_arg?'=':' ',crm_long_options[i].has_arg?"value":"", crm_long_options[i].desc?crm_long_options[i].desc:""); } } } else if(crm_short_options) { fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description); for(i = 0; crm_short_options[i] != 0; i++) { int has_arg = FALSE; if(crm_short_options[i+1] == ':') { has_arg = TRUE; } fprintf(stream, " -%c %s\n", crm_short_options[i], has_arg?"{value}":""); if(has_arg) { i++; } } } fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT); out: if(exit_code >= 0) { exit(exit_code); } } #include <../../tools/attrd.h> gboolean attrd_update(IPC_Channel *cluster, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { gboolean success = FALSE; const char 
*reason = "Cluster connection failed"; /* remap common aliases */ if(safe_str_eq(section, "reboot")) { section = XML_CIB_TAG_STATUS; } else if(safe_str_eq(section, "forever")) { section = XML_CIB_TAG_NODES; } if(cluster == NULL) { reason = "No connection to the cluster"; } else { xmlNode *update = create_xml_node(NULL, __FUNCTION__); crm_xml_add(update, F_TYPE, T_ATTRD); crm_xml_add(update, F_ORIG, crm_system_name); if(name == NULL && command == 'U') { command = 'R'; } switch(command) { case 'D': case 'U': case 'v': crm_xml_add(update, F_ATTRD_TASK, "update"); crm_xml_add(update, F_ATTRD_ATTRIBUTE, name); break; case 'R': crm_xml_add(update, F_ATTRD_TASK, "refresh"); break; case 'q': crm_xml_add(update, F_ATTRD_TASK, "query"); break; } crm_xml_add(update, F_ATTRD_VALUE, value); crm_xml_add(update, F_ATTRD_DAMPEN, dampen); crm_xml_add(update, F_ATTRD_SECTION, section); crm_xml_add(update, F_ATTRD_HOST, host); crm_xml_add(update, F_ATTRD_SET, set); success = send_ipc_message(cluster, update); free_xml(update); } if(success) { crm_debug("Sent update: %s=%s for %s", name, value, host?host:"localhost"); return TRUE; } crm_info("Could not send update: %s=%s for %s", name, value, host?host:"localhost"); return FALSE; } gboolean attrd_lazy_update(char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { int max = 5; gboolean updated = FALSE; static IPC_Channel *cluster = NULL; while(updated == 0 && max > 0) { if(cluster == NULL) { crm_info("Connecting to cluster... 
%d retries remaining", max); cluster = init_client_ipc_comms_nodispatch(T_ATTRD); } if(cluster != NULL) { updated = attrd_update(cluster, command, host, name, value, section, set, dampen); } if(updated == 0) { cluster = NULL; sleep(2); max--; } } return updated; } gboolean attrd_update_no_mainloop(int *connection, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { int max = 5; gboolean updated = FALSE; static IPC_Channel *cluster = NULL; if(connection && *connection == 0 && cluster) { crm_info("Forcing a new connection to the cluster"); cluster = NULL; } while(updated == 0 && max > 0) { if(cluster == NULL) { crm_info("Connecting to cluster... %d retries remaining", max); cluster = init_client_ipc_comms_nodispatch(T_ATTRD); } if(connection) { if(cluster != NULL) { *connection = cluster->ops->get_recv_select_fd(cluster); } else { *connection = 0; } } if(cluster != NULL) { updated = attrd_update(cluster, command, host, name, value, section, set, dampen); } if(updated == 0) { cluster = NULL; sleep(2); max--; } } return updated; } + +#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" +static void +append_digest(lrm_op_t *op, xmlNode *update, const char *version, const char *magic, int level) +{ + /* this will enable us to later determine that the + * resource's parameters have changed and we should force + * a restart + */ + char *digest = NULL; + xmlNode *args_xml = NULL; + + if(op->params == NULL) { + return; + } + + args_xml = create_xml_node(NULL, XML_TAG_PARAMS); + g_hash_table_foreach(op->params, hash2field, args_xml); + filter_action_parameters(args_xml, version); + digest = calculate_xml_digest(args_xml, TRUE, FALSE); + +#if 0 + if(level < crm_log_level + && op->interval == 0 + && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) { + char *digest_source = dump_xml_unformatted(args_xml); + do_crm_log(level, "Calculated digest %s for %s (%s). 
Source: %s\n", + digest, ID(update), magic, digest_source); + crm_free(digest_source); + } +#endif + crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); + + free_xml(args_xml); + crm_free(digest); +} + +xmlNode * +create_operation_update( + xmlNode *parent, lrm_op_t *op, const char *caller_version, int target_rc, const char *origin) +{ + char *magic = NULL; + const char *task = NULL; + xmlNode *xml_op = NULL; + char *op_id = NULL; + char *local_user_data = NULL; + + CRM_CHECK(op != NULL, return NULL); + crm_debug_2("%s: Updating resouce %s after %s %s op", + origin, op->rsc_id, op_status2text(op->op_status), op->op_type); + + if(op->op_status == LRM_OP_CANCELLED) { + crm_debug_3("Ignoring cancelled op"); + return NULL; + } + + crm_debug_3("DC version: %s", caller_version); + + task = op->op_type; + /* remap the task name under various scenarios + * this makes life easier for the PE when its trying determin the current state + */ + if(crm_str_eq(task, "reload", TRUE)) { + if(op->op_status == LRM_OP_DONE) { + task = CRMD_ACTION_START; + } else { + task = CRMD_ACTION_STATUS; + } + + } else if(crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { + /* if the migrate_from fails it will have enough info to do the right thing */ + if(op->op_status == LRM_OP_DONE) { + task = CRMD_ACTION_STOP; + } else { + task = CRMD_ACTION_STATUS; + } + + } else if(op->op_status == LRM_OP_DONE + && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { + task = CRMD_ACTION_START; + + } else if(crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) { + const char *n_type = crm_meta_value(op->params, "notify_type"); + const char *n_task = crm_meta_value(op->params, "notify_operation"); + CRM_DEV_ASSERT(n_type != NULL); + CRM_DEV_ASSERT(n_task != NULL); + op_id = generate_notify_key(op->rsc_id, n_type, n_task); + + /* these are not yet allowed to fail */ + op->op_status = LRM_OP_DONE; + op->rc = 0; + + } + + if (op_id == NULL) { + op_id = generate_op_key(op->rsc_id, task, op->interval); + } + + xml_op = 
find_entity(parent, XML_LRM_TAG_RSC_OP, op_id); + if(xml_op != NULL) { + crm_log_xml(LOG_DEBUG, "Replacing existing entry", xml_op); + + } else { + xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); + } + + if(op->user_data == NULL) { + crm_debug("Generating fake transition key for:" + " %s_%s_%d %d from %s", + op->rsc_id, op->op_type, op->interval, op->call_id, + op->app_name); + local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); + op->user_data = local_user_data; + } + + magic = generate_transition_magic(op->user_data, op->op_status, op->rc); + + crm_xml_add(xml_op, XML_ATTR_ID, op_id); + crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); + crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); + crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); + crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); + crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); + + crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); + crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); + crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); + crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); + + if(compare_version("2.1", caller_version) <= 0) { + if(op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { + crm_debug_2("Timing data (%s_%s_%d): last=%lu change=%lu exec=%lu queue=%lu", + op->rsc_id, op->op_type, op->interval, + op->t_run, op->t_rcchange, op->exec_time, op->queue_time); + + crm_xml_add_int(xml_op, "last-run", op->t_run); + crm_xml_add_int(xml_op, "last-rc-change", op->t_rcchange); + crm_xml_add_int(xml_op, "exec-time", op->exec_time); + crm_xml_add_int(xml_op, "queue-time", op->queue_time); + } + } + + append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); + + if(op->op_status != LRM_OP_DONE + && crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { + const char *host = crm_meta_value(op->params, "migrate_source_uuid"); + crm_xml_add(xml_op, CRMD_ACTION_MIGRATED, host); + } + + 
if(local_user_data) { + crm_free(local_user_data); + op->user_data = NULL; + } + crm_free(magic); + crm_free(op_id); + return xml_op; +} diff --git a/lib/common/xml.c b/lib/common/xml.c index f13335a8c5..8b037ad871 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -1,2746 +1,2744 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_BZLIB_H # include #endif #define XML_BUFFER_SIZE 4096 #define XML_PARSER_DEBUG 0 xmlDoc *getDocPtr(xmlNode *node); struct schema_s { int type; const char *name; const char *location; const char *transform; int after_transform; }; struct schema_s known_schemas[] = { /* 0 */ { 0, NULL, NULL, NULL, 1 }, /* 1 */ { 1, "pacemaker-0.6", CRM_DTD_DIRECTORY"/crm.dtd", CRM_DTD_DIRECTORY"/upgrade06.xsl", 4 }, /* 2 */ { 1, "transitional-0.6", CRM_DTD_DIRECTORY"/crm-transitional.dtd", CRM_DTD_DIRECTORY"/upgrade06.xsl", 4 }, /* 3 */ { 2, "pacemaker-0.7", CRM_DTD_DIRECTORY"/pacemaker-1.0.rng", NULL, 0 }, /* 4 */ { 2, "pacemaker-1.0", CRM_DTD_DIRECTORY"/pacemaker-1.0.rng", NULL, 0 }, /* 5 */ { 0, "none", NULL, NULL, 0 }, }; static int all_schemas = DIMOF(known_schemas); static int max_schemas = DIMOF(known_schemas) - 
2; /* skip back past 'none' */ static const char *filter[] = { XML_ATTR_ORIGIN, XML_DIFF_MARKER, XML_CIB_ATTR_WRITTEN, }; static void add_ha_nocopy(HA_Message *parent, HA_Message *child, const char *field) { int next = parent->nfields; if (parent->nfields >= parent->nalloc && ha_msg_expand(parent) != HA_OK ) { crm_err("Parent expansion failed"); return; } parent->names[next] = crm_strdup(field); parent->nlens[next] = strlen(field); parent->values[next] = child; parent->vlens[next] = sizeof(HA_Message); parent->types[next] = FT_UNCOMPRESS; parent->nfields++; } int print_spaces(char *buffer, int spaces, int max); int log_data_element(const char *function, const char *prefix, int log_level, int depth, xmlNode *data, gboolean formatted); int get_tag_name(const char *input, size_t offset, size_t max); int get_attr_name(const char *input, size_t offset, size_t max); int get_attr_value(const char *input, size_t offset, size_t max); gboolean can_prune_leaf(xmlNode *xml_node); void diff_filter_context(int context, int upper_bound, int lower_bound, xmlNode *xml_node, xmlNode *parent); int in_upper_context(int depth, int context, xmlNode *xml_node); int write_file(const char *string, const char *filename); xmlNode * find_xml_node(xmlNode *root, const char * search_path, gboolean must_find) { const char *name = "NULL"; if(must_find || root != NULL) { crm_validate_data(root); } if(root != NULL) { name = crm_element_name(root); } if(search_path == NULL) { crm_warn("Will never find "); return NULL; } xml_child_iter_filter( root, a_child, search_path, /* crm_debug_5("returning node (%s).", crm_element_name(a_child)); */ crm_validate_data(a_child); return a_child; ); if(must_find) { crm_warn("Could not find %s in %s.", search_path, name); } else if(root != NULL) { crm_debug_3("Could not find %s in %s.", search_path, name); } else { crm_debug_3("Could not find %s in .", search_path); } return NULL; } xmlNode* find_entity(xmlNode *parent, const char *node_name, const char *id) { 
crm_validate_data(parent); xml_child_iter_filter( parent, a_child, node_name, if(id == NULL || crm_str_eq(id, ID(a_child), TRUE)) { crm_debug_4("returning node (%s).", crm_element_name(a_child)); return a_child; } ); crm_debug_3("node <%s id=%s> not found in %s.", node_name, id, crm_element_name(parent)); return NULL; } void copy_in_properties(xmlNode* target, xmlNode *src) { crm_validate_data(src); crm_validate_data(target); if(src == NULL) { crm_warn("No node to copy properties from"); } else if (target == NULL) { crm_err("No node to copy properties into"); } else { xml_prop_iter( src, local_prop_name, local_prop_value, expand_plus_plus(target, local_prop_name, local_prop_value) ); crm_validate_data(target); } return; } void fix_plus_plus_recursive(xmlNode* target) { xml_prop_iter(target, name, value, expand_plus_plus(target, name, value)); xml_child_iter(target, child, fix_plus_plus_recursive(child)); } void expand_plus_plus(xmlNode* target, const char *name, const char *value) { int offset = 1; int name_len = 0; int int_value = 0; int value_len = 0; const char *old_value = NULL; if(value == NULL || name == NULL) { return; } old_value = crm_element_value(target, name); if(old_value == NULL) { /* if no previous value, set unexpanded */ goto set_unexpanded; } else if(strstr(value, name) != value) { goto set_unexpanded; } name_len = strlen(name); value_len = strlen(value); if(value_len < (name_len + 2) || value[name_len] != '+' || (value[name_len+1] != '+' && value[name_len+1] != '=')) { goto set_unexpanded; } /* if we are expanding ourselves, * then no previous value was set and leave int_value as 0 */ if(old_value != value) { int_value = char2score(old_value); } if(value[name_len+1] != '+') { const char *offset_s = value+(name_len+2); offset = char2score(offset_s); } int_value += offset; if(int_value > INFINITY) { int_value = INFINITY; } crm_xml_add_int(target, name, int_value); return; set_unexpanded: if(old_value == value) { /* the old value is already set, 
nothing to do */ return; } crm_xml_add(target, name, value); return; } xmlDoc *getDocPtr(xmlNode *node) { xmlDoc *doc = NULL; CRM_CHECK(node != NULL, return NULL); doc = node->doc; if(doc == NULL) { doc = xmlNewDoc((const xmlChar*)"1.0"); xmlDocSetRootElement(doc, node); xmlSetTreeDoc(node, doc); } return doc; } xmlNode* add_node_copy(xmlNode *parent, xmlNode *src_node) { xmlNode *child = NULL; xmlDoc *doc = getDocPtr(parent); CRM_CHECK(src_node != NULL, return NULL); child = xmlDocCopyNode(src_node, doc, 1); xmlAddChild(parent, child); return child; } int add_node_nocopy(xmlNode *parent, const char *name, xmlNode *child) { add_node_copy(parent, child); free_xml(child); return HA_OK; } const char * crm_xml_add(xmlNode* node, const char *name, const char *value) { xmlAttr *attr = NULL; CRM_CHECK_AND_STORE(node != NULL, return NULL); CRM_CHECK_AND_STORE(name != NULL, return NULL); if(value == NULL) { return NULL; } #if XML_PARANOIA_CHECKS { const char *old_value = NULL; old_value = crm_element_value(node, name); /* Could be re-setting the same value */ CRM_CHECK_AND_STORE(old_value != value, crm_err("Cannot reset %s with crm_xml_add(%s)", name, value); return value); } #endif attr = xmlSetProp(node, (const xmlChar*)name, (const xmlChar*)value); CRM_CHECK(attr && attr->children && attr->children->content, return NULL); return (char *)attr->children->content; } const char * crm_xml_replace(xmlNode* node, const char *name, const char *value) { xmlAttr *attr = NULL; const char *old_value = NULL; CRM_CHECK(node != NULL, return NULL); CRM_CHECK(name != NULL && name[0] != 0, return NULL); old_value = crm_element_value(node, name); /* Could be re-setting the same value */ CRM_CHECK_AND_STORE(old_value != value, return value); if (old_value != NULL && value == NULL) { xml_remove_prop(node, name); return NULL; } else if(value == NULL) { return NULL; } attr = xmlSetProp(node, (const xmlChar*)name, (const xmlChar*)value); CRM_CHECK(attr && attr->children && 
attr->children->content, return NULL); return (char *)attr->children->content; } const char * crm_xml_add_int(xmlNode* node, const char *name, int value) { char *number = crm_itoa(value); const char *added = crm_xml_add(node, name, number); crm_free(number); return added; } xmlNode* create_xml_node(xmlNode *parent, const char *name) { xmlDoc *doc = NULL; xmlNode *node = NULL; if (name == NULL || name[0] == 0) { return NULL; } if(parent == NULL) { doc = xmlNewDoc((const xmlChar*)"1.0"); node = xmlNewDocRawNode(doc, NULL, (const xmlChar*)name, NULL); xmlDocSetRootElement(doc, node); } else { doc = getDocPtr(parent); node = xmlNewDocRawNode(doc, NULL, (const xmlChar*)name, NULL); xmlAddChild(parent, node); } return node; } void free_xml_from_parent(xmlNode *parent, xmlNode *a_node) { CRM_CHECK(a_node != NULL, return); xmlUnlinkNode(a_node); xmlFreeNode(a_node); } xmlNode* copy_xml(xmlNode *src) { xmlDoc *doc = xmlNewDoc((const xmlChar*)"1.0"); xmlNode *copy = xmlDocCopyNode(src, doc, 1); xmlDocSetRootElement(doc, copy); xmlSetTreeDoc(copy, doc); return copy; } static void crm_xml_err(void * ctx, const char * msg, ...) 
G_GNUC_PRINTF(2,3); extern size_t strlcat(char * dest, const char *source, size_t len); int write_file(const char *string, const char *filename) { int rc = 0; FILE *file_output_strm = NULL; CRM_CHECK(filename != NULL, return -1); if (string == NULL) { crm_err("Cannot write NULL to %s", filename); return -1; } file_output_strm = fopen(filename, "w"); if(file_output_strm == NULL) { crm_perror(LOG_ERR,"Cannot open %s for writing", filename); return -1; } rc = fprintf(file_output_strm, "%s", string); if(rc < 0) { crm_perror(LOG_ERR,"Cannot write output to %s", filename); } if(fflush(file_output_strm) != 0) { crm_perror(LOG_ERR,"fflush for %s failed:", filename); rc = -1; } if(fsync(fileno(file_output_strm)) < 0) { crm_perror(LOG_ERR,"fsync for %s failed:", filename); rc = -1; } fclose(file_output_strm); return rc; } static void crm_xml_err(void * ctx, const char * msg, ...) { int len = 0; va_list args; char *buf = NULL; static int buffer_len = 0; static char *buffer = NULL; va_start(args, msg); len = vasprintf(&buf, msg, args); if(strchr(buf, '\n')) { buf[len - 1] = 0; if(buffer) { crm_err("XML Error: %s%s", buffer, buf); free(buffer); } else { crm_err("XML Error: %s", buf); } buffer = NULL; buffer_len = 0; } else if(buffer == NULL) { buffer_len = len; buffer = buf; buf = NULL; } else { buffer_len += len; buffer = realloc(buffer, buffer_len); strlcat(buffer, buf, buffer_len); } va_end(args); - if(buf) { - free(buf); - } + free(buf); } xmlNode* string2xml(const char *input) { xmlNode *xml = NULL; xmlDocPtr output = NULL; xmlParserCtxtPtr ctxt = NULL; xmlErrorPtr last_error = NULL; if(input == NULL) { crm_err("Can't parse NULL input"); return NULL; } /* create a parser context */ ctxt = xmlNewParserCtxt(); CRM_CHECK(ctxt != NULL, return NULL); /* xmlCtxtUseOptions(ctxt, XML_PARSE_NOBLANKS|XML_PARSE_RECOVER); */ xmlCtxtResetLastError(ctxt); xmlSetGenericErrorFunc(ctxt, crm_xml_err); /* initGenericErrorDefaultFunc(crm_xml_err); */ output = xmlCtxtReadDoc(ctxt, (const 
xmlChar*)input, NULL, NULL, XML_PARSE_NOBLANKS|XML_PARSE_RECOVER); if(output) { xml = xmlDocGetRootElement(output); } last_error = xmlCtxtGetLastError(ctxt); if(last_error && last_error->code != XML_ERR_OK) { /* crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, "last_error->code != XML_ERR_OK", TRUE, TRUE); */ /* * http://xmlsoft.org/html/libxml-xmlerror.html#xmlErrorLevel * http://xmlsoft.org/html/libxml-xmlerror.html#xmlParserErrors */ crm_warn("Parsing failed (domain=%d, level=%d, code=%d): %s", last_error->domain, last_error->level, last_error->code, last_error->message); if(last_error->code != XML_ERR_DOCUMENT_END) { crm_err("Couldn't%s parse %d chars: %s", xml?" fully":"", (int)strlen(input), input); if(xml != NULL) { crm_log_xml_err(xml, "Partial"); } } else { int len = strlen(input); crm_warn("String start: %.50s", input); crm_warn("String start+%d: %s", len-50, input+len-50); crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, "String parsing error", TRUE, TRUE); } } xmlFreeParserCtxt(ctxt); return xml; } xmlNode * stdin2xml(void) { size_t data_length = 0; size_t read_chars = 0; char *xml_buffer = NULL; xmlNode *xml_obj = NULL; do { crm_realloc(xml_buffer, XML_BUFFER_SIZE + data_length + 1); read_chars = fread(xml_buffer + data_length, 1, XML_BUFFER_SIZE, stdin); data_length += read_chars; } while (read_chars > 0); if(data_length == 0) { crm_warn("No XML supplied on stdin"); return NULL; } xml_buffer[data_length] = '\0'; xml_obj = string2xml(xml_buffer); crm_free(xml_buffer); crm_log_xml_debug_3(xml_obj, "Created fragment"); return xml_obj; } static char * decompress_file(const char *filename) { char *buffer = NULL; #if HAVE_BZLIB_H int rc = 0; size_t length = 0, read_len = 0; BZFILE *bz_file = NULL; FILE *input = fopen(filename, "r"); if(input == NULL) { crm_perror(LOG_ERR,"Could not open %s for reading", filename); return NULL; } bz_file = BZ2_bzReadOpen(&rc, input, 0, 0, NULL, 0); if ( rc != BZ_OK ) { BZ2_bzReadClose ( &rc, bz_file); return NULL; } rc = 
BZ_OK; while ( rc == BZ_OK ) { crm_realloc(buffer, XML_BUFFER_SIZE + length + 1); read_len = BZ2_bzRead ( &rc, bz_file, buffer + length, XML_BUFFER_SIZE); crm_debug_5("Read %ld bytes from file: %d", (long)read_len, rc); if ( rc == BZ_OK || rc == BZ_STREAM_END) { length += read_len; } } buffer[length] = '\0'; read_len = length; if ( rc != BZ_STREAM_END ) { crm_err("Couldnt read compressed xml from file"); crm_free(buffer); buffer = NULL; } BZ2_bzReadClose (&rc, bz_file); fclose(input); #else crm_err("Cannot read compressed files:" " bzlib was not available at compile time"); #endif return buffer; } xmlNode * filename2xml(const char *filename) { xmlNode *xml = NULL; xmlDocPtr output = NULL; xmlParserCtxtPtr ctxt = NULL; xmlErrorPtr last_error = NULL; static int xml_options = XML_PARSE_NOBLANKS|XML_PARSE_RECOVER; /* create a parser context */ ctxt = xmlNewParserCtxt(); CRM_CHECK(ctxt != NULL, return NULL); /* xmlCtxtUseOptions(ctxt, XML_PARSE_NOBLANKS|XML_PARSE_RECOVER); */ xmlCtxtResetLastError(ctxt); xmlSetGenericErrorFunc(ctxt, crm_xml_err); /* initGenericErrorDefaultFunc(crm_xml_err); */ if(filename == NULL) { /* STDIN_FILENO == fileno(stdin) */ output = xmlCtxtReadFd(ctxt, STDIN_FILENO, "unknown.xml", NULL, xml_options); } else if(strstr(filename, ".bz2") == NULL) { output = xmlCtxtReadFile(ctxt, filename, NULL, xml_options); } else { char *input = decompress_file(filename); output = xmlCtxtReadDoc(ctxt, (const xmlChar*)input, NULL, NULL, xml_options); crm_free(input); } if(output) { xml = xmlDocGetRootElement(output); } last_error = xmlCtxtGetLastError(ctxt); if(last_error && last_error->code != XML_ERR_OK) { /* crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, "last_error->code != XML_ERR_OK", TRUE, TRUE); */ /* * http://xmlsoft.org/html/libxml-xmlerror.html#xmlErrorLevel * http://xmlsoft.org/html/libxml-xmlerror.html#xmlParserErrors */ crm_err("Parsing failed (domain=%d, level=%d, code=%d): %s", last_error->domain, last_error->level, last_error->code, 
last_error->message); if(last_error && last_error->code != XML_ERR_OK) { crm_err("Couldn't%s parse %s", xml?" fully":"", filename); if(xml != NULL) { crm_log_xml_err(xml, "Partial"); } } } xmlFreeParserCtxt(ctxt); return xml; } int write_xml_file(xmlNode *xml_node, const char *filename, gboolean compress) { int res = 0; time_t now; char *buffer = NULL; char *now_str = NULL; unsigned int out = 0; FILE *file_output_strm = NULL; static mode_t cib_mode = S_IRUSR|S_IWUSR; CRM_CHECK(filename != NULL, return -1); crm_debug_3("Writing XML out to %s", filename); crm_validate_data(xml_node); if (xml_node == NULL) { crm_err("Cannot write NULL to %s", filename); return -1; } file_output_strm = fopen(filename, "w"); if(file_output_strm == NULL) { crm_perror(LOG_ERR,"Cannot open %s for writing", filename); return -1; } /* establish the correct permissions */ fchmod(fileno(file_output_strm), cib_mode); crm_log_xml_debug_4(xml_node, "Writing out"); now = time(NULL); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ crm_xml_add(xml_node, XML_CIB_ATTR_WRITTEN, now_str); crm_validate_data(xml_node); buffer = dump_xml_formatted(xml_node); CRM_CHECK(buffer != NULL && strlen(buffer) > 0, crm_log_xml_warn(xml_node, "dump:failed"); goto bail); if(compress) { #if HAVE_BZLIB_H int rc = BZ_OK; unsigned int in = 0; BZFILE *bz_file = NULL; bz_file = BZ2_bzWriteOpen(&rc, file_output_strm, 5, 0, 30); if(rc != BZ_OK) { crm_err("bzWriteOpen failed: %d", rc); } else { BZ2_bzWrite(&rc,bz_file,buffer,strlen(buffer)); if(rc != BZ_OK) { crm_err("bzWrite() failed: %d", rc); } } if(rc == BZ_OK) { BZ2_bzWriteClose(&rc, bz_file, 0, &in, &out); if(rc != BZ_OK) { crm_err("bzWriteClose() failed: %d",rc); out = -1; } else { crm_debug_2("%s: In: %d, out: %d", filename, in, out); } } #else crm_err("Cannot write compressed files:" " bzlib was not available at compile time"); #endif } if(out <= 0) { res = fprintf(file_output_strm, "%s", buffer); if(res < 0) { crm_perror(LOG_ERR,"Cannot write 
output to %s", filename); goto bail; } } bail: if(fflush(file_output_strm) != 0) { crm_perror(LOG_ERR,"fflush for %s failed:", filename); res = -1; } if(fsync(fileno(file_output_strm)) < 0) { crm_perror(LOG_ERR,"fsync for %s failed:", filename); res = -1; } fclose(file_output_strm); crm_debug_3("Saved %d bytes to the Cib as XML", res); crm_free(buffer); return res; } void print_xml_formatted(int log_level, const char *function, xmlNode *msg, const char *text) { if(msg == NULL) { do_crm_log(log_level, "%s: %s: NULL", function, crm_str(text)); return; } crm_validate_data(msg); log_data_element(function, text, log_level, 0, msg, TRUE); return; } static HA_Message* convert_xml_message_struct(HA_Message *parent, xmlNode *src_node, const char *field) { xmlNode *child = NULL; xmlNode *__crm_xml_iter = src_node->children; xmlAttrPtr prop_iter = src_node->properties; const char *name = NULL; const char *value = NULL; HA_Message *result = ha_msg_new(3); ha_msg_add(result, F_XML_TAGNAME, (const char *)src_node->name); while(prop_iter != NULL) { name = (const char *)prop_iter->name; value = (const char *)xmlGetProp(src_node, prop_iter->name); prop_iter = prop_iter->next; ha_msg_add(result, name, value); } while(__crm_xml_iter != NULL) { child = __crm_xml_iter; __crm_xml_iter = __crm_xml_iter->next; convert_xml_message_struct(result, child, NULL); } if(parent == NULL) { return result; } if(field) { HA_Message *holder = holder = ha_msg_new(3); CRM_ASSERT(holder != NULL); ha_msg_add(holder, F_XML_TAGNAME, field); add_ha_nocopy(holder, result, (const char*)src_node->name); ha_msg_addstruct_compress(parent, field, holder); ha_msg_del(holder); } else { add_ha_nocopy(parent, result, (const char*)src_node->name); } return result; } static void convert_xml_child(HA_Message *msg, xmlNode *xml) { int orig = 0; int rc = BZ_OK; unsigned int len = 0; char *buffer = NULL; char *compressed = NULL; const char *name = NULL; name = (const char *)xml->name; buffer = dump_xml_unformatted(xml); 
orig = strlen(buffer); if(orig < CRM_BZ2_THRESHOLD) { ha_msg_add(msg, name, buffer); goto done; } len = (orig * 1.1) + 600; /* recomended size */ crm_malloc(compressed, len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, buffer, orig, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); if(rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); convert_xml_message_struct(msg, xml, name); goto done; } crm_free(buffer); buffer = compressed; crm_debug_2("Compression details: %d -> %d", orig, len); ha_msg_addbin(msg, name, buffer, len); done: crm_free(buffer); # if 0 { unsigned int used = orig; char *uncompressed = NULL; crm_debug("Trying to decompress %d bytes", len); crm_malloc0(uncompressed, orig); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &used, compressed, len, 1, 0); CRM_CHECK(rc == BZ_OK, ;); CRM_CHECK(used == orig, ;); crm_debug("rc=%d, used=%d", rc, used); if(rc != BZ_OK) { exit(100); } crm_debug("Original %s, decompressed %s", buffer, uncompressed); crm_free(uncompressed); } # endif } HA_Message* convert_xml_message(xmlNode *xml) { HA_Message *result = NULL; result = ha_msg_new(3); ha_msg_add(result, F_XML_TAGNAME, (const char *)xml->name); xml_prop_iter(xml, name, value, ha_msg_add(result, name, value)); xml_child_iter(xml, child, convert_xml_child(result, child)); return result; } static void convert_ha_field(xmlNode *parent, HA_Message *msg, int lpc) { int type = 0; const char *name = NULL; const char *value = NULL; xmlNode *xml = NULL; int rc = BZ_OK; size_t orig_len = 0; unsigned int used = 0; char *uncompressed = NULL; char *compressed = NULL; int size = orig_len * 10; CRM_CHECK(parent != NULL, return); CRM_CHECK(msg != NULL, return); name = msg->names[lpc]; type = cl_get_type(msg, name); switch(type) { case FT_STRUCT: convert_ha_message(parent, msg->values[lpc], name); break; case FT_COMPRESS: case FT_UNCOMPRESS: convert_ha_message(parent, cl_get_struct(msg, name), name); break; case FT_STRING: value = cl_get_string(msg, name); 
CRM_CHECK_AND_STORE(value != NULL, return); crm_debug_5("Converting %s/%d/%s", name, type, value[0] == '<' ? "xml":"field"); if( value[0] != '<' ) { crm_xml_add(parent, name, value); break; } /* unpack xml string */ xml = string2xml(value); if(xml == NULL) { crm_err("Conversion of field '%s' failed", name); return; } add_node_nocopy(parent, NULL, xml); break; case FT_BINARY: value = cl_get_binary(msg, name, &orig_len); size = orig_len * 10 + 1; /* +1 because an exact 10x compression factor happens occasionally */ if(orig_len < 3 || value[0] != 'B' || value[1] != 'Z' || value[2] != 'h') { if(strstr(name, "uuid") == NULL) { crm_err("Skipping non-bzip binary field: %s", name); } return; } crm_malloc0(compressed, orig_len); memcpy(compressed, value, orig_len); crm_debug_2("Trying to decompress %d bytes", (int)orig_len); retry: crm_realloc(uncompressed, size); memset(uncompressed, 0, size); used = size - 1; /* always leave room for a trailing '\0' * BZ2_bzBuffToBuffDecompress wont say anything if * the uncompressed data is exactly 'size' bytes */ rc = BZ2_bzBuffToBuffDecompress( uncompressed, &used, compressed, orig_len, 1, 0); if(rc == BZ_OUTBUFF_FULL) { size = size * 2; /* dont try to allocate more memory than we have */ if(size > 0) { goto retry; } } if(rc != BZ_OK) { crm_err("Decompression of %s (%d bytes) into %d failed: %d", name, (int)orig_len, size, rc); } else { CRM_ASSERT(used < size); CRM_CHECK(uncompressed[used] == 0, uncompressed[used] = 0); xml = string2xml(uncompressed); } if(xml != NULL) { add_node_copy(parent, xml); free_xml(xml); } crm_free(uncompressed); crm_free(compressed); break; } } xmlNode * convert_ha_message(xmlNode *parent, HA_Message *msg, const char *field) { int lpc = 0; xmlNode *child = NULL; const char *tag = NULL; CRM_CHECK_AND_STORE(msg != NULL, crm_err("Empty message for %s", field); return parent); tag = cl_get_string(msg, F_XML_TAGNAME); if(tag == NULL) { tag = field; } else if(parent && safe_str_neq(field, tag)) { /* For 
compatability with 0.6.x */ crm_debug("Creating intermediate parent %s between %s and %s", field, crm_element_name(parent), tag); parent = create_xml_node(parent, field); } if(parent == NULL) { parent = create_xml_node(NULL, tag); child = parent; } else { child = create_xml_node(parent, tag); } for (lpc = 0; lpc < msg->nfields; lpc++) { convert_ha_field(child, msg, lpc); } return parent; } xmlNode *convert_ipc_message(IPC_Message *msg, const char *field) { HA_Message *hmsg = wirefmt2msg((char *)msg->msg_body, msg->msg_len, 0); xmlNode *xml = convert_ha_message(NULL, hmsg, __FUNCTION__); crm_msg_del(hmsg); return xml; } xmlNode * get_message_xml(xmlNode *msg, const char *field) { xmlNode *tmp = first_named_child(msg, field); return first_named_child(tmp, NULL); } gboolean add_message_xml(xmlNode *msg, const char *field, xmlNode *xml) { xmlNode *holder = create_xml_node(msg, field); add_node_copy(holder, xml); return TRUE; } static char * dump_xml(xmlNode *an_xml_node, gboolean formatted, gboolean for_digest) { int len = 0; char *buffer = NULL; xmlBuffer *xml_buffer = NULL; xmlDoc *doc = getDocPtr(an_xml_node); /* doc will only be NULL if an_xml_node is */ CRM_CHECK(doc != NULL, return NULL); xml_buffer = xmlBufferCreate(); CRM_ASSERT(xml_buffer != NULL); len = xmlNodeDump(xml_buffer, doc, an_xml_node, 0, formatted); if(len > 0) { if(for_digest) { /* for compatability with the old result which is used for digests */ len += 3; crm_malloc0(buffer, len); snprintf(buffer, len, " %s\n", (char *)xml_buffer->content); } else { buffer = crm_strdup((char *)xml_buffer->content); } } else { crm_err("Conversion failed"); } xmlBufferFree(xml_buffer); return buffer; } char * dump_xml_formatted(xmlNode *an_xml_node) { return dump_xml(an_xml_node, TRUE, FALSE); } char * dump_xml_unformatted(xmlNode *an_xml_node) { return dump_xml(an_xml_node, FALSE, FALSE); } #define update_buffer() do { \ if(printed < 0) { \ crm_perror(LOG_ERR,"snprintf failed"); \ goto print; \ } else if(printed 
>= (buffer_len - offset)) { \ crm_err("Output truncated: available=%d, needed=%d", buffer_len - offset, printed); \ offset += printed; \ goto print; \ } else if(offset >= buffer_len) { \ crm_err("Buffer exceeded"); \ offset += printed; \ goto print; \ } else { \ offset += printed; \ } \ } while(0) int print_spaces(char *buffer, int depth, int max) { int lpc = 0; int spaces = 2*depth; max--; /* <= so that we always print 1 space - prevents problems with syslog */ for(lpc = 0; lpc <= spaces && lpc < max; lpc++) { if(sprintf(buffer+lpc, "%c", ' ') < 1) { return -1; } } return lpc; } int log_data_element( const char *function, const char *prefix, int log_level, int depth, xmlNode *data, gboolean formatted) { int child_result = 0; int offset = 0; int printed = 0; char *buffer = NULL; int buffer_len = 1000; const char *name = NULL; const char *hidden = NULL; if(data == NULL) { crm_warn("No data to dump as XML"); return 0; } name = crm_element_name(data); CRM_ASSERT(name != NULL); crm_debug_5("Dumping %s", name); crm_malloc0(buffer, buffer_len); if(formatted) { offset = print_spaces(buffer, depth, buffer_len - offset); } printed = snprintf(buffer + offset, buffer_len - offset, "<%s", name); update_buffer(); hidden = crm_element_value(data, "hidden"); xml_prop_iter( data, prop_name, prop_value, if(prop_name == NULL || safe_str_eq(F_XML_TAGNAME, prop_name)) { continue; } else if(hidden != NULL && prop_name[0] != 0 && strstr(hidden, prop_name) != NULL) { prop_value = "*****"; } crm_debug_5("Dumping <%s %s=\"%s\"...", name, prop_name, prop_value); printed = snprintf(buffer + offset, buffer_len - offset, " %s=\"%s\"", prop_name, prop_value); update_buffer(); ); printed = snprintf(buffer + offset, buffer_len - offset, " %s>", xml_has_children(data)?"":"/"); update_buffer(); print: do_crm_log(log_level, "%s: %s%s", function, prefix?prefix:"", buffer); if(xml_has_children(data) == FALSE) { crm_free(buffer); return 0; } xml_child_iter( data, a_child, child_result = 
log_data_element( function, prefix, log_level, depth+1, a_child, formatted); ); if(formatted) { offset = print_spaces(buffer, depth, buffer_len); } do_crm_log(log_level, "%s: %s%s", function, prefix?prefix:"", buffer, name); crm_free(buffer); return 1; } gboolean xml_has_children(const xmlNode *xml_root) { if(xml_root != NULL && xml_root->children != NULL) { return TRUE; } return FALSE; } void xml_validate(const xmlNode *xml_root) { CRM_ASSERT(xml_root != NULL); } int crm_element_value_int(xmlNode *data, const char *name, int *dest) { const char *value = crm_element_value(data, name); CRM_CHECK(dest != NULL, return -1); if(value) { *dest = crm_int_helper(value, NULL); return 0; } return -1; } const char * crm_element_value_const(const xmlNode *data, const char *name) { return crm_element_value((xmlNode*)data, name); } char * crm_element_value_copy(xmlNode *data, const char *name) { char *value_copy = NULL; const char *value = crm_element_value(data, name); if(value != NULL) { value_copy = crm_strdup(value); } return value_copy; } void xml_remove_prop(xmlNode *obj, const char *name) { xmlUnsetProp(obj, (const xmlChar*)name); } void log_xml_diff(unsigned int log_level, xmlNode *diff, const char *function) { xmlNode *added = find_xml_node(diff, "diff-added", FALSE); xmlNode *removed = find_xml_node(diff, "diff-removed", FALSE); gboolean is_first = TRUE; if(crm_log_level < log_level) { /* nothing will ever be printed */ return; } xml_child_iter( removed, child, log_data_element(function, "-", log_level, 0, child, TRUE); if(is_first) { is_first = FALSE; } else { do_crm_log(log_level, " --- "); } ); is_first = TRUE; xml_child_iter( added, child, log_data_element(function, "+", log_level, 0, child, TRUE); if(is_first) { is_first = FALSE; } else { do_crm_log(log_level, " +++ "); } ); } void purge_diff_markers(xmlNode *a_node) { CRM_CHECK(a_node != NULL, return); xml_remove_prop(a_node, XML_DIFF_MARKER); xml_child_iter(a_node, child, purge_diff_markers(child); ); } gboolean 
apply_xml_diff(xmlNode *old, xmlNode *diff, xmlNode **new) { gboolean result = TRUE; const char *digest = crm_element_value(diff, XML_ATTR_DIGEST); xmlNode *added = find_xml_node(diff, "diff-added", FALSE); xmlNode *removed = find_xml_node(diff, "diff-removed", FALSE); int root_nodes_seen = 0; CRM_CHECK(new != NULL, return FALSE); crm_debug_2("Substraction Phase"); xml_child_iter(removed, child_diff, CRM_CHECK(root_nodes_seen == 0, result = FALSE); if(root_nodes_seen == 0) { *new = subtract_xml_object(old, child_diff, NULL); } root_nodes_seen++; ); if(root_nodes_seen == 0) { *new = copy_xml(old); } else if(root_nodes_seen > 1) { crm_err("(-) Diffs cannot contain more than one change set..." " saw %d", root_nodes_seen); result = FALSE; } root_nodes_seen = 0; crm_debug_2("Addition Phase"); if(result) { xml_child_iter(added, child_diff, CRM_CHECK(root_nodes_seen == 0, result = FALSE); if(root_nodes_seen == 0) { add_xml_object(NULL, *new, child_diff); } root_nodes_seen++; ); } if(root_nodes_seen > 1) { crm_err("(+) Diffs cannot contain more than one change set..." 
" saw %d", root_nodes_seen); result = FALSE; } else if(result && digest) { char *new_digest = calculate_xml_digest(*new, FALSE, TRUE); if(safe_str_neq(new_digest, digest)) { crm_info("Digest mis-match: expected %s, calculated %s", digest, new_digest); result = FALSE; } else { crm_debug_2("Digest matched: expected %s, calculated %s", digest, new_digest); } crm_free(new_digest); } else if(result) { int lpc = 0; xmlNode *intermediate = NULL; xmlNode *diff_of_diff = NULL; xmlNode *calc_added = NULL; xmlNode *calc_removed = NULL; const char *value = NULL; const char *name = NULL; const char *version_attrs[] = { XML_ATTR_NUMUPDATES, XML_ATTR_GENERATION, XML_ATTR_GENERATION_ADMIN }; crm_debug_2("Verification Phase"); intermediate = diff_xml_object(old, *new, FALSE); calc_added = find_xml_node(intermediate, "diff-added", FALSE); calc_removed = find_xml_node(intermediate, "diff-removed", FALSE); /* add any version details to the diff so they match */ for(lpc = 0; lpc < DIMOF(version_attrs); lpc++) { name = version_attrs[lpc]; value = crm_element_value(added, name); crm_xml_add(calc_added, name, value); value = crm_element_value(removed, name); crm_xml_add(calc_removed, name, value); } diff_of_diff = diff_xml_object(intermediate, diff, TRUE); if(diff_of_diff != NULL) { crm_info("Diff application failed!"); crm_log_xml_debug(old, "diff:original"); crm_log_xml_debug(diff, "diff:input"); result = FALSE; } free_xml(diff_of_diff); free_xml(intermediate); diff_of_diff = NULL; intermediate = NULL; } if(result) { purge_diff_markers(*new); } return result; } xmlNode * diff_xml_object(xmlNode *old, xmlNode *new, gboolean suppress) { xmlNode *diff = NULL; xmlNode *tmp1 = NULL; xmlNode *added = NULL; xmlNode *removed = NULL; tmp1 = subtract_xml_object(old, new, "removed:top"); if(tmp1 != NULL) { if(suppress && can_prune_leaf(tmp1)) { free_xml(tmp1); } else { diff = create_xml_node(NULL, "diff"); removed = create_xml_node(diff, "diff-removed"); added = create_xml_node(diff, 
"diff-added"); add_node_nocopy(removed, NULL, tmp1); } } tmp1 = subtract_xml_object(new, old, "added:top"); if(tmp1 != NULL) { if(suppress && can_prune_leaf(tmp1)) { free_xml(tmp1); return diff; } if(diff == NULL) { diff = create_xml_node(NULL, "diff"); } if(removed == NULL) { removed = create_xml_node(diff, "diff-removed"); } if(added == NULL) { added = create_xml_node(diff, "diff-added"); } add_node_nocopy(added, NULL, tmp1); } return diff; } gboolean can_prune_leaf(xmlNode *xml_node) { gboolean can_prune = TRUE; /* return FALSE; */ xml_prop_iter(xml_node, prop_name, prop_value, if(safe_str_eq(prop_name, XML_ATTR_ID)) { continue; } can_prune = FALSE; ); xml_child_iter(xml_node, child, if(can_prune_leaf(child)) { free_xml(child); } else { can_prune = FALSE; } ); return can_prune; } void diff_filter_context(int context, int upper_bound, int lower_bound, xmlNode *xml_node, xmlNode *parent) { xmlNode *us = NULL; xmlNode *new_parent = parent; const char *name = crm_element_name(xml_node); CRM_CHECK(xml_node != NULL && name != NULL, return); us = create_xml_node(parent, name); xml_prop_iter(xml_node, prop_name, prop_value, lower_bound = context; crm_xml_add(us, prop_name, prop_value); ); if(lower_bound >= 0 || upper_bound >= 0) { crm_xml_add(us, XML_ATTR_ID, ID(xml_node)); new_parent = us; } else { upper_bound = in_upper_context(0, context, xml_node); if(upper_bound >= 0) { crm_xml_add(us, XML_ATTR_ID, ID(xml_node)); new_parent = us; } else { free_xml(us); us = NULL; } } xml_child_iter(us, child, diff_filter_context( context, upper_bound-1, lower_bound-1, child, new_parent); ); } int in_upper_context(int depth, int context, xmlNode *xml_node) { gboolean has_attributes = FALSE; if(context == 0) { return 0; } xml_prop_iter(xml_node, prop_name, prop_value, has_attributes = TRUE; break; ); if(has_attributes) { return depth; } else if(depth < context) { xml_child_iter(xml_node, child, if(in_upper_context(depth+1, context, child)) { return depth; } ); } return 0; } xmlNode * 
subtract_xml_object(xmlNode *left, xmlNode *right, const char *marker) { gboolean skip = FALSE; gboolean differences = FALSE; xmlNode *diff = NULL; xmlNode *child_diff = NULL; xmlNode *right_child = NULL; const char *id = NULL; const char *name = NULL; const char *value = NULL; const char *right_val = NULL; int lpc = 0; static int filter_len = DIMOF(filter); if(left == NULL) { return NULL; } id = ID(left); if(right == NULL) { xmlNode *deleted = NULL; crm_debug_5("Processing <%s id=%s> (complete copy)", crm_element_name(left), id); deleted = copy_xml(left); crm_xml_add(deleted, XML_DIFF_MARKER, marker); return deleted; } name = crm_element_name(left); CRM_CHECK(name != NULL, return NULL); diff = create_xml_node(NULL, name); /* changes to name/value pairs */ xml_prop_iter(left, prop_name, left_value, if(crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } skip = FALSE; for(lpc = 0; skip == FALSE && lpc < filter_len; lpc++){ if(crm_str_eq(prop_name, filter[lpc], TRUE)) { skip = TRUE; } } if(skip) { continue; } right_val = crm_element_value(right, prop_name); if(right_val == NULL) { /* new */ differences = TRUE; crm_xml_add(diff, prop_name, left_value); } else if(strcmp(left_value, right_val) == 0) { /* unchanged */ } else { /* changed */ differences = TRUE; crm_xml_add(diff, prop_name, left_value); } ); /* changes to child objects */ xml_child_iter( left, left_child, right_child = find_entity( right, crm_element_name(left_child), ID(left_child)); child_diff = subtract_xml_object( left_child, right_child, marker); if(child_diff != NULL) { differences = TRUE; add_node_nocopy(diff, NULL, child_diff); } ); if(differences == FALSE) { /* check for XML_DIFF_MARKER in a child */ xml_child_iter( right, right_child, value = crm_element_value(right_child, XML_DIFF_MARKER); if(value != NULL && safe_str_eq(value, "removed:top")) { crm_debug_3("Found the root of the deletion: %s", name); differences = TRUE; break; } ); } if(differences == FALSE) { free_xml(diff); 
crm_debug_5("\tNo changes to <%s id=%s>", crm_str(name), id); return NULL; } crm_xml_add(diff, XML_ATTR_ID, id); return diff; } int add_xml_object(xmlNode *parent, xmlNode *target, xmlNode *update) { const char *object_id = NULL; const char *object_name = NULL; #if XML_PARSE_DEBUG crm_log_xml(LOG_DEBUG_5, "update:", update); crm_log_xml(LOG_DEBUG_5, "target:", target); #endif CRM_CHECK(update != NULL, return 0); object_name = crm_element_name(update); object_id = ID(update); CRM_CHECK(object_name != NULL, return 0); if(target == NULL && object_id == NULL) { /* placeholder object */ target = find_xml_node(parent, object_name, FALSE); } else if(target == NULL) { target = find_entity(parent, object_name, object_id); } if(target == NULL) { target = create_xml_node(parent, object_name); CRM_CHECK(target != NULL, return 0); #if XML_PARSER_DEBUG crm_debug_2("Added <%s%s%s/>", crm_str(object_name), object_id?" id=":"", object_id?object_id:""); } else { crm_debug_3("Found node <%s%s%s/> to update", crm_str(object_name), object_id?" 
id=":"", object_id?object_id:""); #endif } copy_in_properties(target, update); xml_child_iter( update, a_child, #if XML_PARSER_DEBUG crm_debug_4("Updating child <%s id=%s>", crm_element_name(a_child), ID(a_child)); #endif add_xml_object(target, NULL, a_child); ); #if XML_PARSER_DEBUG crm_debug_3("Finished with <%s id=%s>", crm_str(object_name), crm_str(object_id)); #endif return 0; } gboolean update_xml_child(xmlNode *child, xmlNode *to_update) { gboolean can_update = TRUE; CRM_CHECK(child != NULL, return FALSE); CRM_CHECK(to_update != NULL, return FALSE); if(safe_str_neq(crm_element_name(to_update), crm_element_name(child))) { can_update = FALSE; } else if(safe_str_neq(ID(to_update), ID(child))) { can_update = FALSE; } else if(can_update) { #if XML_PARSER_DEBUG crm_log_xml_debug_2(child, "Update match found..."); #endif add_xml_object(NULL, child, to_update); } xml_child_iter( child, child_of_child, /* only update the first one */ if(can_update) { break; } can_update = update_xml_child(child_of_child, to_update); ); return can_update; } int find_xml_children(xmlNode **children, xmlNode *root, const char *tag, const char *field, const char *value, gboolean search_matches) { int match_found = 0; CRM_CHECK(root != NULL, return FALSE); CRM_CHECK(children != NULL, return FALSE); if(tag != NULL && safe_str_neq(tag, crm_element_name(root))) { } else if(value != NULL && safe_str_neq(value, crm_element_value(root, field))) { } else { if(*children == NULL) { *children = create_xml_node(NULL, __FUNCTION__); } add_node_copy(*children, root); match_found = 1; } if(search_matches || match_found == 0) { xml_child_iter( root, child, match_found += find_xml_children( children, child, tag, field, value, search_matches); ); } return match_found; } gboolean replace_xml_child(xmlNode *parent, xmlNode *child, xmlNode *update, gboolean delete_only) { gboolean can_delete = FALSE; const char *up_id = NULL; const char *child_id = NULL; const char *right_val = NULL; CRM_CHECK(child != NULL, 
return FALSE); CRM_CHECK(update != NULL, return FALSE); up_id = ID(update); child_id = ID(child); if(up_id == NULL || safe_str_eq(child_id, up_id)) { can_delete = TRUE; } if(safe_str_neq(crm_element_name(update), crm_element_name(child))) { can_delete = FALSE; } if(can_delete && delete_only) { xml_prop_iter(update, prop_name, left_value, right_val = crm_element_value(child, prop_name); if(safe_str_neq(left_value, right_val)) { can_delete = FALSE; } ); } if(can_delete && parent != NULL) { crm_log_xml_debug_4(child, "Delete match found..."); if(delete_only || update == NULL) { free_xml_from_parent(NULL, child); } else { xmlNode *tmp = copy_xml(update); xmlNode *old = xmlReplaceNode(child, tmp); free_xml_from_parent(NULL, old); } child = NULL; return TRUE; } else if(can_delete) { crm_log_xml_debug(child, "Cannot delete the search root"); can_delete = FALSE; } xml_child_iter( child, child_of_child, /* only delete the first one */ if(can_delete) { break; } can_delete = replace_xml_child(child, child_of_child, update, delete_only); ); return can_delete; } void hash2nvpair(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; xmlNode *xml_child = create_xml_node(xml_node, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_child, XML_ATTR_ID, name); crm_xml_add(xml_child, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(xml_child, XML_NVPAIR_ATTR_VALUE, s_value); crm_debug_3("dumped: name=%s value=%s", name, s_value); } void hash2smartfield(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; if(isdigit(name[0])) { xmlNode *tmp = create_xml_node(xml_node, XML_TAG_PARAM); crm_xml_add(tmp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(tmp, XML_NVPAIR_ATTR_VALUE, s_value); } else if(crm_element_value(xml_node, name) == NULL) { crm_xml_add(xml_node, name, s_value); crm_debug_3("dumped: %s=%s", name, s_value); } else { crm_debug_2("duplicate: 
%s=%s", name, s_value); } } void hash2field(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; if(crm_element_value(xml_node, name) == NULL) { crm_xml_add(xml_node, name, s_value); crm_debug_3("dumped: %s=%s", name, s_value); } else { crm_debug_2("duplicate: %s=%s", name, s_value); } } void hash2metafield(gpointer key, gpointer value, gpointer user_data) { char *crm_name = NULL; if(key == NULL || value == NULL) { return; } crm_name = crm_meta_name(key); hash2field(crm_name, value, user_data); crm_free(crm_name); } GHashTable * xml2list(xmlNode *parent) { xmlNode *nvpair_list = NULL; GHashTable *nvpair_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); CRM_CHECK(parent != NULL, return nvpair_hash); nvpair_list = find_xml_node(parent, XML_TAG_ATTRS, FALSE); if(nvpair_list == NULL) { crm_debug_2("No attributes in %s", crm_element_name(parent)); crm_log_xml_debug_2( parent,"No attributes for resource op"); } crm_log_xml_debug_3(nvpair_list, "Unpacking"); xml_prop_iter( nvpair_list, key, value, crm_debug_4("Added %s=%s", key, value); g_hash_table_insert( nvpair_hash, crm_strdup(key), crm_strdup(value)); ); xml_child_iter_filter( nvpair_list, child, XML_TAG_PARAM, const char *key = crm_element_value(child, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(child, XML_NVPAIR_ATTR_VALUE); crm_debug_4("Added %s=%s", key, value); if(key != NULL && value != NULL) { g_hash_table_insert(nvpair_hash, crm_strdup(key), crm_strdup(value)); } ); return nvpair_hash; } typedef struct name_value_s { const char *name; const void *value; } name_value_t; static gint sort_pairs(gconstpointer a, gconstpointer b) { int rc = 0; const name_value_t *pair_a = a; const name_value_t *pair_b = b; CRM_ASSERT(a != NULL); CRM_ASSERT(pair_a->name != NULL); CRM_ASSERT(b != NULL); CRM_ASSERT(pair_b->name != NULL); rc = strcmp(pair_a->name, pair_b->name); if(rc < 0) { 
return -1; } else if(rc > 0) { return 1; } return 0; } static void dump_pair(gpointer data, gpointer user_data) { name_value_t *pair = data; xmlNode *parent = user_data; crm_xml_add(parent, pair->name, pair->value); } xmlNode * sorted_xml(xmlNode *input, xmlNode *parent, gboolean recursive) { GListPtr sorted = NULL; GListPtr unsorted = NULL; name_value_t *pair = NULL; xmlNode *result = NULL; const char *name = crm_element_name(input); CRM_CHECK(input != NULL, return NULL); name = crm_element_name(input); CRM_CHECK(name != NULL, return NULL); result = create_xml_node(parent, name); xml_prop_iter(input, p_name, p_value, crm_malloc0(pair, sizeof(name_value_t)); pair->name = p_name; pair->value = p_value; unsorted = g_list_prepend(unsorted, pair); pair = NULL; ); sorted = g_list_sort(unsorted, sort_pairs); g_list_foreach(sorted, dump_pair, result); slist_destroy(name_value_t, child, sorted, crm_free(child)); if(recursive) { xml_child_iter(input, child, sorted_xml(child, result, recursive)); } else { xml_child_iter(input, child, add_node_copy(result, child)); } return result; } static void filter_xml(xmlNode *data, const char **filter, int filter_len, gboolean recursive) { int lpc = 0; for(lpc = 0; lpc < filter_len; lpc++) { xml_remove_prop(data, filter[lpc]); } if(recursive == FALSE) { return; } xml_child_iter(data, child, filter_xml(child, filter, filter_len, recursive)); } /* "c048eae664dba840e1d2060f00299e9d" */ char * calculate_xml_digest(xmlNode *input, gboolean sort, gboolean do_filter) { int i = 0; int digest_len = 16; char *digest = NULL; unsigned char *raw_digest = NULL; xmlNode *sorted = NULL; char *buffer = NULL; size_t buffer_len = 0; if(sort || do_filter) { sorted = sorted_xml(input, NULL, TRUE); } else { sorted = copy_xml(input); } if(do_filter) { filter_xml(sorted, filter, DIMOF(filter), TRUE); } buffer = dump_xml(sorted, FALSE, TRUE); buffer_len = strlen(buffer); CRM_CHECK(buffer != NULL && buffer_len > 0, free_xml(sorted); crm_free(buffer); return 
NULL); crm_malloc(digest, (2 * digest_len + 1)); crm_malloc(raw_digest, (digest_len + 1)); MD5((unsigned char *)buffer, buffer_len, raw_digest); for(i = 0; i < digest_len; i++) { sprintf(digest+(2*i), "%02x", raw_digest[i]); } digest[(2*digest_len)] = 0; crm_debug_2("Digest %s: %s\n", digest, buffer); crm_log_xml(LOG_DEBUG_3, "digest:source", sorted); crm_free(buffer); crm_free(raw_digest); free_xml(sorted); return digest; } #if HAVE_LIBXML2 # include # include # include # include # include #endif static gboolean validate_with_dtd( xmlDocPtr doc, gboolean to_logs, const char *dtd_file) { gboolean valid = TRUE; xmlDtdPtr dtd = NULL; xmlValidCtxtPtr cvp = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(dtd_file != NULL, return FALSE); dtd = xmlParseDTD(NULL, (const xmlChar *)dtd_file); CRM_CHECK(dtd != NULL, crm_err("Could not find/parse %s", dtd_file); goto cleanup); cvp = xmlNewValidCtxt(); CRM_CHECK(cvp != NULL, goto cleanup); if(to_logs) { cvp->userData = (void *) LOG_ERR; cvp->error = (xmlValidityErrorFunc) cl_log; cvp->warning = (xmlValidityWarningFunc) cl_log; } else { cvp->userData = (void *) stderr; cvp->error = (xmlValidityErrorFunc) fprintf; cvp->warning = (xmlValidityWarningFunc) fprintf; } if (!xmlValidateDtd(cvp, doc, dtd)) { valid = FALSE; } cleanup: if(cvp) { xmlFreeValidCtxt(cvp); } if(dtd) { xmlFreeDtd(dtd); } return valid; } xmlNode *first_named_child(xmlNode *parent, const char *name) { xml_child_iter_filter(parent, match, name, return match); return NULL; } #if 0 static void relaxng_invalid_stderr(void * userData, xmlErrorPtr error) { /* Structure xmlError struct _xmlError { int domain : What part of the library raised this er int code : The error code, e.g. 
an xmlParserError char * message : human-readable informative error messag xmlErrorLevel level : how consequent is the error char * file : the filename int line : the line number if available char * str1 : extra string information char * str2 : extra string information char * str3 : extra string information int int1 : extra number information int int2 : column number of the error or 0 if N/A void * ctxt : the parser context if available void * node : the node in the tree } */ crm_err("Structured error: line=%d, level=%d %s", error->line, error->level, error->message); } #endif static gboolean validate_with_relaxng( xmlDocPtr doc, gboolean to_logs, const char *relaxng_file) { gboolean valid = TRUE; int rc = 0; xmlRelaxNGPtr rng = NULL; xmlRelaxNGValidCtxtPtr valid_ctx = NULL; xmlRelaxNGParserCtxtPtr parser_ctx = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(relaxng_file != NULL, return FALSE); xmlLoadExtDtdDefaultValue = 1; parser_ctx = xmlRelaxNGNewParserCtxt(relaxng_file); CRM_CHECK(parser_ctx != NULL, goto cleanup); if(to_logs) { xmlRelaxNGSetParserErrors(parser_ctx, (xmlRelaxNGValidityErrorFunc) cl_log, (xmlRelaxNGValidityWarningFunc) cl_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetParserErrors(parser_ctx, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } rng = xmlRelaxNGParse(parser_ctx); CRM_CHECK(rng != NULL, crm_err("Could not find/parse %s", relaxng_file); goto cleanup); valid_ctx = xmlRelaxNGNewValidCtxt(rng); CRM_CHECK(valid_ctx != NULL, goto cleanup); if(to_logs) { xmlRelaxNGSetValidErrors(valid_ctx, (xmlRelaxNGValidityErrorFunc) cl_log, (xmlRelaxNGValidityWarningFunc) cl_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetValidErrors(valid_ctx, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } /* xmlRelaxNGSetValidStructuredErrors( */ /* valid_ctx, relaxng_invalid_stderr, valid_ctx); */ xmlLineNumbersDefault(1); rc = xmlRelaxNGValidateDoc(valid_ctx, 
doc); if (rc > 0) { valid = FALSE; } else if (rc < 0) { crm_err("Internal libxml error during validation\n"); } cleanup: if(parser_ctx != NULL) { xmlRelaxNGFreeParserCtxt(parser_ctx); xmlCleanupParser(); } if(valid_ctx != NULL) { xmlRelaxNGFreeValidCtxt(valid_ctx); } if (rng != NULL) { xmlRelaxNGFree(rng); } return valid; } static gboolean validate_with(xmlNode *xml, int method, gboolean to_logs) { xmlDocPtr doc = NULL; gboolean valid = FALSE; int type = known_schemas[method].type; const char *file = known_schemas[method].location; CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); crm_debug_2("Validating with: %s (type=%d)", crm_str(file), type); switch(type) { case 0: valid = TRUE; break; case 1: valid = validate_with_dtd(doc, to_logs, file); break; case 2: valid = validate_with_relaxng(doc, to_logs, file); break; default: crm_err("Unknown validator type: %d", type); break; } return valid; } #include static void dump_file(const char *filename) { FILE *fp = NULL; int ch, line = 0; CRM_CHECK(filename != NULL, return); fp = fopen(filename, "r"); CRM_CHECK(fp != NULL, return); fprintf(stderr, "%4d ", ++line); do { ch = getc(fp); if(ch == EOF) { putc('\n', stderr); break; } else if(ch == '\n') { fprintf(stderr, "\n%4d ", ++line); } else { putc(ch, stderr); } } while(1); fclose(fp); } gboolean validate_xml_verbose(xmlNode *xml_blob) { xmlDoc *doc = NULL; xmlNode *xml = NULL; gboolean rc = FALSE; char *filename = NULL; static char *template = NULL; if(template == NULL) { template = crm_strdup(CRM_STATE_DIR"/cib-invalid.XXXXXX"); } filename = mktemp(template); write_xml_file(xml_blob, filename, FALSE); dump_file(filename); doc = xmlParseFile(filename); xml = xmlDocGetRootElement(doc); rc = validate_xml(xml, NULL, FALSE); free_xml(xml); return rc; } gboolean validate_xml(xmlNode *xml_blob, const char *validation, gboolean to_logs) { int lpc = 0; if(validation == NULL) { validation = crm_element_value(xml_blob, XML_ATTR_VALIDATION); } if(validation == NULL) { 
validation = crm_element_value(xml_blob, "ignore-dtd"); if(crm_is_true(validation)) { validation = "none"; } else { validation = "transitional-0.6"; } } if(safe_str_eq(validation, "none")) { return TRUE; } for(; lpc < all_schemas; lpc++) { if(safe_str_eq(validation, known_schemas[lpc].name)) { return validate_with(xml_blob, lpc, to_logs); } } crm_err("Unknown validator: %s", validation); return FALSE; } static xmlNode *apply_transformation(xmlNode *xml, const char *transform) { xmlNode *out = NULL; xmlDocPtr res = NULL; xmlDocPtr doc = NULL; xsltStylesheet *xslt = NULL; CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); xmlLoadExtDtdDefaultValue = 1; xmlSubstituteEntitiesDefault(1); xslt = xsltParseStylesheetFile((const xmlChar *)transform); CRM_CHECK(xslt != NULL, goto cleanup); res = xsltApplyStylesheet(xslt, doc, NULL); CRM_CHECK(res != NULL, goto cleanup); out = xmlDocGetRootElement(res); cleanup: if(xslt) { xsltFreeStylesheet(xslt); } xsltCleanupGlobals(); xmlCleanupParser(); return out; } const char *get_schema_name(int version) { if(version < 0 || version >= all_schemas) { return "unknown"; } return known_schemas[version].name; } int get_schema_version(const char *name) { int lpc = 0; for(; lpc < all_schemas; lpc++) { if(safe_str_eq(name, known_schemas[lpc].name)) { return lpc; } } return -1; } /* set which validation to use */ #include int update_validation( xmlNode **xml_blob, int *best, gboolean transform, gboolean to_logs) { xmlNode *xml = NULL; char *value = NULL; int lpc = 0, match = -1, rc = cib_ok; CRM_CHECK(best != NULL, return cib_invalid_argument); CRM_CHECK(xml_blob != NULL, return cib_invalid_argument); CRM_CHECK(*xml_blob != NULL, return cib_invalid_argument); *best = 0; xml = *xml_blob; value = crm_element_value_copy(xml, XML_ATTR_VALIDATION); if(value != NULL) { match = get_schema_version(value); lpc = match; if(lpc >= 0 && transform == FALSE) { lpc++; } else if(lpc < 0) { crm_debug("Unknown validation type"); lpc = 0; } } if(match 
>= max_schemas) { /* nothing to do */ crm_free(value); *best = match; return cib_ok; } for(; lpc < max_schemas; lpc++) { gboolean valid = TRUE; crm_debug("Testing '%s' validation", known_schemas[lpc].name?known_schemas[lpc].name:""); valid = validate_with(xml, lpc, to_logs); if(valid) { *best = lpc; } if(valid && transform && known_schemas[lpc].transform != NULL) { xmlNode *upgrade = NULL; int next = known_schemas[lpc].after_transform; if(next <= 0) { next = lpc+1; } crm_notice("Upgrading %s-style configuration to %s with %s", known_schemas[lpc].name, known_schemas[next].name, known_schemas[lpc].transform); upgrade = apply_transformation(xml, known_schemas[lpc].transform); if(upgrade == NULL) { crm_err("Transformation %s failed", known_schemas[lpc].transform); rc = cib_transform_failed; } else if(validate_with(upgrade, next, to_logs)) { crm_info("Transformation %s successful", known_schemas[lpc].transform); lpc = next; *best = next; free_xml(xml); xml = upgrade; rc = cib_ok; } else { crm_err("Transformation %s did not produce a valid configuration", known_schemas[lpc].transform); crm_log_xml_info(upgrade, "transform:bad"); free_xml(upgrade); rc = cib_dtd_validation; } } } if(*best > match) { crm_notice("Upgraded from %s to %s validation", value?value:"", known_schemas[*best].name); crm_xml_add(xml, XML_ATTR_VALIDATION, known_schemas[*best].name); } *xml_blob = xml; crm_free(value); return rc; } xmlNode * getXpathResult(xmlXPathObjectPtr xpathObj, int index) { xmlNode *match = NULL; CRM_CHECK(index >= 0, return NULL); CRM_CHECK(xpathObj != NULL, return NULL); if(index >= xpathObj->nodesetval->nodeNr) { crm_err("Requested index %d of only %d items", index, xpathObj->nodesetval->nodeNr); return NULL; } match = xpathObj->nodesetval->nodeTab[index]; CRM_CHECK(match != NULL, return NULL); if(match->type == XML_DOCUMENT_NODE) { /* Will happen if section = '/' */ match = match->children; } else if(match->type != XML_ELEMENT_NODE && match->parent && match->parent->type == 
XML_ELEMENT_NODE) { /* reurning the parent instead */ match = match->parent; } else if(match->type != XML_ELEMENT_NODE) { /* We only support searching nodes */ crm_err("We only support %d not %d", XML_ELEMENT_NODE, match->type); match = NULL; } return match; } /* the caller needs to check if the result contains a xmlDocPtr or xmlNodePtr */ xmlXPathObjectPtr xpath_search(xmlNode *xml_top, const char *path) { xmlDocPtr doc = NULL; xmlXPathObjectPtr xpathObj = NULL; xmlXPathContextPtr xpathCtx = NULL; const xmlChar *xpathExpr = (const xmlChar *)path; CRM_CHECK(path != NULL, return NULL); CRM_CHECK(xml_top != NULL, return NULL); CRM_CHECK(strlen(path) > 0, return NULL); doc = getDocPtr(xml_top); crm_debug_2("Evaluating: %s", path); xpathCtx = xmlXPathNewContext(doc); CRM_ASSERT(xpathCtx != NULL); xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); xmlXPathFreeContext(xpathCtx); return xpathObj; } gboolean cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) { gboolean rc = TRUE; const char *value = crm_element_value(*xml, XML_ATTR_VALIDATION); int min_version = get_schema_version(MINIMUM_SCHEMA_VERSION); int max_version = get_schema_version(LATEST_SCHEMA_VERSION); int version = get_schema_version(value); if(version < max_version) { xmlNode *converted = NULL; converted = copy_xml(*xml); update_validation(&converted, &version, TRUE, to_logs); value = crm_element_value(converted, XML_ATTR_VALIDATION); if(version < min_version) { if(to_logs) { crm_config_err("Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.\n", crm_str(value), MINIMUM_SCHEMA_VERSION); } else { fprintf(stderr, "Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.\n", crm_str(value), MINIMUM_SCHEMA_VERSION); } free_xml(converted); converted = NULL; rc = FALSE; } else { free_xml(*xml); *xml = converted; if(version < max_version) { crm_config_warn("Your configuration was internally updated to %s... 
" "which is acceptable but not the most recent", get_schema_name(version)); } else if(to_logs){ crm_config_warn("Your configuration was internally updated to the latest version (%s)", get_schema_name(version)); } else { fprintf(stderr, "Your configuration was internally updated to the latest version (%s)\n", get_schema_name(version)); } } } else if(version > max_version) { if(to_logs){ crm_config_warn("Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues."); } else { fprintf(stderr, "Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues.\n"); } } if(best_version) { *best_version = version; } return rc; } xmlNode *expand_idref(xmlNode *input, xmlNode *top) { const char *tag = NULL; const char *ref = NULL; xmlNode *result = input; char *xpath_string = NULL; if(result == NULL) { return NULL; } else if(top == NULL) { top = input; } tag = crm_element_name(result); ref = crm_element_value(result, XML_ATTR_IDREF); if(ref != NULL) { int xpath_max = 512, offset = 0; crm_malloc0(xpath_string, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "//%s[@id='%s']", tag, ref); result = get_xpath_object(xpath_string, top, LOG_ERR); if(result == NULL) { crm_err("No match for %s found in %s: Invalid configuration", xpath_string, xmlGetNodePath(top)); } } crm_free(xpath_string); return result; } xmlNode* get_xpath_object_relative(const char *xpath, xmlNode *xml_obj, int error_level) { int len = 0; xmlNode *result = NULL; char *xpath_full = NULL; const char *xpath_prefix = NULL; if(xml_obj == NULL || xpath == NULL) { return NULL; } xpath_prefix = (const char *)xmlGetNodePath(xml_obj); len += strlen(xpath_prefix); len += strlen(xpath); xpath_full = crm_strdup(xpath_prefix); crm_realloc(xpath_full, len+1); strncat(xpath_full, xpath, len); result = get_xpath_object(xpath_full, xml_obj, error_level); crm_free(xpath_full); return result; } 
xmlNode* get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level) { xmlNode *result = NULL; xmlXPathObjectPtr xpathObj = NULL; if(xpath == NULL) { return xml_obj; /* or return NULL? */ } xpathObj = xpath_search(xml_obj, xpath); if(xpathObj == NULL || xpathObj->nodesetval == NULL || xpathObj->nodesetval->nodeNr < 1) { do_crm_log(error_level, "No match for %s in %s", xpath, xmlGetNodePath(xml_obj)); crm_log_xml(LOG_DEBUG_2, "Bad Input", xml_obj); } else if(xpathObj->nodesetval->nodeNr > 1) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; do_crm_log(error_level, "Too many matches for %s in %s", xpath, xmlGetNodePath(xml_obj)); for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); do_crm_log(error_level, "%s[%d] = %s", xpath, lpc, xmlGetNodePath(match)); } crm_log_xml(LOG_DEBUG_2, "Bad Input", xml_obj); } else { result = getXpathResult(xpathObj, 0); } if(xpathObj) { xmlXPathFreeObject(xpathObj); } return result; } const char * crm_element_value(xmlNode *data, const char *name) { xmlAttr *attr = NULL; if(data == NULL) { crm_err("Couldn't find %s in NULL", name?name:""); return NULL; } else if(name == NULL) { crm_err("Couldn't find NULL in %s", crm_element_name(data)); return NULL; } attr = xmlHasProp(data, (const xmlChar*)name); if(attr == NULL || attr->children == NULL) { return NULL; } return (const char*)attr->children->content; } diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am index f25b112937..a1eca709db 100644 --- a/lib/fencing/Makefile.am +++ b/lib/fencing/Makefile.am @@ -1,35 +1,32 @@ # File: Makefile.am # Author: Sun Jiang Dong # Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ - -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir) -I$(top_srcdir) -halibdir = $(libdir)/@HB_PKG@ -havarlibdir = $(localstatedir)/lib/@HB_PKG@ - -COMMONLIBS = -lplumb $(CLUSTERLIBS) - lib_LTLIBRARIES = libstonithd.la -libstonithd_la_SOURCES = stonithd_lib.c stonithd_msg.c -libstonithd_la_LDFLAGS = $(COMMONLIBS) + +libstonithd_la_SOURCES = st_client.c +libstonithd_la_LDFLAGS = -version-info 1:0:0 \ + -lplumb $(CLUSTERLIBS) \ + $(top_builddir)/lib/common/libcrmcommon.la AM_CFLAGS = $(INCLUDES) diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c new file mode 100644 index 0000000000..b0797335a6 --- /dev/null +++ b/lib/fencing/st_client.c @@ -0,0 +1,1185 @@ +/* + * Copyright (c) 2004 Andrew Beekhof + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +GHashTable *stonith_op_callback_table = NULL; + +typedef struct stonith_private_s +{ + char *token; + IPC_Channel *command_channel; + IPC_Channel *callback_channel; + GCHSource *callback_source; + + void (*op_callback)( + stonith_t *st, const xmlNode *msg, int call, int rc, xmlNode *output, void *userdata); + +} stonith_private_t; + +typedef struct stonith_notify_client_s +{ + const char *event; + const char *obj_id; /* implement one day */ + const char *obj_type; /* implement one day */ + void (*notify)(stonith_t *st, const char *event, xmlNode *msg); + +} stonith_notify_client_t; + +typedef struct stonith_callback_client_s +{ + void (*callback)( + stonith_t *st, const xmlNode *msg, int call, int rc, xmlNode *output, void *userdata); + const char *id; + void *user_data; + gboolean only_success; + struct timer_rec_s *timer; + +} stonith_callback_client_t; + +struct notify_blob_s +{ + stonith_t *stonith; + xmlNode *xml; +}; + +struct timer_rec_s +{ + int call_id; + int timeout; + guint ref; + stonith_t *stonith; +}; + +typedef enum stonith_errors (*stonith_op_t)( + const char *, int, const char *, xmlNode *, + xmlNode*, xmlNode*, xmlNode**, xmlNode**); + +gboolean stonith_dispatch(IPC_Channel *channel, gpointer user_data); +void stonith_perform_callback(stonith_t *stonith, xmlNode *msg, int call_id, int rc); +xmlNode *stonith_create_op( + int call_id, const char *token, const char *op, xmlNode *data, int call_options); +int stonith_send_command( + stonith_t *stonith, const char *op, xmlNode *data, + xmlNode **output_data, int call_options, int timeout); + +static void stonith_connection_destroy(gpointer user_data); +static 
void stonith_send_notification(gpointer data, gpointer user_data); + +static void stonith_connection_destroy(gpointer user_data) +{ + stonith_t *stonith = user_data; + struct notify_blob_s blob; + + blob.stonith = stonith; + blob.xml = create_xml_node(NULL, "notify");; + + stonith->state = stonith_disconnected; + crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); + crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); + + g_list_foreach(stonith->notify_list, stonith_send_notification, &blob); + free_xml(blob.xml); +} + +static int stonith_api_register_device( + stonith_t *stonith, int call_options, + const char *id, const char *namespace, const char *agent, GHashTable *params) +{ + int rc = 0; + xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); + xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); + + crm_xml_add(data, XML_ATTR_ID, id); + crm_xml_add(data, "origin", __FUNCTION__); + crm_xml_add(data, "agent", agent); + crm_xml_add(data, "namespace", namespace); + + g_hash_table_foreach(params, hash2field, args); + + rc = stonith_send_command(stonith, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); + free_xml(data); + + return rc; +} + +static int stonith_api_remove_device( + stonith_t *stonith, int call_options, const char *name) +{ + int rc = 0; + xmlNode *data = NULL; + + data = create_xml_node(NULL, F_STONITH_DEVICE); + crm_xml_add(data, "origin", __FUNCTION__); + crm_xml_add(data, XML_ATTR_ID, name); + rc = stonith_send_command(stonith, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); + free_xml(data); + + return rc; +} + + +static int stonith_api_device_metadata( + stonith_t *stonith, int call_options, const char *agent, const char *namespace, + char **output, int timeout) +{ + int rc = 0; + xmlNode *xml = NULL; + xmlNode *data = NULL; + + data = create_xml_node(NULL, F_STONITH_DEVICE); + crm_xml_add(data, "origin", __FUNCTION__); + crm_xml_add(data, "agent", agent); + crm_xml_add(data, "namespace", namespace); + crm_xml_add(data, 
F_STONITH_ACTION, "metadata"); + crm_xml_add_int(data, "timeout", timeout); + + rc = stonith_send_command( + stonith, STONITH_OP_EXEC, data, &xml, call_options, timeout); + + if(xml && output) { + xmlNode *meta = get_xpath_object("//resource-agent", xml, LOG_ERR); + if(meta) { + *output = dump_xml_formatted(meta); + } else { + crm_log_xml_warn(xml, "NoMetadata"); + rc = st_err_internal; + } + } + + free_xml(data); + free_xml(xml); + return rc; +} + +static int stonith_api_query( + stonith_t *stonith, int call_options, const char *target, GListPtr *devices, int timeout) +{ + int rc = 0, lpc = 0, max = 0; + + xmlNode *data = NULL; + xmlNode *output = NULL; + xmlXPathObjectPtr xpathObj = NULL; + + CRM_CHECK(devices != NULL, return st_err_missing); + + data = create_xml_node(NULL, F_STONITH_DEVICE); + crm_xml_add(data, "origin", __FUNCTION__); + crm_xml_add(data, F_STONITH_TARGET, target); + rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); + + if(rc < 0) { + return rc; + } + + xpathObj = xpath_search(output, "//@agent"); + max = xpathObj->nodesetval->nodeNr; + + for(lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + CRM_CHECK(match != NULL, continue); + + crm_info("%s[%d] = %s", "//@agent", lpc, xmlGetNodePath(match)); + *devices = g_list_append(*devices, crm_element_value_copy(match, XML_ATTR_ID)); + } + + free_xml(output); + free_xml(data); + return max; +} + +static int stonith_api_call( + stonith_t *stonith, int call_options, const char *id, const char *action, const char *port, int timeout) +{ + int rc = 0; + xmlNode *data = NULL; + + data = create_xml_node(NULL, F_STONITH_DEVICE); + crm_xml_add(data, "origin", __FUNCTION__); + crm_xml_add(data, F_STONITH_DEVICE, id); + crm_xml_add(data, F_STONITH_ACTION, action); + crm_xml_add(data, F_STONITH_TARGET, port); + crm_xml_add_int(data, "timeout", timeout); + + rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, NULL, call_options, timeout); 
+ free_xml(data); + + return rc; +} + +static int stonith_api_fence( + stonith_t *stonith, int call_options, const char *node, const char *action, int timeout) +{ + int rc = 0; + xmlNode *data = NULL; + + data = create_xml_node(NULL, __FUNCTION__); + crm_xml_add(data, F_STONITH_TARGET, node); + crm_xml_add(data, F_STONITH_ACTION, action); + crm_xml_add_int(data, "timeout", timeout); + + rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); + free_xml(data); + + return rc; +} + +const char * +stonith_error2string(enum stonith_errors return_code) +{ + const char *error_msg = NULL; + switch(return_code) { + case stonith_ok: + error_msg = ""; + break; + case st_err_not_supported: + error_msg = ""; + break; + case st_err_authentication: + error_msg = ""; + break; + case st_err_generic: + error_msg = ""; + break; + case st_err_internal: + error_msg = ""; + break; + case st_err_unknown_device: + error_msg = ""; + break; + case st_err_unknown_operation: + error_msg = ""; + break; + case st_err_unknown_port: + error_msg = ""; + break; + case st_err_none_available: + error_msg = ""; + break; + case st_err_connection: + error_msg = ""; + break; + case st_err_missing: + error_msg = ""; + break; + case st_err_exists: + error_msg = ""; + break; + case st_err_timeout: + error_msg = ""; + break; + case st_err_signal: + error_msg = ""; + break; + case st_err_ipc: + error_msg = ""; + break; + case st_err_peer: + error_msg = ""; + break; + } + + if(error_msg == NULL) { + crm_err("Unknown Stonith error code: %d", return_code); + error_msg = ""; + } + + return error_msg; +} + +static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) +{ + int rc = 0; + const stonith_notify_client_t *a_client = a; + const stonith_notify_client_t *b_client = b; + + CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); + rc = strcmp(a_client->event, b_client->event); + if(rc == 0) { + if(a_client->notify == NULL || b_client->notify == NULL) 
{ + return 0; + + } else if(a_client->notify == b_client->notify) { + return 0; + + } else if(((long)a_client->notify) < ((long)b_client->notify)) { + crm_err("callbacks for %s are not equal: %p vs. %p", + a_client->event, a_client->notify, b_client->notify); + return -1; + } + crm_err("callbacks for %s are not equal: %p vs. %p", + a_client->event, a_client->notify, b_client->notify); + return 1; + } + return rc; +} + +static int get_stonith_token(IPC_Channel *ch, char **token) +{ + int rc = stonith_ok; + xmlNode *reg_msg = NULL; + const char *msg_type = NULL; + const char *tmp_ticket = NULL; + + CRM_CHECK(ch != NULL, return st_err_missing); + CRM_CHECK(token != NULL, return st_err_missing); + + crm_debug_4("Waiting for msg on command channel"); + + reg_msg = xmlfromIPC(ch, MAX_IPC_DELAY); + + if(ch->ops->get_chan_status(ch) != IPC_CONNECT) { + crm_err("No reply message - disconnected"); + free_xml(reg_msg); + return st_err_connection; + + } else if(reg_msg == NULL) { + crm_err("No reply message - empty"); + return st_err_ipc; + } + + msg_type = crm_element_value(reg_msg, F_STONITH_OPERATION); + tmp_ticket = crm_element_value(reg_msg, F_STONITH_CLIENTID); + + if(safe_str_neq(msg_type, CRM_OP_REGISTER) ) { + crm_err("Invalid registration message: %s", msg_type); + rc = st_err_internal; + + } else if(tmp_ticket == NULL) { + crm_err("No registration token provided"); + crm_log_xml_warn(reg_msg, "Bad reply") + rc = st_err_internal; + + } else { + crm_debug("Obtained registration token: %s", tmp_ticket); + *token = crm_strdup(tmp_ticket); + } + + free_xml(reg_msg); + return rc; +} + +xmlNode *stonith_create_op( + int call_id, const char *token, const char *op, xmlNode *data, int call_options) +{ + int rc = HA_OK; + xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); + CRM_CHECK(op_msg != NULL, return NULL); + CRM_CHECK(token != NULL, return NULL); + + crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); + + crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); + 
crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); + crm_xml_add(op_msg, F_STONITH_OPERATION, op); + crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); + crm_debug_4("Sending call options: %.8lx, %d", + (long)call_options, call_options); + crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); + + if(data != NULL) { + add_message_xml(op_msg, F_STONITH_CALLDATA, data); + } + + if (rc != HA_OK) { + crm_err("Failed to create STONITH operation message"); + crm_log_xml(LOG_ERR, "op", op_msg); + free_xml(op_msg); + return NULL; + } + + return op_msg; +} + +static void stonith_destroy_op_callback(gpointer data) +{ + stonith_callback_client_t *blob = data; + if(blob->timer && blob->timer->ref > 0) { + g_source_remove(blob->timer->ref); + } + crm_free(blob->timer); + crm_free(blob); +} + +static int stonith_api_signoff(stonith_t* stonith) +{ + stonith_private_t *native = stonith->private; + + crm_debug("Signing out of the STONITH Service"); + + /* close channels */ + if (native->command_channel != NULL) { + native->command_channel->ops->destroy( + native->command_channel); + native->command_channel = NULL; + } + + if (native->callback_source != NULL) { + G_main_del_IPC_Channel(native->callback_source); + native->callback_source = NULL; + } + + if (native->callback_channel != NULL) { +#ifdef BUG + native->callback_channel->ops->destroy( + native->callback_channel); +#endif + native->callback_channel = NULL; + } + + stonith->state = stonith_disconnected; + return stonith_ok; +} + +static int stonith_api_signon( + stonith_t* stonith, const char *name, int *async_fd, int *sync_fd) +{ + int rc = stonith_ok; + xmlNode *hello = NULL; + char *uuid_ticket = NULL; + stonith_private_t *native = stonith->private; + + crm_debug_4("Connecting command channel"); + + stonith->state = stonith_connected_command; + native->command_channel = init_client_ipc_comms_nodispatch(stonith_channel); + + if(native->command_channel == NULL) { + crm_debug("Connection to command channel failed"); 
+ rc = st_err_connection; + + } else if(native->command_channel->ch_status != IPC_CONNECT) { + crm_err("Connection may have succeeded," + " but authentication to command channel failed"); + rc = st_err_authentication; + } + + if(rc == stonith_ok) { + rc = get_stonith_token(native->command_channel, &uuid_ticket); + if(rc == stonith_ok) { + native->token = uuid_ticket; + uuid_ticket = NULL; + + } else { + stonith->state = stonith_disconnected; + native->command_channel->ops->disconnect(native->command_channel); + return rc; + } + } + + native->callback_channel = init_client_ipc_comms_nodispatch( + stonith_channel_callback); + + if(native->callback_channel == NULL) { + crm_debug("Connection to callback channel failed"); + rc = st_err_connection; + + } else if(native->callback_channel->ch_status != IPC_CONNECT) { + crm_err("Connection may have succeeded," + " but authentication to command channel failed"); + rc = st_err_authentication; + } + + if(rc == stonith_ok) { + native->callback_channel->send_queue->max_qlen = 500; + rc = get_stonith_token(native->callback_channel, &uuid_ticket); + if(rc == stonith_ok) { + crm_free(native->token); + native->token = uuid_ticket; + } + } + + if(rc == stonith_ok) { + CRM_CHECK(native->token != NULL, ;); + hello = stonith_create_op(0, native->token, CRM_OP_REGISTER, NULL, 0); + crm_xml_add(hello, F_STONITH_CLIENTNAME, name); + + if(send_ipc_message(native->command_channel, hello) == FALSE) { + rc = st_err_internal; + } + + free_xml(hello); + } + + if(rc == stonith_ok) { + gboolean do_mainloop = TRUE; + if(async_fd != NULL) { + do_mainloop = FALSE; + *async_fd = native->callback_channel->ops->get_recv_select_fd(native->callback_channel); + } + + if(sync_fd != NULL) { + do_mainloop = FALSE; + *sync_fd = native->callback_channel->ops->get_send_select_fd(native->callback_channel); + } + + if(do_mainloop) { + crm_debug_4("Connecting callback channel"); + native->callback_source = G_main_add_IPC_Channel( + G_PRIORITY_HIGH, 
native->callback_channel, FALSE, stonith_dispatch, + stonith, default_ipc_connection_destroy); + + if(native->callback_source == NULL) { + crm_err("Callback source not recorded"); + rc = st_err_connection; + + } else { + set_IPC_Channel_dnotify( + native->callback_source, stonith_connection_destroy); + } + } + } + + if(rc == stonith_ok) { +#if HAVE_MSGFROMIPC_TIMEOUT + stonith->call_timeout = MAX_IPC_DELAY; +#endif + crm_debug("Connection to STONITH successful"); + return stonith_ok; + } + + crm_debug("Connection to STONITH failed: %s", stonith_error2string(rc)); + stonith->cmds->disconnect(stonith); + return rc; +} + +static int stonith_set_notification(stonith_t* stonith, const char *callback, int enabled) +{ + xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__); + stonith_private_t *native = stonith->private; + + if(stonith->state != stonith_disconnected) { + crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); + if(enabled) { + crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); + } else { + crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); + } + send_ipc_message(native->callback_channel, notify_msg); + } + + free_xml(notify_msg); + return stonith_ok; +} + +static int stonith_api_add_notification( + stonith_t *stonith, const char *event, + void (*callback)(stonith_t *stonith, const char *event, xmlNode *msg)) +{ + GList *list_item = NULL; + stonith_notify_client_t *new_client = NULL; + + crm_debug_2("Adding callback for %s events (%d)", + event, g_list_length(stonith->notify_list)); + + crm_malloc0(new_client, sizeof(stonith_notify_client_t)); + new_client->event = event; + new_client->notify = callback; + + list_item = g_list_find_custom( + stonith->notify_list, new_client, stonithlib_GCompareFunc); + + if(list_item != NULL) { + crm_warn("Callback already present"); + crm_free(new_client); + return st_err_exists; + + } else { + stonith->notify_list = g_list_append( + stonith->notify_list, new_client); + + 
stonith_set_notification(stonith, event, 1); + + crm_debug_3("Callback added (%d)", g_list_length(stonith->notify_list)); + } + return stonith_ok; +} + + +static int stonith_api_del_notification(stonith_t *stonith, const char *event) +{ + GList *list_item = NULL; + stonith_notify_client_t *new_client = NULL; + + crm_debug("Removing callback for %s events", event); + + crm_malloc0(new_client, sizeof(stonith_notify_client_t)); + new_client->event = event; + new_client->notify = NULL; + + list_item = g_list_find_custom( + stonith->notify_list, new_client, stonithlib_GCompareFunc); + + stonith_set_notification(stonith, event, 0); + + if(list_item != NULL) { + stonith_notify_client_t *list_client = list_item->data; + stonith->notify_list = + g_list_remove(stonith->notify_list, list_client); + crm_free(list_client); + + crm_debug_3("Removed callback"); + + } else { + crm_debug_3("Callback not present"); + } + crm_free(new_client); + return stonith_ok; +} + +static gboolean stonith_async_timeout_handler(gpointer data) +{ + struct timer_rec_s *timer = data; + crm_debug("Async call %d timed out after %ds", timer->call_id, timer->timeout); + stonith_perform_callback(timer->stonith, NULL, timer->call_id, st_err_timeout); + + /* Always return TRUE, never remove the handler + * We do that in stonith_del_callback() + */ + return TRUE; +} + +static int stonith_api_add_callback( + stonith_t *stonith, int call_id, int timeout, gboolean only_success, + void *user_data, const char *callback_name, + void (*callback)( + stonith_t *st, const xmlNode *msg, int call, int rc, xmlNode *output, void *userdata)) +{ + stonith_callback_client_t *blob = NULL; + CRM_CHECK(stonith != NULL, return st_err_missing); + CRM_CHECK(stonith->private != NULL, return st_err_missing); + + if(call_id == 0) { + stonith_private_t *private = stonith->private; + private->op_callback = callback; + + } else if(call_id < 0) { + if(only_success == FALSE) { + callback(stonith, NULL, call_id, call_id, NULL, user_data); 
+ } else { + crm_warn("STONITH call failed: %s", stonith_error2string(call_id)); + } + return FALSE; + } + + crm_malloc0(blob, sizeof(stonith_callback_client_t)); + blob->id = callback_name; + blob->only_success = only_success; + blob->user_data = user_data; + blob->callback = callback; + + if(timeout > 0) { + struct timer_rec_s *async_timer = NULL; + + crm_malloc0(async_timer, sizeof(struct timer_rec_s)); + blob->timer = async_timer; + + async_timer->stonith = stonith; + async_timer->call_id = call_id; + async_timer->timeout = timeout*1000; + async_timer->ref = g_timeout_add( + async_timer->timeout, stonith_async_timeout_handler, async_timer); + } + + g_hash_table_insert(stonith_op_callback_table, GINT_TO_POINTER(call_id), blob); + + return TRUE; +} + +static int stonith_api_del_callback(stonith_t *stonith, int call_id, gboolean all_callbacks) +{ + stonith_private_t *private = stonith->private; + + if(all_callbacks) { + private->op_callback = NULL; + if(stonith_op_callback_table != NULL) { + g_hash_table_destroy(stonith_op_callback_table); + } + + stonith_op_callback_table = g_hash_table_new_full( + g_direct_hash, g_direct_equal, + NULL, stonith_destroy_op_callback); + + } else if(call_id == 0) { + private->op_callback = NULL; + + } else { + g_hash_table_remove(stonith_op_callback_table, GINT_TO_POINTER(call_id)); + } + return stonith_ok; +} + +static void stonith_dump_pending_op( + gpointer key, gpointer value, gpointer user_data) +{ + int call = GPOINTER_TO_INT(key); + stonith_callback_client_t *blob = value; + + crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); +} + +void stonith_dump_pending_callbacks(void) +{ + if(stonith_op_callback_table == NULL) { + return; + } + return g_hash_table_foreach( + stonith_op_callback_table, stonith_dump_pending_op, NULL); +} + +void stonith_perform_callback(stonith_t *stonith, xmlNode *msg, int call_id, int rc) +{ + xmlNode *output = NULL; + stonith_private_t *private = NULL; + stonith_callback_client_t *blob = 
NULL; + stonith_callback_client_t local_blob; + CRM_CHECK(stonith != NULL, return); + CRM_CHECK(stonith->private != NULL, return); + + private = stonith->private; + + local_blob.id = NULL; + local_blob.callback = NULL; + local_blob.user_data = NULL; + local_blob.only_success = FALSE; + + if(msg != NULL) { + crm_element_value_int(msg, F_STONITH_RC, &rc); + crm_element_value_int(msg, F_STONITH_CALLID, &call_id); + output = get_message_xml(msg, F_STONITH_CALLDATA); + } + + blob = g_hash_table_lookup( + stonith_op_callback_table, GINT_TO_POINTER(call_id)); + + if(blob != NULL) { + local_blob = *blob; + blob = NULL; + + stonith_api_del_callback(stonith, call_id, FALSE); + + } else { + crm_debug_2("No callback found for call %d", call_id); + local_blob.callback = NULL; + } + + if(stonith == NULL) { + crm_debug("No stonith object supplied"); + } + + if(local_blob.callback != NULL + && (rc == stonith_ok || local_blob.only_success == FALSE)) { + crm_debug_2("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); + local_blob.callback(stonith, msg, call_id, rc, output, local_blob.user_data); + + } else if(private->op_callback == NULL && rc != stonith_ok) { + crm_warn("STONITH command failed: %s", stonith_error2string(rc)); + crm_log_xml(LOG_DEBUG, "Failed STONITH Update", msg); + } + + if(private->op_callback != NULL) { + crm_debug_2("Invoking global callback for call %d", call_id); + private->op_callback(stonith, msg, call_id, rc, output, NULL); + } + crm_debug_4("OP callback activated."); +} + +static void stonith_send_notification(gpointer data, gpointer user_data) +{ + struct notify_blob_s *blob = user_data; + stonith_notify_client_t *entry = data; + const char *event = NULL; + + if(blob->xml == NULL) { + crm_warn("Skipping callback - NULL message"); + return; + } + + event = crm_element_value(blob->xml, F_SUBTYPE); + + if(entry == NULL) { + crm_warn("Skipping callback - NULL callback client"); + return; + + } else if(entry->notify == NULL) { + 
crm_warn("Skipping callback - NULL callback"); + return; + + } else if(safe_str_neq(entry->event, event)) { + crm_debug_4("Skipping callback - event mismatch %p/%s vs. %s", + entry, entry->event, event); + return; + } + + crm_debug_4("Invoking callback for %p/%s event...", entry, event); + entry->notify(blob->stonith, event, blob->xml); + crm_debug_4("Callback invoked..."); +} + +int stonith_send_command( + stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, + int call_options, int timeout) +{ + int rc = HA_OK; + + xmlNode *op_msg = NULL; + xmlNode *op_reply = NULL; + + stonith_private_t *native = stonith->private; + + if(stonith->state == stonith_disconnected) { + return st_err_connection; + } + + if(output_data != NULL) { + *output_data = NULL; + } + + if(op == NULL) { + crm_err("No operation specified"); + return st_err_missing; + } + + stonith->call_id++; + /* prevent call_id from being negative (or zero) and conflicting + * with the stonith_errors enum + * use 2 because we use it as (stonith->call_id - 1) below + */ + if(stonith->call_id < 1) { + stonith->call_id = 1; + } + + CRM_CHECK(native->token != NULL, ;); + op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); + if(op_msg == NULL) { + return st_err_missing; + } + + crm_debug_3("Sending %s message to STONITH service", op); + if(send_ipc_message(native->command_channel, op_msg) == FALSE) { + crm_err("Sending message to STONITH service FAILED"); + free_xml(op_msg); + return st_err_ipc; + + } else { + crm_debug_3("Message sent"); + } + + free_xml(op_msg); + + if((call_options & st_opt_discard_reply)) { + crm_debug_3("Discarding reply"); + return stonith_ok; + + } else if(!(call_options & st_opt_sync_call)) { + crm_debug_3("Async call, returning"); + CRM_CHECK(stonith->call_id != 0, return st_err_ipc); + + return stonith->call_id; + } + + rc = IPC_OK; + crm_debug_3("Waiting for a syncronous reply"); + + rc = stonith_ok; + 
while(IPC_ISRCONN(native->command_channel)) { + int reply_id = -1; + int msg_id = stonith->call_id; + + op_reply = xmlfromIPC(native->command_channel, stonith->call_timeout); + if(op_reply == NULL) { + rc = st_err_peer; + break; + } + + crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); + if(reply_id <= 0) { + rc = st_err_peer; + break; + + } else if(reply_id == msg_id) { + crm_debug_3("Syncronous reply received"); + crm_log_xml(LOG_MSG, "Reply", op_reply); + if(crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { + rc = st_err_peer; + } + + if(output_data != NULL && is_not_set(call_options, st_opt_discard_reply)) { + *output_data = op_reply; + op_reply = NULL; + } + + break; + + } else if(reply_id < msg_id) { + crm_debug("Recieved old reply: %d (wanted %d)", reply_id, msg_id); + crm_log_xml(LOG_MSG, "Old reply", op_reply); + + } else if((reply_id - 10000) > msg_id) { + /* wrap-around case */ + crm_debug("Recieved old reply: %d (wanted %d)", reply_id, msg_id); + crm_log_xml(LOG_MSG, "Old reply", op_reply); + + } else { + crm_err("Received a __future__ reply:" + " %d (wanted %d)", reply_id, msg_id); + } + free_xml(op_reply); + op_reply = NULL; + } + + if(IPC_ISRCONN(native->command_channel) == FALSE) { + crm_err("STONITH disconnected: %d", native->command_channel->ch_status); + stonith->state = stonith_disconnected; + } + + if(op_reply == NULL && stonith->state == stonith_disconnected) { + rc = st_err_connection; + + } else if(rc == stonith_ok && op_reply == NULL) { + rc = st_err_peer; + } + + free_xml(op_reply); + return rc; +} + +static gboolean stonith_msgready(stonith_t* stonith) +{ + stonith_private_t *private = NULL; + + if (stonith == NULL) { + crm_err("No STONITH!"); + return FALSE; + } + + private = stonith->private; + + if(private->command_channel != NULL) { + /* drain the channel */ + IPC_Channel *cmd_ch = private->command_channel; + xmlNode *cmd_msg = NULL; + while(cmd_ch->ch_status != IPC_DISCONNECT + && 
cmd_ch->ops->is_message_pending(cmd_ch)) { + /* this will happen when the STONITH exited from beneath us */ + cmd_msg = xmlfromIPC(cmd_ch, MAX_IPC_DELAY); + free_xml(cmd_msg); + } + + } else { + crm_err("No command channel"); + } + + if(private->callback_channel == NULL) { + crm_err("No callback channel"); + return FALSE; + + } else if(private->callback_channel->ch_status == IPC_DISCONNECT) { + crm_info("Lost connection to the STONITH service [%d].", + private->callback_channel->farside_pid); + return FALSE; + + } else if(private->callback_channel->ops->is_message_pending( + private->callback_channel)) { + crm_debug_4("Message pending on command channel [%d]", + private->callback_channel->farside_pid); + return TRUE; + } + + crm_debug_3("No message pending"); + return FALSE; +} + +static int stonith_rcvmsg(stonith_t* stonith) +{ + const char *type = NULL; + stonith_private_t *private = NULL; + struct notify_blob_s blob; + + if (stonith == NULL) { + crm_err("No STONITH!"); + return FALSE; + } + + blob.stonith = stonith; + private = stonith->private; + + /* if it is not blocking mode and no message in the channel, return */ + if (stonith_msgready(stonith) == FALSE) { + crm_debug_3("No message ready and non-blocking..."); + return 0; + } + + /* IPC_INTR is not a factor here */ + blob.xml = xmlfromIPC(private->callback_channel, MAX_IPC_DELAY); + if (blob.xml == NULL) { + crm_warn("Received a NULL msg from STONITH service."); + return 0; + } + + /* do callbacks */ + type = crm_element_value(blob.xml, F_TYPE); + crm_debug_4("Activating %s callbacks...", type); + + if(safe_str_eq(type, T_STONITH_NG)) { + stonith_perform_callback(stonith, blob.xml, 0, 0); + + } else if(safe_str_eq(type, T_STONITH_NOTIFY)) { + g_list_foreach(stonith->notify_list, stonith_send_notification, &blob); + + } else { + crm_err("Unknown message type: %s", type); + crm_log_xml_warn(blob.xml, "BadReply"); + } + + free_xml(blob.xml); + + return 1; +} + +gboolean stonith_dispatch(IPC_Channel *channel, 
gpointer user_data) +{ + stonith_t *stonith = user_data; + stonith_private_t *private = NULL; + gboolean stay_connected = TRUE; + + CRM_CHECK(stonith != NULL, return FALSE); + + private = stonith->private; + CRM_CHECK(private->callback_channel == channel, return FALSE); + + while(stonith_msgready(stonith)) { + /* invoke the callbacks but dont block */ + int rc = stonith_rcvmsg(stonith); + if( rc < 0) { + crm_err("Message acquisition failed: %d", rc); + break; + + } else if(rc == 0) { + break; + } + } + + if(private->callback_channel + && private->callback_channel->ch_status != IPC_CONNECT) { + crm_crit("Lost connection to the STONITH service [%d/callback].", + channel->farside_pid); + private->callback_source = NULL; + stay_connected = FALSE; + } + + if(private->command_channel + && private->command_channel->ch_status != IPC_CONNECT) { + crm_crit("Lost connection to the STONITH service [%d/command].", + channel->farside_pid); + private->callback_source = NULL; + stay_connected = FALSE; + } + + return stay_connected; +} + +static int stonith_api_free (stonith_t* stonith) +{ + int rc = stonith_ok; + + if(stonith->state != stonith_disconnected) { + rc = stonith->cmds->disconnect(stonith); + } + + if(stonith->state == stonith_disconnected) { + stonith_private_t *private = stonith->private; + crm_free(private->token); + crm_free(stonith->private); + crm_free(stonith->cmds); + crm_free(stonith); + } + + return rc; +} + +void stonith_api_delete(stonith_t *stonith) +{ + GList *list = stonith->notify_list; + while(list != NULL) { + stonith_notify_client_t *client = g_list_nth_data(list, 0); + list = g_list_remove(list, client); + crm_free(client); + } + + g_hash_table_destroy(stonith_op_callback_table); + stonith->cmds->free(stonith); + stonith = NULL; +} + +stonith_t *stonith_api_new(void) +{ + stonith_t* new_stonith = NULL; + stonith_private_t* private = NULL; + crm_malloc0(new_stonith, sizeof(stonith_t)); + crm_malloc0(private, sizeof(stonith_private_t)); + 
new_stonith->private = private; + + if(stonith_op_callback_table != NULL) { + g_hash_table_destroy(stonith_op_callback_table); + stonith_op_callback_table = NULL; + } + if(stonith_op_callback_table == NULL) { + stonith_op_callback_table = g_hash_table_new_full( + g_direct_hash, g_direct_equal, + NULL, stonith_destroy_op_callback); + } + + new_stonith->call_id = 1; + new_stonith->notify_list = NULL; + new_stonith->state = stonith_disconnected; + + crm_malloc0(new_stonith->cmds, sizeof(stonith_api_operations_t)); + + new_stonith->cmds->free = stonith_api_free; + new_stonith->cmds->connect = stonith_api_signon; + new_stonith->cmds->disconnect = stonith_api_signoff; + + new_stonith->cmds->call = stonith_api_call; + new_stonith->cmds->fence = stonith_api_fence; + new_stonith->cmds->metadata = stonith_api_device_metadata; + + new_stonith->cmds->query = stonith_api_query; + new_stonith->cmds->remove_device = stonith_api_remove_device; + new_stonith->cmds->register_device = stonith_api_register_device; + + new_stonith->cmds->remove_callback = stonith_api_del_callback; + new_stonith->cmds->register_callback = stonith_api_add_callback; + new_stonith->cmds->remove_notification = stonith_api_del_notification; + new_stonith->cmds->register_notification = stonith_api_add_notification; + + return new_stonith; +} + diff --git a/lib/pengine/common.c b/lib/pengine/common.c index d5d702141f..b171a65aee 100644 --- a/lib/pengine/common.c +++ b/lib/pengine/common.c @@ -1,409 +1,431 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include gboolean was_processing_error = FALSE; gboolean was_processing_warning = FALSE; static gboolean check_quorum(const char *value) { if(safe_str_eq(value, "stop")) { return TRUE; } else if(safe_str_eq(value, "freeze")) { return TRUE; } else if(safe_str_eq(value, "ignore")) { return TRUE; } else if(safe_str_eq(value, "suicide")) { return TRUE; } return FALSE; } static gboolean check_health(const char *value) { if(safe_str_eq(value, "none")) { return TRUE; } else if(safe_str_eq(value, "custom")) { return TRUE; } else if(safe_str_eq(value, "only-green")) { return TRUE; } else if(safe_str_eq(value, "progressive")) { return TRUE; } else if(safe_str_eq(value, "migrate-on-red")) { return TRUE; } return FALSE; } static gboolean check_stonith_action(const char *value) { if(safe_str_eq(value, "reboot")) { return TRUE; } else if(safe_str_eq(value, "poweroff")) { return TRUE; } return FALSE; } +static gboolean +check_placement_strategy(const char *value) +{ + if(safe_str_eq(value, "default")) { + return TRUE; + + } else if(safe_str_eq(value, "utilization")) { + return TRUE; + + } else if(safe_str_eq(value, "minimal")) { + return TRUE; + + } else if(safe_str_eq(value, "balanced")) { + return TRUE; + } + return FALSE; +} + pe_cluster_option pe_opts[] = { /* name, old-name, validate, default, description */ { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, "What to do when the cluster does not have quorum", NULL }, { "symmetric-cluster", "symmetric_cluster", "boolean", NULL, "true", &check_boolean, "All resources can run anywhere by default", NULL }, { "default-resource-stickiness", 
"default_resource_stickiness", "integer", NULL, "0", &check_number, "", NULL }, { "is-managed-default", "is_managed_default", "boolean", NULL, "true", &check_boolean, "Should the cluster start/stop resources as required", NULL }, { "maintenance-mode", NULL, "boolean", NULL, "false", &check_boolean, "Should the cluster monitor resources and start/stop them as required", NULL }, { "start-failure-is-fatal", NULL, "boolean", NULL, "true", &check_boolean, "Always treat start failures as fatal", "This was the old default. However when set to FALSE, the cluster will instead use the resource's failcount and value for resource-failure-stickiness" }, /* Stonith Options */ { "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean, "Failed nodes are STONITH'd", NULL }, { "stonith-action", "stonith_action", "enum", "reboot, poweroff", "reboot", &check_stonith_action, "Action to send to STONITH device", NULL }, { "stonith-timeout", NULL, "time", NULL, "60s", &check_timer, "How long to wait for the STONITH action to complete", NULL }, { "startup-fencing", "startup_fencing", "boolean", NULL, "true", &check_boolean, "STONITH unseen nodes", "Advanced Use Only! Not using the default is very unsafe!" }, /* Timeouts etc */ { "cluster-delay", "transition_idle_timeout", "time", NULL, "60s", &check_time, "Round trip delay over the network (excluding action execution)", "The \"correct\" value will depend on the speed and load of your network and cluster nodes." }, { "batch-limit", NULL, "integer", NULL, "30", &check_number, "The number of jobs that the TE is allowed to execute in parallel", "The \"correct\" value will depend on the speed and load of your network and cluster nodes." 
}, { "default-action-timeout", "default_action_timeout", "time", NULL, "20s", &check_time, "How long to wait for actions to complete", NULL }, /* Orphans and stopping */ { "stop-all-resources", NULL, "boolean", NULL, "false", &check_boolean, "Should the cluster stop all active resources", NULL }, { "stop-orphan-resources", "stop_orphan_resources", "boolean", NULL, "true", &check_boolean, "Should deleted resources be stopped", NULL }, { "stop-orphan-actions", "stop_orphan_actions", "boolean", NULL, "true", &check_boolean, "Should deleted actions be cancelled", NULL }, { "remove-after-stop", "remove_after_stop", "boolean", NULL, "false", &check_boolean, "Remove resources from the LRM after they are stopped", "Always set this to false. Other values are, at best, poorly tested and potentially dangerous." }, /* { "", "", , "0", "", NULL }, */ /* Storing inputs */ { "pe-error-series-max", NULL, "integer", NULL, "-1", &check_number, "The number of PE inputs resulting in ERRORs to save", "Zero to disable, -1 to store unlimited." }, { "pe-warn-series-max", NULL, "integer", NULL, "-1", &check_number, "The number of PE inputs resulting in WARNINGs to save", "Zero to disable, -1 to store unlimited." }, { "pe-input-series-max", NULL, "integer", NULL, "-1", &check_number, "The number of other PE inputs to save", "Zero to disable, -1 to store unlimited." }, /* Node health */ { "node-health-strategy", NULL, "enum", "none, migrate-on-red, only-green, progressive, custom", "none", &check_health, "The strategy combining node attributes to determine overall node health.", "Requires external entities to create node attributes (named with the prefix '#health') with values: 'red', 'yellow' or 'green'."}, { "node-health-green", NULL, "integer", NULL, "0", &check_number, "The score 'green' translates to in rsc_location constraints", "Only used when node-health-strategy is set to custom or progressive." 
}, { "node-health-yellow", NULL, "integer", NULL, "0", &check_number, "The score 'yellow' translates to in rsc_location constraints", "Only used when node-health-strategy is set to custom or progressive." }, { "node-health-red", NULL, "integer", NULL, "-INFINITY", &check_number, "The score 'red' translates to in rsc_location constraints", "Only used when node-health-strategy is set to custom or progressive." }, + + /*Placement Strategy*/ + { "placement-strategy", NULL, "enum", "default, utilization, minimal, balanced", "default", &check_placement_strategy, + "The strategy to determine resource placement", NULL}, }; void pe_metadata(void) { config_metadata("Policy Engine", "1.0", "Policy Engine Options", "This is a fake resource that details the options that can be configured for the Policy Engine.", pe_opts, DIMOF(pe_opts)); } void verify_pe_options(GHashTable *options) { verify_all_options(options, pe_opts, DIMOF(pe_opts)); } const char * pe_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, pe_opts, DIMOF(pe_opts), name); } const char * fail2text(enum action_fail_response fail) { const char *result = ""; switch(fail) { case action_fail_ignore: result = "ignore"; break; case action_fail_block: result = "block"; break; case action_fail_recover: result = "recover"; break; case action_fail_migrate: result = "migrate"; break; case action_fail_stop: result = "stop"; break; case action_migrate_failure: result = "atomic migration recovery"; break; case action_fail_fence: result = "fence"; break; case action_fail_standby: result = "standby"; break; } return result; } enum action_tasks text2task(const char *task) { if(safe_str_eq(task, CRMD_ACTION_STOP)) { return stop_rsc; } else if(safe_str_eq(task, CRMD_ACTION_STOPPED)) { return stopped_rsc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { return start_rsc; } else if(safe_str_eq(task, CRMD_ACTION_STARTED)) { return started_rsc; } else if(safe_str_eq(task, CRM_OP_SHUTDOWN)) { return 
shutdown_crm; } else if(safe_str_eq(task, CRM_OP_FENCE)) { return stonith_node; } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) { return monitor_rsc; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { return action_notify; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFIED)) { return action_notified; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { return action_promote; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { return action_demote; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTED)) { return action_promoted; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTED)) { return action_demoted; } else if(safe_str_eq(task, CRMD_ACTION_CANCEL)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_DELETE)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) { return no_action; } else if(safe_str_eq(task, CRM_OP_PROBED)) { return no_action; } else if(safe_str_eq(task, CRM_OP_LRM_REFRESH)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_MIGRATE)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { return no_action; } else if(safe_str_eq(task, "fail")) { return no_action; } else if(safe_str_eq(task, "stonith_up")) { return no_action; } else if(safe_str_eq(task, "stonith_complete")) { return no_action; } else if(safe_str_eq(task, "all_stopped")) { return no_action; } crm_debug("Unsupported action: %s", task); return no_action; } const char * task2text(enum action_tasks task) { const char *result = ""; switch(task) { case no_action: result = "no_action"; break; case stop_rsc: result = CRMD_ACTION_STOP; break; case stopped_rsc: result = CRMD_ACTION_STOPPED; break; case start_rsc: result = CRMD_ACTION_START; break; case started_rsc: result = CRMD_ACTION_STARTED; break; case shutdown_crm: result = CRM_OP_SHUTDOWN; break; case stonith_node: result = CRM_OP_FENCE; break; case monitor_rsc: result = CRMD_ACTION_STATUS; break; case action_notify: result = CRMD_ACTION_NOTIFY; break; case action_notified: result = 
CRMD_ACTION_NOTIFIED; break; case action_promote: result = CRMD_ACTION_PROMOTE; break; case action_promoted: result = CRMD_ACTION_PROMOTED; break; case action_demote: result = CRMD_ACTION_DEMOTE; break; case action_demoted: result = CRMD_ACTION_DEMOTED; break; } return result; } const char * role2text(enum rsc_role_e role) { CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S); CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S); switch(role) { case RSC_ROLE_UNKNOWN: return RSC_ROLE_UNKNOWN_S; case RSC_ROLE_STOPPED: return RSC_ROLE_STOPPED_S; case RSC_ROLE_STARTED: return RSC_ROLE_STARTED_S; case RSC_ROLE_SLAVE: return RSC_ROLE_SLAVE_S; case RSC_ROLE_MASTER: return RSC_ROLE_MASTER_S; } return RSC_ROLE_UNKNOWN_S; } enum rsc_role_e text2role(const char *role) { if(safe_str_eq(role, RSC_ROLE_STOPPED_S)) { return RSC_ROLE_STOPPED; } else if(safe_str_eq(role, RSC_ROLE_STARTED_S)) { return RSC_ROLE_STARTED; } else if(safe_str_eq(role, RSC_ROLE_SLAVE_S)) { return RSC_ROLE_SLAVE; } else if(safe_str_eq(role, RSC_ROLE_MASTER_S)) { return RSC_ROLE_MASTER; } else if(safe_str_eq(role, RSC_ROLE_UNKNOWN_S)) { return RSC_ROLE_UNKNOWN; } crm_err("Unknown role: %s", role); return RSC_ROLE_UNKNOWN; } int merge_weights(int w1, int w2) { int result = w1 + w2; if(w1 <= -INFINITY || w2 <= -INFINITY) { if(w1 >= INFINITY || w2 >= INFINITY) { crm_debug_2("-INFINITY + INFINITY == -INFINITY"); } return -INFINITY; } else if(w1 >= INFINITY || w2 >= INFINITY) { return INFINITY; } /* detect wrap-around */ if(result > 0) { if(w1 <= 0 && w2 < 0) { result = -INFINITY; } } else if(w1 > 0 && w2 > 0) { result = INFINITY; } /* detect +/- INFINITY */ if(result >= INFINITY) { result = INFINITY; } else if(result <= -INFINITY) { result = -INFINITY; } crm_debug_5("%d + %d = %d", w1, w2, result); return result; } void add_hash_param(GHashTable *hash, const char *name, const char *value) { CRM_CHECK(hash != NULL, return); crm_debug_3("adding: name=%s value=%s", crm_str(name), crm_str(value)); 
if(name == NULL || value == NULL) { return; } else if(safe_str_eq(value, "#default")) { return; } else if(g_hash_table_lookup(hash, name) == NULL) { g_hash_table_insert(hash, crm_strdup(name), crm_strdup(value)); } } diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 13572aa588..58c9fae4d8 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -1,479 +1,488 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include extern xmlNode *get_object_root(const char *object_type,xmlNode *the_root); void populate_hash(xmlNode *nvpair_list, GHashTable *hash, const char **attrs, int attrs_length); resource_object_functions_t resource_class_functions[] = { { native_unpack, native_find_rsc, native_parameter, native_print, native_active, native_resource_state, native_location, native_free }, { group_unpack, native_find_rsc, native_parameter, group_print, group_active, group_resource_state, native_location, group_free }, { clone_unpack, native_find_rsc, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free }, { master_unpack, native_find_rsc, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free } }; enum pe_obj_types get_resource_type(const char *name) { 
if(safe_str_eq(name, XML_CIB_TAG_RESOURCE)) { return pe_native; } else if(safe_str_eq(name, XML_CIB_TAG_GROUP)) { return pe_group; } else if(safe_str_eq(name, XML_CIB_TAG_INCARNATION)) { return pe_clone; } else if(safe_str_eq(name, XML_CIB_TAG_MASTER)) { return pe_master; } return pe_unknown; } const char *get_resource_typename(enum pe_obj_types type) { switch(type) { case pe_native: return XML_CIB_TAG_RESOURCE; case pe_group: return XML_CIB_TAG_GROUP; case pe_clone: return XML_CIB_TAG_INCARNATION; case pe_master: return XML_CIB_TAG_MASTER; case pe_unknown: return "unknown"; } return ""; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } void get_meta_attributes(GHashTable *meta_hash, resource_t *rsc, node_t *node, pe_working_set_t *data_set) { GHashTable *node_hash = NULL; if(node) { node_hash = node->details->attrs; } xml_prop_iter(rsc->xml, prop_name, prop_value, add_hash_param(meta_hash, prop_name, prop_value); ); unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_META_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); /* populate from the regular attributes until the GUI can create * meta attributes */ unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); /* set anything else based on the parent */ if(rsc->parent != NULL) { g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash); } /* and finally check the defaults */ unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_META_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); } void get_rsc_attributes(GHashTable *meta_hash, resource_t *rsc, node_t *node, pe_working_set_t *data_set) { GHashTable *node_hash = NULL; if(node) { node_hash = node->details->attrs; } unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); /* set anything else based on the parent */ 
if(rsc->parent != NULL) { get_rsc_attributes(meta_hash, rsc->parent, node, data_set); } else { /* and finally check the defaults */ unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); } } gboolean common_unpack(xmlNode * xml_obj, resource_t **rsc, resource_t *parent, pe_working_set_t *data_set) { xmlNode *ops = NULL; resource_t *top = NULL; const char *value = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *class = crm_element_value(xml_obj, XML_AGENT_ATTR_CLASS); crm_log_xml_debug_3(xml_obj, "Processing resource input..."); if(id == NULL) { pe_err("Must specify id tag in "); return FALSE; } else if(rsc == NULL) { pe_err("Nowhere to unpack resource into"); return FALSE; } crm_malloc0(*rsc, sizeof(resource_t)); ops = find_xml_node(xml_obj, "operations", FALSE); (*rsc)->xml = xml_obj; (*rsc)->parent = parent; (*rsc)->ops_xml = expand_idref(ops, data_set->input); (*rsc)->variant = get_resource_type(crm_element_name(xml_obj)); if((*rsc)->variant == pe_unknown) { pe_err("Unknown resource type: %s", crm_element_name(xml_obj)); crm_free(*rsc); return FALSE; } (*rsc)->parameters = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); (*rsc)->meta = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); value = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION); if(value) { (*rsc)->id = crm_concat(id, value, ':'); add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value); } else { (*rsc)->id = crm_strdup(id); } if(parent) { (*rsc)->long_name = crm_concat(parent->long_name, (*rsc)->id, ':'); } else { (*rsc)->long_name = crm_strdup((*rsc)->id); } (*rsc)->fns = &resource_class_functions[(*rsc)->variant]; crm_debug_3("Unpacking resource..."); get_meta_attributes((*rsc)->meta, *rsc, NULL, data_set); (*rsc)->flags = 0; set_bit((*rsc)->flags, pe_rsc_runnable); set_bit((*rsc)->flags, 
pe_rsc_provisional); if(is_set(data_set->flags, pe_flag_is_managed_default)) { set_bit((*rsc)->flags, pe_rsc_managed); } (*rsc)->rsc_cons = NULL; (*rsc)->actions = NULL; (*rsc)->role = RSC_ROLE_STOPPED; (*rsc)->next_role = RSC_ROLE_UNKNOWN; (*rsc)->recovery_type = recovery_stop_start; (*rsc)->stickiness = data_set->default_resource_stickiness; (*rsc)->migration_threshold= INFINITY; (*rsc)->failure_timeout = 0; value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY); (*rsc)->priority = crm_parse_int(value, "0"); (*rsc)->effective_priority = (*rsc)->priority; value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY); if(crm_is_true(value)) { set_bit((*rsc)->flags, pe_rsc_notify); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MANAGED); if(value != NULL && safe_str_neq("default", value)) { gboolean bool_value = TRUE; crm_str_to_boolean(value, &bool_value); if(bool_value == FALSE) { clear_bit((*rsc)->flags, pe_rsc_managed); } else { set_bit((*rsc)->flags, pe_rsc_managed); } } if(is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit((*rsc)->flags, pe_rsc_managed); } crm_debug_2("Options for %s", (*rsc)->id); value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); top = uber_parent(*rsc); if(crm_is_true(value) || top->variant < pe_clone) { set_bit((*rsc)->flags, pe_rsc_unique); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART); if(safe_str_eq(value, "restart")) { (*rsc)->restart_type = pe_restart_restart; crm_debug_2("\tDependancy restart handling: restart"); } else { (*rsc)->restart_type = pe_restart_ignore; crm_debug_2("\tDependancy restart handling: ignore"); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MULTIPLE); if(safe_str_eq(value, "stop_only")) { (*rsc)->recovery_type = recovery_stop_only; crm_debug_2("\tMultiple running resource recovery: stop only"); } else if(safe_str_eq(value, "block")) { (*rsc)->recovery_type = recovery_block; crm_debug_2("\tMultiple running resource recovery: block"); } 
else { (*rsc)->recovery_type = recovery_stop_start; crm_debug_2("\tMultiple running resource recovery: stop/start"); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_STICKINESS); if(value != NULL && safe_str_neq("default", value)) { (*rsc)->stickiness = char2score(value); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_STICKINESS); if(value != NULL && safe_str_neq("default", value)) { (*rsc)->migration_threshold = char2score(value); } else if(value == NULL) { /* Make a best-effort guess at a migration threshold for people with 0.6 configs * try with underscores and hyphens, from both the resource and global defaults section */ value = g_hash_table_lookup((*rsc)->meta, "resource-failure-stickiness"); if(value == NULL) { value = g_hash_table_lookup((*rsc)->meta, "resource_failure_stickiness"); } if(value == NULL) { value = g_hash_table_lookup(data_set->config_hash, "default-resource-failure-stickiness"); } if(value == NULL) { value = g_hash_table_lookup(data_set->config_hash, "default_resource_failure_stickiness"); } if(value) { int fail_sticky = char2score(value); if(fail_sticky == -INFINITY) { (*rsc)->migration_threshold = 1; crm_info("Set a migration threshold of %d for %s based on a failure-stickiness of %s", (*rsc)->migration_threshold, (*rsc)->id, value); } else if((*rsc)->stickiness != 0 && fail_sticky != 0) { (*rsc)->migration_threshold = (*rsc)->stickiness / fail_sticky; if((*rsc)->migration_threshold < 0) { /* Make sure it's positive */ (*rsc)->migration_threshold = 0 - (*rsc)->migration_threshold; } (*rsc)->migration_threshold += 1; crm_info("Calculated a migration threshold for %s of %d based on a stickiness of %d/%s", (*rsc)->id, (*rsc)->migration_threshold, (*rsc)->stickiness, value); } } } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_TIMEOUT); if(value != NULL) { /* call crm_get_msec() and convert back to seconds */ (*rsc)->failure_timeout = (crm_get_msec(value) / 1000); } get_target_role(*rsc, 
&((*rsc)->next_role)); crm_debug_2("\tDesired next state: %s", (*rsc)->next_role!=RSC_ROLE_UNKNOWN?role2text((*rsc)->next_role):"default"); if((*rsc)->fns->unpack(*rsc, data_set) == FALSE) { return FALSE; } if(is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_location(*rsc, NULL, 0, "symmetric_default", data_set); } crm_debug_2("\tAction notification: %s", is_set((*rsc)->flags, pe_rsc_notify)?"required":"not required"); if(safe_str_eq(class, "stonith")) { set_bit_inplace(data_set->flags, pe_flag_have_stonith_resource); } + + (*rsc)->utilization = g_hash_table_new_full( + g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); + + unpack_instance_attributes(data_set->input, (*rsc)->xml, XML_TAG_UTILIZATION, NULL, + (*rsc)->utilization, NULL, FALSE, data_set->now); /* data_set->resources = g_list_append(data_set->resources, (*rsc)); */ return TRUE; } void common_update_score(resource_t *rsc, const char *id, int score) { node_t *node = NULL; node = pe_find_node_id(rsc->allowed_nodes, id); if(node != NULL) { crm_debug_2("Updating score for %s on %s: %d + %d", rsc->id, id, node->weight, score); node->weight = merge_weights(node->weight, score); } if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, common_update_score(child_rsc, id, score); ); } } resource_t *uber_parent(resource_t *rsc) { resource_t *parent = rsc; while(parent != NULL && parent->parent != NULL) { parent = parent->parent; } return parent; } node_t *rsc_known_on(resource_t *rsc, GListPtr *list) { node_t *one = NULL; GListPtr result = NULL; if(rsc->children) { slist_iter(child, resource_t, rsc->children, lpc, rsc_known_on(child, &result); ); } else if(rsc->known_on) { result = g_list_copy(rsc->known_on); } if(result && g_list_length(result) == 1) { one = g_list_nth_data(result, 0); } if(list) { slist_iter(node, node_t, result, lpc, if(*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) { *list = g_list_append(*list, node); } ); } 
g_list_free(result); return one; } void common_free(resource_t *rsc) { if(rsc == NULL) { return; } crm_debug_5("Freeing %s %d", rsc->id, rsc->variant); g_list_free(rsc->rsc_cons); g_list_free(rsc->rsc_cons_lhs); if(rsc->parameters != NULL) { g_hash_table_destroy(rsc->parameters); } if(rsc->meta != NULL) { g_hash_table_destroy(rsc->meta); } + if(rsc->utilization != NULL) { + g_hash_table_destroy(rsc->utilization); + } if(rsc->parent == NULL && is_set(rsc->flags, pe_rsc_orphan)) { free_xml(rsc->xml); } if(rsc->running_on) { g_list_free(rsc->running_on); rsc->running_on = NULL; } if(rsc->known_on) { g_list_free(rsc->known_on); rsc->known_on = NULL; } if(rsc->actions) { g_list_free(rsc->actions); rsc->actions = NULL; } pe_free_shallow_adv(rsc->rsc_location, FALSE); pe_free_shallow_adv(rsc->allowed_nodes, TRUE); crm_free(rsc->id); crm_free(rsc->long_name); crm_free(rsc->clone_name); crm_free(rsc->allocated_to); crm_free(rsc->variant_opaque); crm_free(rsc); crm_debug_5("Resource freed"); } diff --git a/lib/pengine/status.c b/lib/pengine/status.c index e9da4b0cdc..e3c152bbd5 100644 --- a/lib/pengine/status.c +++ b/lib/pengine/status.c @@ -1,288 +1,291 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include xmlNode * do_calculations( pe_working_set_t *data_set, xmlNode *xml_input, ha_time_t *now); extern xmlNode*get_object_root( const char *object_type, xmlNode *the_root); #define MEMCHECK_STAGE_0 0 #define check_and_exit(stage) cleanup_calculations(data_set); \ crm_mem_stats(NULL); \ crm_err("Exiting: stage %d", stage); \ exit(1); /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pe_working_set_t *data_set) { xmlNode * config = get_object_root( XML_CIB_TAG_CRMCONFIG, data_set->input); xmlNode * cib_nodes = get_object_root( XML_CIB_TAG_NODES, data_set->input); xmlNode * cib_resources = get_object_root( XML_CIB_TAG_RESOURCES, data_set->input); xmlNode * cib_status = get_object_root( XML_CIB_TAG_STATUS, data_set->input); const char *value = crm_element_value( data_set->input, XML_ATTR_HAVE_QUORUM); crm_debug_3("Beginning unpack"); /* reset remaining global variables */ if(data_set->input == NULL) { return FALSE; } if(data_set->now == NULL) { data_set->now = new_ha_date(TRUE); } if(data_set->input != NULL && crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { /* this should always be present */ data_set->dc_uuid = crm_element_value_copy( data_set->input, XML_ATTR_DC_UUID); } clear_bit_inplace(data_set->flags, pe_flag_have_quorum); if(crm_is_true(value)) { set_bit_inplace(data_set->flags, 
pe_flag_have_quorum); } data_set->op_defaults = get_object_root(XML_CIB_TAG_OPCONFIG, data_set->input); data_set->rsc_defaults = get_object_root(XML_CIB_TAG_RSCCONFIG, data_set->input); unpack_config(config, data_set); if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy != no_quorum_ignore) { crm_warn("We do not have quorum" " - fencing and resource management disabled"); } unpack_nodes(cib_nodes, data_set); unpack_resources(cib_resources, data_set); unpack_status(cib_status, data_set); return TRUE; } static void pe_free_resources(GListPtr resources) { resource_t *rsc = NULL; GListPtr iterator = resources; while(iterator != NULL) { iterator = iterator; rsc = (resource_t *)iterator->data; iterator = iterator->next; rsc->fns->free(rsc); } if(resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GListPtr actions) { GListPtr iterator = actions; while(iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if(actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GListPtr nodes) { GListPtr iterator = nodes; while(iterator != NULL) { node_t *node = (node_t*)iterator->data; struct node_shared_s *details = node->details; iterator = iterator->next; crm_debug_5("deleting node"); crm_debug_5("%s is being deleted", details->uname); print_node("delete", node, FALSE); if(details != NULL) { if(details->attrs != NULL) { g_hash_table_destroy(details->attrs); } + if(details->utilization != NULL) { + g_hash_table_destroy(details->utilization); + } pe_free_shallow_adv(details->running_rsc, FALSE); pe_free_shallow_adv(details->allocated_rsc, FALSE); crm_free(details); } crm_free(node); } if(nodes != NULL) { g_list_free(nodes); } } void cleanup_calculations(pe_working_set_t *data_set) { if(data_set == NULL) { return; } if(data_set->config_hash != NULL) { g_hash_table_destroy(data_set->config_hash); } crm_free(data_set->dc_uuid); crm_debug_3("deleting resources"); 
pe_free_resources(data_set->resources); crm_debug_3("deleting actions"); pe_free_actions(data_set->actions); crm_debug_3("deleting nodes"); pe_free_nodes(data_set->nodes); free_xml(data_set->graph); free_ha_date(data_set->now); free_xml(data_set->input); free_xml(data_set->failed); data_set->stonith_action = NULL; CRM_CHECK(data_set->ordering_constraints == NULL, ;); CRM_CHECK(data_set->placement_constraints == NULL, ;); xmlCleanupParser(); } void set_working_set_defaults(pe_working_set_t *data_set) { data_set->failed = create_xml_node(NULL, "failed-ops"); data_set->now = NULL; data_set->input = NULL; data_set->graph = NULL; data_set->dc_uuid = NULL; data_set->dc_node = NULL; data_set->nodes = NULL; data_set->actions = NULL; data_set->resources = NULL; data_set->config_hash = NULL; data_set->stonith_action = NULL; data_set->ordering_constraints = NULL; data_set->placement_constraints = NULL; data_set->colocation_constraints = NULL; data_set->order_id = 1; data_set->action_id = 1; data_set->num_synapse = 0; data_set->max_valid_nodes = 0; data_set->no_quorum_policy = no_quorum_freeze; data_set->default_resource_stickiness = 0; data_set->flags = 0x0ULL; set_bit_inplace(data_set->flags, pe_flag_stop_rsc_orphans); set_bit_inplace(data_set->flags, pe_flag_symmetric_cluster); set_bit_inplace(data_set->flags, pe_flag_is_managed_default); set_bit_inplace(data_set->flags, pe_flag_stop_action_orphans); } resource_t * pe_find_resource(GListPtr rsc_list, const char *id) { unsigned lpc = 0; resource_t *rsc = NULL; resource_t *match = NULL; if(id == NULL) { return NULL; } for(lpc = 0; lpc < g_list_length(rsc_list); lpc++) { rsc = g_list_nth_data(rsc_list, lpc); match = rsc->fns->find_rsc(rsc, id, TRUE, FALSE, NULL, TRUE); if(match != NULL) { return match; } } crm_debug_2("No match for %s", id); return NULL; } node_t * pe_find_node_id(GListPtr nodes, const char *id) { slist_iter(node, node_t, nodes, lpc, if(node && safe_str_eq(node->details->id, id)) { return node; } ); /* error 
*/ return NULL; } node_t * pe_find_node(GListPtr nodes, const char *uname) { slist_iter(node, node_t, nodes, lpc, if(node && safe_str_eq(node->details->uname, uname)) { return node; } ); /* error */ return NULL; } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index b407fd9185..f217c2d6bc 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,1717 +1,1727 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit_inplace(data_set->flags, flag); \ } else { \ clear_bit_inplace(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op( resource_t *rsc, node_t *node, xmlNode *xml_op, enum action_fail_response *failed, pe_working_set_t *data_set); gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, 
FALSE, data_set->now); verify_pe_options(data_set->config_hash); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled)?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything)?"true":"false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if(is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if(safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE){ crm_config_err("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if(do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); 
data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_debug_2("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans)?"stopped":"ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_debug_2("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans)?"stopped":"ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop)?"true":"false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_debug_2("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode)?"true":"false"); if(is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_debug_2("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default)?"":"not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_debug_2("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal)?"always fatal":"handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = 
char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_info("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", score2char(node_score_red),score2char(node_score_yellow), score2char(node_score_green)); + + data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); + crm_debug_2("Placement strategy: %s", data_set->placement_strategy); return TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } if(pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, 
g_hash_destroy_str); + new_node->details->utilization = g_hash_table_new_full( + g_str_hash, g_str_equal, + g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, FALSE, data_set); + unpack_instance_attributes( + data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, + new_node->details->utilization, NULL, FALSE, data_set->now); + data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Begining unpack... <%s id=%s... 
>", crm_element_name(xml_obj), ID(xml_obj)); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); if(is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? 
*/ gboolean unpack_status(xmlNode * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; xmlNode * lrm_rsc = NULL; xmlNode * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; add_node_attrs(attrs, this_node, TRUE, data_set); if(crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ crm_notice("Marking node %s for STONITH: The cluster does not have quorum", this_node->details->uname); this_node->details->unclean = TRUE; } if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... 
* unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char 
*join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(terminate)) { do_terminate = TRUE; } else if(terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if(t != '0' && isdigit(t)) { do_terminate = TRUE; } } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; if(do_terminate) { crm_notice("Forcing node %s to be terminated", this_node->details->uname); this_node->details->unclean = TRUE; this_node->details->shutdown = TRUE; } } else if(join_state == exp_state /* == NULL */) { crm_info("Node %s is coming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else if(safe_str_eq(join_state, CRMD_JOINSTATE_PENDING)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else if(safe_str_eq(join_state, CRMD_JOINSTATE_NACK)) { crm_warn("Node %s is not part of the cluster", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else { crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, 
expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); this_node->details->unclean = TRUE; } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); #if 0 /* While a nice optimization, it causes the cluster to block until the node * comes back online. Which is a serious problem if the cluster software * is not configured to start at boot or stonith is configured to merely * stop the node instead of restart it. * Easily triggered by setting terminate=true for the DC */ } else if(do_terminate) { crm_info("Node %s is %s after forced termination", this_node->details->uname, crm_is_true(ccm_state)?"coming up":"going down"); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); if(crm_is_true(ccm_state) == FALSE) { this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } #endif } else if(this_node->details->expected_up) { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is down", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( xmlNode * node_state, node_t *this_node, pe_working_set_t 
*data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if(shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing( node_state, this_node); } else { online = determine_online_status_fencing( node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->pending?"pending": this_node->details->standby?"standby":"online"); } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while(--lpc > 0) { switch(last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': crm_malloc0(zero, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc+1] = 
'0'; zero[lpc+2] = 0; return zero; } } return NULL; } char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len-1; while(complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; crm_malloc0(last_rsc_id, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len+1] = 0; complete = TRUE; crm_free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); break; } } return last_rsc_id; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_info(xml_rsc, "Orphan resource"); common_unpack(xml_rsc, &rsc, NULL, data_set); set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t *rsc, int sub_id, pe_working_set_t *data_set); static resource_t *find_clone(pe_working_set_t *data_set, node_t *node, resource_t *parent, const char *rsc_id) { int len = 0; resource_t *rsc = NULL; char *base = clone_zero(rsc_id); char *alt_rsc_id = crm_strdup(rsc_id); CRM_ASSERT(parent != NULL); CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master); if(base) { len = strlen(base); } if(len > 0) { base[len-1] = 0; } 
crm_debug_3("Looking for %s on %s in %s %d", rsc_id, node->details->uname, parent->id, is_set(parent->flags, pe_rsc_unique)); if(is_set(parent->flags, pe_rsc_unique)) { crm_debug_3("Looking for %s", rsc_id); rsc = parent->fns->find_rsc(parent, rsc_id, FALSE, FALSE, NULL, TRUE); } else { rsc = parent->fns->find_rsc(parent, base, FALSE, TRUE, node, TRUE); if(rsc != NULL && rsc->running_on) { rsc = NULL; crm_debug_3("Looking for an existing orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname); /* There is already an instance of this _anonymous_ clone active on "node". * * If there is a partially active orphan (only applies to clone groups) on * the same node, use that. * Otherwise create a new (orphaned) instance at "orphan_check:". */ slist_iter(child, resource_t, parent->children, lpc, node_t *loc = child->fns->location(child, NULL, TRUE); if(loc && loc->details == node->details) { resource_t *tmp = child->fns->find_rsc(child, base, FALSE, TRUE, NULL, TRUE); if(tmp && tmp->running_on == NULL) { rsc = tmp; break; } } ); goto orphan_check; } while(rsc == NULL) { crm_debug_3("Trying %s", alt_rsc_id); rsc = parent->fns->find_rsc(parent, alt_rsc_id, FALSE, FALSE, NULL, TRUE); if(rsc == NULL) { break; } else if(rsc->running_on == NULL) { break; } alt_rsc_id = increment_clone(alt_rsc_id); rsc = NULL; } } orphan_check: if(rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); crm_debug("Created orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname); rsc = top->fns->find_rsc(top, base, FALSE, TRUE, NULL, TRUE); CRM_ASSERT(rsc != NULL); } crm_free(rsc->clone_name); rsc->clone_name = NULL; if(safe_str_neq(rsc_id, rsc->id)) { crm_info("Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan)?" 
(ORPHAN)":""); rsc->clone_name = crm_strdup(rsc_id); } crm_free(alt_rsc_id); crm_free(base); return rsc; } static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, xmlNode *rsc_entry) { resource_t *rsc = NULL; resource_t *clone_parent = NULL; char *alt_rsc_id = crm_strdup(rsc_id); crm_debug_2("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(alt_rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); clone_parent = uber_parent(clone0); crm_free(tmp); crm_debug_2("%s not found: %s", alt_rsc_id, clone_parent?clone_parent->id:"orphan"); } else { clone_parent = uber_parent(rsc); } if(clone_parent && clone_parent->variant > pe_group) { rsc = find_clone(data_set, node, clone_parent, rsc_id); CRM_ASSERT(rsc != NULL); } crm_free(alt_rsc_id); return rsc; } static resource_t * process_orphan_resource(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_config_warn("Nothing known about resource %s running on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { crm_info("Making sure orphan %s is stopped", rsc_id); print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, xmlNode *migrate_op, pe_working_set_t *data_set) { if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); on_fail = 
action_fail_recover; from = pe_find_node_id(data_set->nodes, uuid); if(from != NULL) { process_rsc_state(rsc, from, on_fail, NULL, data_set); } else { crm_log_xml_err(migrate_op, "Bad Op"); } } crm_debug_2("Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch(on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ node->details->unclean = TRUE; break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__",data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_migrate_failure: /* anything extra? 
*/ break; } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { native_add_running(rsc, node, data_set); if(on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if(rsc->clone_name) { crm_debug_2("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); crm_free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); slist_iter(stop, action_t, possible_matches, lpc, stop->optional = TRUE; ); crm_free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t *node, resource_t *rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t *data_set) { const char *task = NULL; const char *status = NULL; crm_debug_3("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; if(node->details->online == FALSE) { crm_debug_4("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; } else if(start_index < stop_index) { crm_debug_4("Skipping %s/%s: not active", rsc->id, node->details->uname); break; } else if(lpc <= start_index) { crm_debug_4("Skipping %s/%s: old", id, node->details->uname); continue; } interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0) { crm_debug_4("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(status, "-1")) { crm_debug_4("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_debug_3("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, 
data_set); ); }
/*
 * calculate_active_ops: scan an operation history and report where the
 * resource was last started and last stopped.
 *
 * sorted_op_list: list of operation XML nodes; callers in this file pass a
 *                 list sorted with sort_op_by_callid.
 * start_index:    out - set to the loop counter of the last start operation
 *                 (any status), or of a later status operation that found
 *                 the resource running; -1 if there is none.
 * stop_index:     out - set to the loop counter of the last stop operation
 *                 whose op-status is "0"; -1 if there is none.
 *
 * The loop counter is the 'lpc' variable supplied by the slist_iter macro
 * (defined elsewhere; presumably the zero-based list position - TODO confirm).
 * Callers in this file treat start_index < stop_index as "not active".
 */
void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); /* only a stop with op-status "0" counts as a stop */ if(safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = lpc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { *start_index = lpc; /* no start recorded since the last stop: a status op returning rc 0 or 8 also marks the resource as active (8 is the "master mode" rc mentioned in unpack_rsc_op) */ } else if(*start_index <= *stop_index && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { *start_index = lpc; } } ); }
/*
 * unpack_lrm_rsc_state: process the recorded operation history of one
 * resource entry (rsc_entry) found in the status section of 'node'.
 * Returns early when the entry has no operations; an id that matches no
 * known resource is turned into an orphan via process_orphan_resource().
 */
static void unpack_lrm_rsc_state( node_t *node, xmlNode * rsc_entry, pe_working_set_t *data_set) { int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_debug_3("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if(rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = 
crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &on_fail, data_set); ); /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if(get_target_role(rsc, &req_role)) { if(rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if(req_role > rsc->next_role) { crm_info("%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if(saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t *node, xmlNode * lrm_rsc_list, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return FALSE); crm_debug_3("Unpacking resources on %s", node->details->uname); xml_child_iter_filter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, unpack_lrm_rsc_state(node, rsc_entry, data_set); ); return TRUE; } static void set_active(resource_t *rsc) { resource_t *top = uber_parent(rsc); if(top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, enum action_fail_response *on_fail, pe_working_set_t *data_set) { const char *id = NULL; const char *key = NULL; const char *task = NULL; const char *magic = NULL; const char *task_id = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_digest = NULL; const char *op_version = NULL; 
int interval = 0; int task_status_i = -2; int actual_rc_i = 0; int target_rc = -1; action_t *action = NULL; node_t *effective_node = NULL; resource_t *failed = NULL; gboolean expired = FALSE; gboolean is_probe = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_digest = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE); if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if(rsc->failure_timeout > 0) { int last_run = 0; if(crm_element_value_int(xml_op, "last-run", &last_run) == 0) { /* int last_change = crm_element_value_int(xml_op, "last_rc_change"); */ time_t now = get_timet_now(data_set); if(now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." 
" Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if(key) { int dummy = 0; char *dummy_string = NULL; decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); crm_free(dummy_string); } if(task_status_i == LRM_OP_DONE && target_rc >= 0) { if(target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; crm_debug("%s on %s returned %d (%s) instead of the expected value: %d (%s)", id, node->details->uname, actual_rc_i, execra_code2string(actual_rc_i), target_rc, execra_code2string(target_rc)); } } else if(task_status_i == LRM_OP_ERROR) { /* let us decide that */ task_status_i = LRM_OP_DONE; } if(task_status_i == LRM_OP_NOTSUPPORTED) { actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE; } if(expired && actual_rc_i != EXECRA_NOT_RUNNING && actual_rc_i != EXECRA_RUNNING_MASTER && actual_rc_i != EXECRA_OK) { crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch(actual_rc_i) { case EXECRA_NOT_RUNNING: if(is_probe || target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_neq(task, CRMD_ACTION_STOP)) { task_status_i = LRM_OP_ERROR; } break; case EXECRA_RUNNING_MASTER: if(is_probe) { task_status_i = LRM_OP_DONE; crm_notice("Operation %s found resource %s active in master mode on %s", id, rsc->id, node->details->uname); } else if(target_rc == actual_rc_i) { /* nothing to do */ } else if(target_rc >= 0) { task_status_i = LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else 
if(safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { task_status_i = LRM_OP_ERROR; if(rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = LRM_OP_ERROR; break; case EXECRA_UNIMPLEMENT_FEATURE: if(interval > 0) { task_status_i = LRM_OP_NOTSUPPORTED; break; } /* else: fall through */ case EXECRA_INSUFFICIENT_PRIV: case EXECRA_NOT_INSTALLED: case EXECRA_INVALID_PARAM: effective_node = node; /* fall through */ case EXECRA_NOT_CONFIGURED: failed = rsc; if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(failed); } do_crm_log(actual_rc_i==EXECRA_NOT_INSTALLED?LOG_NOTICE:LOG_ERR, "Hard error - %s failed with rc=%d: Preventing %s from re-starting %s %s", id, actual_rc_i, failed->id, effective_node?"on":"anywhere", effective_node?effective_node->details->uname:"in the cluster"); resource_location(failed, effective_node, -INFINITY, "hard-error", data_set); if(is_probe) { /* treat these like stops */ task = CRMD_ACTION_STOP; task_status_i = LRM_OP_DONE; crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); } break; case EXECRA_OK: if(is_probe && target_rc == 7) { task_status_i = LRM_OP_DONE; crm_notice("Operation %s found resource %s active on %s", id, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if(target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = LRM_OP_ERROR; } break; default: if(task_status_i == LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_ERROR; } } if(task_status_i == LRM_OP_ERROR || task_status_i == LRM_OP_TIMEOUT || task_status_i == LRM_OP_NOTSUPPORTED) { action = 
custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set); if(expired) { crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } else if(action->on_fail == action_fail_ignore) { crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_DONE; } } switch(task_status_i) { case LRM_OP_PENDING: if(safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(actual_rc_i == EXECRA_NOT_RUNNING) { /* nothing to do */ } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ switch(*on_fail) { case action_fail_block: case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: crm_debug_2("%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_ignore: case action_fail_recover: case action_migrate_failure: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); set_active(rsc); } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s (%d)", id, node->details->uname, execra_code2string(actual_rc_i), actual_rc_i); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); if(*on_fail < action->on_fail) { *on_fail = action->on_fail; } if(safe_str_eq(task, CRMD_ACTION_STOP)) { 
resource_location( rsc, node, -INFINITY, "__stop_fail__", data_set); } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if(compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", id, node->details->uname); resource_location( rsc, node, -INFINITY, "__legacy_start__", data_set); } if(rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean?"true":"false", fail2text(action->on_fail), role2text(action->fail_role)); if(action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if(action->fail_role == RSC_ROLE_STOPPED) { crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } pe_free_action(action); action = NULL; break; case LRM_OP_CANCELLED: /* do nothing?? 
*/ pe_err("Dont know what to do for cancelled ops yet"); break; } done: crm_debug_3("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter) { int stop_index = -1; int start_index = -1; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if(active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { crm_debug_4("Skipping %s: not active", ID(rsc_entry)); break; } else if(lpc < start_index) { crm_debug_4("Skipping %s: old", 
ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); ); g_list_free(sorted_op_list); return op_list; } GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); const char *uname = NULL; node_t *this_node = NULL; xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, uname = crm_element_value(node_state, XML_ATTR_UNAME); if(node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( tmp, lrm_rsc, XML_LRM_TAG_RESOURCE, const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if(rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); ); } ); return output; } diff --git a/lib/plugins/lrm/Makefile.am b/lib/plugins/lrm/Makefile.am index b3ea876d5b..8e9a9cb183 100644 --- a/lib/plugins/lrm/Makefile.am +++ b/lib/plugins/lrm/Makefile.am @@ -1,40 +1,38 @@ # # Author: Sun Jiang Dong # Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl -halibdir = $(libdir)/@HB_PKG@ havarlibdir = $(localstatedir)/lib/@HB_PKG@ -COMMONLIBS = -lplumb - lrmdir = $(havarlibdir)/lrm + +halibdir = $(libdir)/@HB_PKG@ plugindir = $(halibdir)/plugins/RAExec plugin_LTLIBRARIES = stonith.la stonith_la_SOURCES = raexecstonith.c stonith_la_LDFLAGS = -lpils -export-dynamic -module -avoid-version \ - -L$(top_builddir)/lib/fencing -lstonithd -lstonith -llrm + $(top_builddir)/lib/fencing/libstonithd.la -lstonith -llrm install-exec-local: $(mkinstalldirs) $(DESTDIR)$(lrmdir) -chgrp $(CRM_DAEMON_GROUP) $(DESTDIR)/$(lrmdir) chmod 770 $(DESTDIR)/$(lrmdir) diff --git a/lib/plugins/lrm/raexecstonith.c b/lib/plugins/lrm/raexecstonith.c index 7f03a2ace7..f4862dd42e 100644 --- a/lib/plugins/lrm/raexecstonith.c +++ b/lib/plugins/lrm/raexecstonith.c @@ -1,392 +1,372 @@ /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * File: raexecocf.c * Author: Sun Jiang Dong * Copyright (c) 2004 International Business Machines * * This code implements the Resource Agent Plugin Module for LSB style. * It's a part of Local Resource Manager. Currently it's used by lrmd only. */ #include #include #include #include #include #include #include #include #include #include #include #if HAVE_HB_CONFIG_H #include #endif #if HAVE_GLUE_CONFIG_H #include #endif #include #include #include #include #include /* Add it for compiling on OSX */ #include #include -#include +#include #include # define PIL_PLUGINTYPE RA_EXEC_TYPE # define PIL_PLUGINTYPE_S "RAExec" # define PIL_PLUGINLICENSE LICENSE_PUBDOM # define PIL_PLUGINLICENSEURL URL_PUBDOM # define PIL_PLUGIN stonith # define PIL_PLUGIN_S "stonith" static PIL_rc close_stonithRA(PILInterface*, void* ud_interface); /* The begin of exported function list */ static int execra(const char * rsc_id, const char * rsc_type, const char * provider, const char * op_type, const int timeout, GHashTable * params); static uniform_ret_execra_t map_ra_retvalue(int ret_execra , const char * op_type, const char * std_output); static int get_resource_list(GList ** rsc_info); static char* get_resource_meta(const char* rsc_type, const char* provider); static int get_provider_list(const char* op_type, GList ** providers); /* The end of exported function list */ -/* The begin of internal used function & data list */ -static int get_providers(const char* class_path, const char* op_type, - GList ** providers); -static void stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data); -static int exit_value; -/* The end of internal function & data list */ - /* Rource agent execution plugin operations */ static struct RAExecOps raops = { execra, map_ra_retvalue, 
get_resource_list, get_provider_list, get_resource_meta }; static const char META_TEMPLATE[] = "\n" "\n" "\n" "1.0\n" "\n" "%s\n" "\n" "%s\n" "%s\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "2.0\n" "\n" "\n"; PIL_PLUGIN_BOILERPLATE2("1.0", Debug); static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static void* OurImports; static void* interfprivate; /* * Our plugin initialization and registration function * It gets called when the plugin gets loaded. */ PIL_rc PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interfaces */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &raops, close_stonithRA, &OurInterface, &OurImports, interfprivate); } static PIL_rc close_stonithRA(PILInterface* pif, void* ud_interface) { return PIL_OK; } -/* - * Most of the oprations will be sent to sotnithd directly, such as 'start', - * 'stop', 'monitor'. And others like 'meta-data' will be handled by itself - * locally. - * Some of important parameters' name: - * config_file - * config_string - */ -static int -execra(const char * rsc_id, const char * rsc_type, const char * provider, - const char * op_type,const int timeout, GHashTable * params) +static gboolean is_redhat_agent(const char *agent) { - stonithRA_ops_t * op; - int call_id = -1; - char buffer_tmp[32]; - - /* Handling "meta-data" operation in a special way. - * Now handle "meta-data" operation locally. - * Should be changed in the future? 
- */ - if ( 0 == STRNCMP_CONST(op_type, "meta-data")) { - char * tmp; - tmp = get_resource_meta(rsc_type, provider); - printf("%s", tmp); - g_free(tmp); - exit(0); - } - - g_snprintf(buffer_tmp, sizeof(buffer_tmp), "%s_%d" - , "STONITH_RA_EXEC", getpid()); - if (ST_OK != stonithd_signon(buffer_tmp)) { - cl_log(LOG_ERR, "%s:%d: Cannot sign on the stonithd." - , __FUNCTION__, __LINE__); - exit(EXECRA_UNKNOWN_ERROR); - } - - stonithd_set_stonithRA_ops_callback(stonithRA_ops_callback, &call_id); - - /* Temporarily donnot use it, but how to deal with the global OCF - * variables. This is a important thing to think about and do. - */ - /* send the RA operation to stonithd to simulate a RA's actions */ - if ( 0==STRNCMP_CONST(op_type, "start") - || 0==STRNCMP_CONST(op_type, "stop") ) { - cl_log(LOG_INFO - , "Try to %s STONITH resource : Device=%s" - , op_type, rsc_id, rsc_type); - } - - op = g_new(stonithRA_ops_t, 1); - op->ra_name = g_strdup(rsc_type); - op->op_type = g_strdup(op_type); - op->params = params; - op->timeout = timeout; /* need this for status at least */ - op->rsc_id = g_strdup(rsc_id); - if (ST_OK != stonithd_virtual_stonithRA_ops(op, &call_id)) { - cl_log(LOG_ERR, "sending stonithRA op to stonithd failed."); - /* Need to improve the granularity for error return code */ - stonithd_signoff(); - exit(EXECRA_EXEC_UNKNOWN_ERROR); - } - - /* May be redundant */ - /* - while (stonithd_op_result_ready() != TRUE) { - ; - } - */ - /* cl_log(LOG_DEBUG, "Will call stonithd_receive_ops_result."); */ - if (ST_OK != stonithd_receive_ops_result(TRUE)) { - cl_log(LOG_ERR, "stonithd_receive_ops_result failed."); - /* Need to improve the granularity for error return code */ - stonithd_signoff(); - exit(EXECRA_EXEC_UNKNOWN_ERROR); - } + int rc = 0; + struct stat prop; + char buffer[FILENAME_MAX+1]; + + snprintf(buffer,FILENAME_MAX,"%s/%s", RH_STONITH_DIR, agent); + rc = stat(buffer, &prop); + if (rc >= 0 && S_ISREG(prop.st_mode)) { + return TRUE; + } + return FALSE; +} - /* 
exit_value will be set by the callback function */ - g_free(op->ra_name); - g_free(op->op_type); - g_free(op->rsc_id); - g_free(op); +static const char *get_provider(const char *agent, const char *provider) +{ + /* This function sucks */ + if(is_redhat_agent(agent)) { + return "redhat"; + } - stonithd_signoff(); - /* cl_log(LOG_DEBUG, "stonithRA orignal exit code=%d", exit_value); */ - exit(map_ra_retvalue(exit_value, op_type, NULL)); + return "heartbeat"; } -static void -stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data) +static int +execra(const char *rsc_id, const char *rsc_type, const char *provider, + const char *op_type, const int timeout, GHashTable *params) { - /* cl_log(LOG_DEBUG, "setting exit code=%d", exit_value); */ - exit_value = op->op_result; + int rc = 0; + stonith_t *stonith_api = NULL; + provider = get_provider(rsc_type, provider); + + if ( 0 == STRNCMP_CONST(op_type, "meta-data")) { + char *meta = get_resource_meta(rsc_type, provider); + printf("%s", meta); + free(meta); + exit(0); + } + + stonith_api = stonith_api_new(); + rc = stonith_api->cmds->connect(stonith_api, "lrmd", NULL, NULL); + if ( 0 == STRNCMP_CONST(op_type, "monitor") ) { + rc = stonith_api->cmds->call( + stonith_api, st_opt_sync_call, rsc_id, "monitor", NULL, timeout); + + } else if ( 0 == STRNCMP_CONST(op_type, "start") ) { + const char *agent = rsc_type; + if(provider == NULL || 0 != STRNCMP_CONST(provider, "redhat")) { + agent = "fence_legacy"; + g_hash_table_replace(params, strdup("plugin"), strdup(rsc_type)); + } + + rc = stonith_api->cmds->register_device( + stonith_api, st_opt_sync_call, rsc_id, provider, agent, params); + + } else if ( 0 == STRNCMP_CONST(op_type, "stop") ) { + rc = stonith_api->cmds->remove_device( + stonith_api, st_opt_sync_call, rsc_id); + } + + stonith_api->cmds->disconnect(stonith_api); + stonith_api_delete(stonith_api); + + /* cl_log(LOG_DEBUG, "stonithRA orignal exit code=%d", exit_value); */ + exit(map_ra_retvalue(rc, op_type, 
NULL)); } static uniform_ret_execra_t -map_ra_retvalue(int ret_execra, const char * op_type, const char * std_output) +map_ra_retvalue(int rc, const char * op_type, const char * std_output) { - /* Because the UNIFORM_RET_EXECRA is compatible with OCF standard, no - * actual mapping except validating, which ensure the return code - * will be in the range 0 to 7. Too strict? - */ - if (ret_execra < 0 || - ret_execra > EXECRA_STATUS_UNKNOWN) { - cl_log(LOG_WARNING, "%s:%d: mapped the invalid return code %d." - , __FUNCTION__, __LINE__, ret_execra); - ret_execra = EXECRA_UNKNOWN_ERROR; + if(rc == st_err_unknown_device) { + if ( 0 == STRNCMP_CONST(op_type, "stop") ) { + rc = 0; + + } else if ( 0 != STRNCMP_CONST(op_type, "start") ) { + rc = 7; } - return ret_execra; + + } else if (rc < 0 || rc > EXECRA_STATUS_UNKNOWN) { + crm_warn("Mapped the invalid return code %d.", rc); + rc = EXECRA_UNKNOWN_ERROR; + } + return rc; } static int get_resource_list(GList ** rsc_info) { - int rc; - int needprivs = !cl_have_full_privs(); - - if ( rsc_info == NULL ) { - cl_log(LOG_ERR, "Parameter error: get_resource_list"); - return -2; + int file_num; + char **entry = NULL; + char **type_list = NULL; + struct dirent **namelist; + + if ( rsc_info == NULL ) { + cl_log(LOG_ERR, "Parameter error: get_resource_list"); + return -2; + } + + /* Include Heartbeat agents */ + type_list = stonith_types(); + for(entry = type_list; *entry; ++entry) { + cl_log(LOG_INFO, "Added: %s", *entry); + *rsc_info = g_list_append(*rsc_info, *entry); + } + + /* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */ + file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort); + if (file_num > 0) { + struct stat prop; + char buffer[FILENAME_MAX+1]; + + while (file_num--) { + if ('.' 
== namelist[file_num]->d_name[0]) { + free(namelist[file_num]); + continue; + + } else if(0 != strncmp(RH_STONITH_PREFIX, + namelist[file_num]->d_name, + strlen(RH_STONITH_PREFIX))) { + free(namelist[file_num]); + continue; + } + + snprintf(buffer,FILENAME_MAX,"%s/%s", + RH_STONITH_DIR, namelist[file_num]->d_name); + stat(buffer, &prop); + if (S_ISREG(prop.st_mode)) { + *rsc_info = g_list_append(*rsc_info, g_strdup(namelist[file_num]->d_name)); + } + + free(namelist[file_num]); } + free(namelist); + } - if ( *rsc_info != NULL ) { - cl_log(LOG_ERR, "Parameter error: get_resource_list."\ - "will cause memory leak."); - *rsc_info = NULL; - } - - if (needprivs) { - return_to_orig_privs(); - } - if (ST_OK != stonithd_signon("STONITH_RA")) { - cl_log(LOG_ERR, "%s:%d: Can not signon to the stonithd." - , __FUNCTION__, __LINE__); - rc = -1; - } else { - rc = stonithd_list_stonith_types(rsc_info); - stonithd_signoff(); - } - - if (needprivs) { - return_to_dropped_privs(); - } - return rc; + return 0; } static int get_provider_list(const char* op_type, GList ** providers) { - int ret; - ret = get_providers(STONITH_PLUGIN_DIR, op_type, providers); - if (0>ret) { - cl_log(LOG_ERR, "scandir failed in stonith RA plugin"); - } - return ret; + if(providers == NULL) { + return -1; + + } else if(op_type == NULL) { + return -2; + } + + if (is_redhat_agent(op_type)) { + *providers = g_list_append(*providers, g_strdup("redhat")); + + } else { + *providers = g_list_append(*providers, g_strdup("heartbeat")); + } + + return 1; } static char * get_resource_meta(const char* rsc_type, const char* provider) { - char * buffer; int bufferlen = 0; + char *buffer = NULL; const char * meta_param = NULL; const char * meta_longdesc = NULL; const char * meta_shortdesc = NULL; char *xml_meta_longdesc = NULL; char *xml_meta_shortdesc = NULL; Stonith * stonith_obj = NULL; static const char * no_parameter_info = ""; - if ( provider != NULL ) { - cl_log(LOG_DEBUG, "stonithRA plugin: provider attribute " - 
"is not needed and will be ignored."); + cl_log(LOG_INFO, "stonithRA plugin: looking up %s/%s metadata.", rsc_type, provider); + provider = get_provider(rsc_type, provider); + + if(0 == STRNCMP_CONST(provider, "redhat")) { + stonith_t *stonith_api = stonith_api_new(); + stonith_api->cmds->connect(stonith_api, "lrmd", NULL, NULL); + stonith_api->cmds->metadata( + stonith_api, st_opt_sync_call, rsc_type, provider, &buffer, 0); + stonith_api->cmds->disconnect(stonith_api); + stonith_api_delete(stonith_api); + cl_log(LOG_INFO, "stonithRA plugin: got metadata: %s", buffer); + return buffer; } + /* TODO: Move this to stonithd */ stonith_obj = stonith_new(rsc_type); meta_longdesc = stonith_get_info(stonith_obj, ST_DEVICEDESCR); if (meta_longdesc == NULL) { cl_log(LOG_WARNING, "stonithRA plugin: no long description in %s's metadata.", rsc_type); meta_longdesc = no_parameter_info; } xml_meta_longdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); meta_shortdesc = stonith_get_info(stonith_obj, ST_DEVICENAME); if (meta_shortdesc == NULL) { cl_log(LOG_WARNING, "stonithRA plugin: no short description in %s's metadata.", rsc_type); meta_shortdesc = no_parameter_info; } xml_meta_shortdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); meta_param = stonith_get_info(stonith_obj, ST_CONF_XML); if (meta_param == NULL) { cl_log(LOG_WARNING, "stonithRA plugin: no list of parameters in %s's metadata.", rsc_type); meta_param = no_parameter_info; } bufferlen = STRLEN_CONST(META_TEMPLATE) + strlen(rsc_type) + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + strlen(meta_param) + 1; - buffer = g_new(char, bufferlen); - buffer[bufferlen-1] = '\0'; + buffer = malloc(sizeof(char) * bufferlen); + memset(buffer, 0, bufferlen); snprintf(buffer, bufferlen-1, META_TEMPLATE, rsc_type, xml_meta_longdesc, xml_meta_shortdesc, meta_param); stonith_delete(stonith_obj); xmlFree(xml_meta_longdesc); xmlFree(xml_meta_shortdesc); 
return buffer; } - -/* - * Currently should return *providers = NULL, but remain the old code for - * possible unsing in the future - */ -static int -get_providers(const char* class_path, const char* op_type, GList ** providers) -{ - if ( providers == NULL ) { - cl_log(LOG_ERR, "%s:%d: Parameter error: providers==NULL" - , __FUNCTION__, __LINE__); - return -2; - } - - if ( *providers != NULL ) { - cl_log(LOG_ERR, "%s:%d: Parameter error: *providers==NULL." - "This will cause memory leak." - , __FUNCTION__, __LINE__); - } - - /* Now temporarily make it fixed */ - *providers = g_list_append(*providers, g_strdup("heartbeat")); - - return g_list_length(*providers); -} diff --git a/lib/transition/unpack.c b/lib/transition/unpack.c index 50cc1d0693..9ace14cdbd 100644 --- a/lib/transition/unpack.c +++ b/lib/transition/unpack.c @@ -1,277 +1,312 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include static crm_action_t* unpack_action(synapse_t *parent, xmlNode *xml_action) { crm_action_t *action = NULL; xmlNode *action_copy = NULL; const char *value = crm_element_value(xml_action, XML_ATTR_ID); if(value == NULL) { crm_err("Actions must have an id!"); crm_log_xml_debug_3(xml_action, "Action with missing id"); return NULL; } action_copy = copy_xml(xml_action); crm_malloc0(action, sizeof(crm_action_t)); if(action == NULL) { return NULL; } action->id = crm_parse_int(value, NULL); action->type = action_type_rsc; action->xml = action_copy; action->synapse = parent; if(safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_RSC_OP)) { action->type = action_type_rsc; } else if(safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_PSEUDO_EVENT)) { action->type = action_type_pseudo; } else if(safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_CRM_EVENT)) { action->type = action_type_crm; } action->params = xml2list(action_copy); value = g_hash_table_lookup(action->params, "CRM_meta_timeout"); if(value != NULL) { action->timeout = crm_parse_int(value, NULL); } value = g_hash_table_lookup(action->params, "CRM_meta_interval"); if(value != NULL) { action->interval = crm_parse_int(value, NULL); } value = g_hash_table_lookup(action->params, "CRM_meta_can_fail"); if(value != NULL) { crm_str_to_boolean(value, &(action->can_fail)); } crm_debug_3("Action %d has timer set to %dms", action->id, action->timeout); return action; } static synapse_t * unpack_synapse(crm_graph_t *new_graph, xmlNode *xml_synapse) { const char *value = NULL; synapse_t *new_synapse = NULL; CRM_CHECK(xml_synapse != NULL, return NULL); crm_debug_3("looking in synapse %s", ID(xml_synapse)); crm_malloc0(new_synapse, 
sizeof(synapse_t)); new_synapse->id = crm_parse_int(ID(xml_synapse), NULL); value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY); if(value != NULL) { new_synapse->priority = crm_parse_int(value, NULL); } new_graph->num_synapses++; CRM_CHECK(new_synapse->id >= 0, crm_free(new_synapse); return NULL); crm_debug_3("look for actions in synapse %s", crm_element_value(xml_synapse, XML_ATTR_ID)); xml_child_iter_filter( xml_synapse, action_set, "action_set", xml_child_iter( action_set, action, crm_action_t *new_action = unpack_action( new_synapse, action); new_graph->num_actions++; if(new_action == NULL) { continue; } crm_debug_3("Adding action %d to synapse %d", new_action->id, new_synapse->id); new_synapse->actions = g_list_append( new_synapse->actions, new_action); ); ); crm_debug_3("look for inputs in synapse %s", ID(xml_synapse)); xml_child_iter_filter( xml_synapse, inputs, "inputs", xml_child_iter( inputs, trigger, xml_child_iter( trigger, input, crm_action_t *new_input = unpack_action( new_synapse, input); if(new_input == NULL) { continue; } crm_debug_3("Adding input %d to synapse %d", new_input->id, new_synapse->id); new_synapse->inputs = g_list_append( new_synapse->inputs, new_input); ); ); ); return new_synapse; } crm_graph_t * unpack_graph(xmlNode *xml_graph, const char *reference) { /* id = -1; new_graph->abort_priority = 0; new_graph->network_delay = -1; new_graph->transition_timeout = -1; new_graph->stonith_timeout = -1; new_graph->completion_action = tg_done; if(reference) { new_graph->source = crm_strdup(reference); } else { new_graph->source = crm_strdup("unknown"); } if(xml_graph != NULL) { t_id = crm_element_value(xml_graph, "transition_id"); CRM_CHECK(t_id != NULL, crm_free(new_graph); return NULL); new_graph->id = crm_parse_int(t_id, "-1"); time = crm_element_value(xml_graph, "cluster-delay"); CRM_CHECK(time != NULL, crm_free(new_graph); return NULL); new_graph->network_delay = crm_get_msec(time); time = crm_element_value(xml_graph, 
"stonith-timeout"); if(time == NULL) { new_graph->stonith_timeout = new_graph->network_delay; } else { new_graph->stonith_timeout = crm_get_msec(time); } t_id = crm_element_value(xml_graph, "batch-limit"); new_graph->batch_limit = crm_parse_int(t_id, "0"); } xml_child_iter_filter( xml_graph, synapse, "synapse", synapse_t *new_synapse = unpack_synapse(new_graph, synapse); if(new_synapse != NULL) { new_graph->synapses = g_list_append( new_graph->synapses, new_synapse); } ); crm_info("Unpacked transition %d: %d actions in %d synapses", new_graph->id, new_graph->num_actions,new_graph->num_synapses); return new_graph; } static void destroy_action(crm_action_t *action) { if(action->timer && action->timer->source_id != 0) { crm_warn("Cancelling timer for action %d (src=%d)", action->id, action->timer->source_id); g_source_remove(action->timer->source_id); } g_hash_table_destroy(action->params); free_xml(action->xml); crm_free(action->timer); crm_free(action); } static void destroy_synapse(synapse_t *synapse) { while(g_list_length(synapse->actions) > 0) { crm_action_t *action = g_list_nth_data(synapse->actions, 0); synapse->actions = g_list_remove(synapse->actions, action); destroy_action(action); } while(g_list_length(synapse->inputs) > 0) { crm_action_t *action = g_list_nth_data(synapse->inputs, 0); synapse->inputs = g_list_remove(synapse->inputs, action); destroy_action(action); } crm_free(synapse); } void destroy_graph(crm_graph_t *graph) { if(graph == NULL) { return; } while(g_list_length(graph->synapses) > 0) { synapse_t *synapse = g_list_nth_data(graph->synapses, 0); graph->synapses = g_list_remove(graph->synapses, synapse); destroy_synapse(synapse); } crm_free(graph->source); crm_free(graph); } +lrm_op_t *convert_graph_action(xmlNode *resource, crm_action_t *action, int status, int rc) +{ + lrm_op_t *op = NULL; + xmlNode *action_resource = NULL; + + CRM_CHECK(action != NULL, return NULL); + CRM_CHECK(action->type == action_type_rsc, return NULL); + + 
crm_malloc0(op, sizeof(lrm_op_t)); + + op->app_name = crm_strdup(crm_system_name); + + action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); + CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "Bad"); return NULL); + + op->rsc_id = crm_strdup(ID(action_resource)); + op->interval = action->interval; + op->op_type = crm_strdup(crm_element_value(action->xml, XML_LRM_ATTR_TASK)); + op->rc = rc; + op->op_status = status; + op->params = action->params; + + op->call_id = 0; + xml_child_iter(resource, xop, + int tmp = 0; + crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); + crm_info("Got call_id=%d for %s", tmp, ID(resource)); + if(tmp > op->call_id) { + op->call_id = tmp; + } + ); + + op->call_id++; + return op; +} diff --git a/pengine/clone.c b/pengine/clone.c index 4c09525df0..869c8e2ac3 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -1,1689 +1,1689 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #define VARIANT_CLONE 1 #include -gint sort_clone_instance(gconstpointer a, gconstpointer b); +gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); void child_stopping_constraints( clone_variant_data_t *clone_data, resource_t *self, resource_t *child, resource_t *last, pe_working_set_t *data_set); void child_starting_constraints( clone_variant_data_t *clone_data, resource_t *self, resource_t *child, resource_t *last, pe_working_set_t *data_set); static node_t * parent_node_instance(const resource_t *rsc, node_t *node) { node_t *ret = NULL; if(node != NULL) { ret = pe_find_node_id( rsc->parent->allowed_nodes, node->details->id); } return ret; } static gboolean did_fail(const resource_t *rsc) { if(is_set(rsc->flags, pe_rsc_failed)) { return TRUE; } slist_iter( child_rsc, resource_t, rsc->children, lpc, if(did_fail(child_rsc)) { return TRUE; } ); return FALSE; } -gint sort_clone_instance(gconstpointer a, gconstpointer b) +gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) { int level = LOG_DEBUG_3; node_t *node1 = NULL; node_t *node2 = NULL; gboolean can1 = TRUE; gboolean can2 = TRUE; gboolean with_scores = TRUE; const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* allocation order: * - active instances * - instances running on nodes with the least copies * - active instances on nodes that cant support them or are to be fenced * - failed instances * - inactive instances */ do_crm_log_unlikely(level+1, "%s ? 
%s", resource1->id, resource2->id); if(resource1->running_on && resource2->running_on) { if(g_list_length(resource1->running_on) < g_list_length(resource2->running_on)) { do_crm_log_unlikely(level, "%s < %s: running_on", resource1->id, resource2->id); return -1; } else if(g_list_length(resource1->running_on) > g_list_length(resource2->running_on)) { do_crm_log_unlikely(level, "%s > %s: running_on", resource1->id, resource2->id); return 1; } } if(resource1->running_on) { node1 = resource1->running_on->data; } if(resource2->running_on) { node2 = resource2->running_on->data; } if(node1) { node_t *match = pe_find_node_id(resource1->allowed_nodes, node1->details->id); if(match == NULL || match->weight < 0) { do_crm_log_unlikely(level, "%s: current location is unavailable", resource1->id); node1 = NULL; can1 = FALSE; } } if(node2) { node_t *match = pe_find_node_id(resource2->allowed_nodes, node2->details->id); if(match == NULL || match->weight < 0) { do_crm_log_unlikely(level, "%s: current location is unavailable", resource2->id); node2 = NULL; can2 = FALSE; } } if(can1 != can2) { if(can1) { do_crm_log_unlikely(level, "%s < %s: availability of current location", resource1->id, resource2->id); return -1; } do_crm_log_unlikely(level, "%s > %s: availability of current location", resource1->id, resource2->id); return 1; } if(resource1->priority < resource2->priority) { do_crm_log_unlikely(level, "%s < %s: priority", resource1->id, resource2->id); return 1; } else if(resource1->priority > resource2->priority) { do_crm_log_unlikely(level, "%s > %s: priority", resource1->id, resource2->id); return -1; } if(node1 == NULL && node2 == NULL) { do_crm_log_unlikely(level, "%s == %s: not active", resource1->id, resource2->id); return 0; } if(node1 != node2) { if(node1 == NULL) { do_crm_log_unlikely(level, "%s > %s: active", resource1->id, resource2->id); return 1; } else if(node2 == NULL) { do_crm_log_unlikely(level, "%s < %s: active", resource1->id, resource2->id); return -1; } } 
can1 = can_run_resources(node1); can2 = can_run_resources(node2); if(can1 != can2) { if(can1) { do_crm_log_unlikely(level, "%s < %s: can", resource1->id, resource2->id); return -1; } do_crm_log_unlikely(level, "%s > %s: can", resource1->id, resource2->id); return 1; } node1 = parent_node_instance(resource1, node1); node2 = parent_node_instance(resource2, node2); if(node1 != NULL && node2 == NULL) { do_crm_log_unlikely(level, "%s < %s: not allowed", resource1->id, resource2->id); return -1; } else if(node1 == NULL && node2 != NULL) { do_crm_log_unlikely(level, "%s > %s: not allowed", resource1->id, resource2->id); return 1; } if(node1 == NULL) { do_crm_log_unlikely(level, "%s == %s: not allowed", resource1->id, resource2->id); return 0; } if(node1->count < node2->count) { do_crm_log_unlikely(level, "%s < %s: count", resource1->id, resource2->id); return -1; } else if(node1->count > node2->count) { do_crm_log_unlikely(level, "%s > %s: count", resource1->id, resource2->id); return 1; } if(with_scores) { int max = 0; int lpc = 0; GListPtr list1 = node_list_dup(resource1->allowed_nodes, FALSE, FALSE); GListPtr list2 = node_list_dup(resource2->allowed_nodes, FALSE, FALSE); - list1 = g_list_sort(list1, sort_node_weight); - list2 = g_list_sort(list2, sort_node_weight); + list1 = g_list_sort_with_data(list1, sort_node_weight, data_set); + list2 = g_list_sort_with_data(list2, sort_node_weight, data_set); max = g_list_length(list1); if(max < g_list_length(list2)) { max = g_list_length(list2); } for(;lpc < max; lpc++) { node1 = g_list_nth_data(list1, lpc); node2 = g_list_nth_data(list2, lpc); if(node1 == NULL) { do_crm_log_unlikely(level, "%s < %s: node score NULL", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return 1; } else if(node2 == NULL) { do_crm_log_unlikely(level, "%s > %s: node score NULL", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return -1; } if(node1->weight < node2->weight) { 
do_crm_log_unlikely(level, "%s < %s: node score", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return 1; } else if(node1->weight > node2->weight) { do_crm_log_unlikely(level, "%s > %s: node score", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return -1; } } pe_free_shallow(list1); pe_free_shallow(list2); } can1 = did_fail(resource1); can2 = did_fail(resource2); if(can1 != can2) { if(can1) { do_crm_log_unlikely(level, "%s > %s: failed", resource1->id, resource2->id); return 1; } do_crm_log_unlikely(level, "%s < %s: failed", resource1->id, resource2->id); return -1; } if(node1 && node2) { int max = 0; int lpc = 0; GListPtr list1 = g_list_append(NULL, node_copy(resource1->running_on->data)); GListPtr list2 = g_list_append(NULL, node_copy(resource2->running_on->data)); /* Possibly a replacement for the with_scores block above */ slist_iter( constraint, rsc_colocation_t, resource1->parent->rsc_cons_lhs, lpc, do_crm_log_unlikely(level+1, "Applying %s to %s", constraint->id, resource1->id); list1 = native_merge_weights( constraint->rsc_lh, resource1->id, list1, constraint->node_attribute, constraint->score/INFINITY, FALSE); ); slist_iter( constraint, rsc_colocation_t, resource2->parent->rsc_cons_lhs, lpc, do_crm_log_unlikely(level+1, "Applying %s to %s", constraint->id, resource2->id); list2 = native_merge_weights( constraint->rsc_lh, resource2->id, list2, constraint->node_attribute, constraint->score/INFINITY, FALSE); ); - list1 = g_list_sort(list1, sort_node_weight); - list2 = g_list_sort(list2, sort_node_weight); + list1 = g_list_sort_with_data(list1, sort_node_weight, data_set); + list2 = g_list_sort_with_data(list2, sort_node_weight, data_set); max = g_list_length(list1); if(max < g_list_length(list2)) { max = g_list_length(list2); } for(;lpc < max; lpc++) { node1 = g_list_nth_data(list1, lpc); node2 = g_list_nth_data(list2, lpc); if(node1 == NULL) { do_crm_log_unlikely(level, "%s < %s: colocated 
score NULL", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return 1; } else if(node2 == NULL) { do_crm_log_unlikely(level, "%s > %s: colocated score NULL", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return -1; } if(node1->weight < node2->weight) { do_crm_log_unlikely(level, "%s < %s: colocated score", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return 1; } else if(node1->weight > node2->weight) { do_crm_log_unlikely(level, "%s > %s: colocated score", resource1->id, resource2->id); pe_free_shallow(list1); pe_free_shallow(list2); return -1; } } pe_free_shallow(list1); pe_free_shallow(list2); } do_crm_log_unlikely(level, "%s == %s: default %d", resource1->id, resource2->id, node2->weight); return 0; } static node_t * can_run_instance(resource_t *rsc, node_t *node) { node_t *local_node = NULL; clone_variant_data_t *clone_data = NULL; if(can_run_resources(node) == FALSE) { goto bail; } else if(is_set(rsc->flags, pe_rsc_orphan)) { goto bail; } local_node = parent_node_instance(rsc, node); get_clone_variant_data(clone_data, rsc->parent); if(local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if(local_node->count < clone_data->clone_node_max) { return local_node; } else { crm_debug_2("%s cannot run on %s: node full", rsc->id, node->details->uname); } bail: if(node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static node_t * color_instance(resource_t *rsc, pe_working_set_t *data_set) { node_t *chosen = NULL; node_t *local_node = NULL; crm_debug_2("Processing %s", rsc->id); if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } if(rsc->allowed_nodes) { slist_iter(try_node, node_t, rsc->allowed_nodes, lpc, 
can_run_instance(rsc, try_node); ); } chosen = rsc->cmds->color(rsc, data_set); if(chosen) { local_node = pe_find_node_id( rsc->parent->allowed_nodes, chosen->details->id); if(local_node) { local_node->count++; } else if(is_set(rsc->flags, pe_rsc_managed)) { /* what to do? we can't enforce per-node limits in this case */ crm_config_err("%s not found in %s (list=%d)", chosen->details->id, rsc->parent->id, g_list_length(rsc->parent->allowed_nodes)); } } return chosen; } static void append_parent_colocation(resource_t *rsc, resource_t *child, gboolean all) { slist_iter(cons, rsc_colocation_t, rsc->rsc_cons, lpc, if(all || cons->score < 0 || cons->score == INFINITY) { child->rsc_cons = g_list_append(child->rsc_cons, cons); } ); slist_iter(cons, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, if(all || cons->score < 0) { child->rsc_cons_lhs = g_list_append(child->rsc_cons_lhs, cons); } ); } node_t * clone_color(resource_t *rsc, pe_working_set_t *data_set) { int allocated = 0; int available_nodes = 0; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if(is_not_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); crm_debug_2("Processing %s", rsc->id); /* this information is used by sort_clone_instance() when deciding in which * order to allocate clone instances */ slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, constraint->score/INFINITY, TRUE); ); dump_node_scores(show_scores?0:scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); /* count now tracks the number of clones currently allocated */ slist_iter(node, node_t, rsc->allowed_nodes, lpc, node->count = 0; ); slist_iter(child, resource_t, rsc->children, lpc, 
if(g_list_length(child->running_on) > 0) { node_t *child_node = child->running_on->data; node_t *local_node = parent_node_instance( child, child->running_on->data); if(local_node) { local_node->count++; } else { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } ); - rsc->children = g_list_sort(rsc->children, sort_clone_instance); + rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set); /* count now tracks the number of clones we have allocated */ slist_iter(node, node_t, rsc->allowed_nodes, lpc, node->count = 0; ); - rsc->allowed_nodes = g_list_sort( - rsc->allowed_nodes, sort_node_weight); + rsc->allowed_nodes = g_list_sort_with_data( + rsc->allowed_nodes, sort_node_weight, data_set); slist_iter(node, node_t, rsc->allowed_nodes, lpc, if(can_run_resources(node)) { available_nodes++; } ); slist_iter(child, resource_t, rsc->children, lpc, if(allocated >= clone_data->clone_max) { crm_debug("Child %s not allocated - limit reached", child->id); resource_location(child, NULL, -INFINITY, "clone_color:limit_reached", data_set); } else if (clone_data->clone_max < available_nodes) { /* Only include positive colocation preferences of dependant resources * if not every node will get a copy of the clone */ append_parent_colocation(rsc, child, TRUE); } else { append_parent_colocation(rsc, child, FALSE); } if(color_instance(child, data_set)) { allocated++; } ); crm_debug("Allocated %d %s instances of a possible %d", allocated, rsc->id, clone_data->clone_max); clear_bit(rsc->flags, pe_rsc_provisional); clear_bit(rsc->flags, pe_rsc_allocating); return NULL; } static void clone_update_pseudo_status( resource_t *rsc, gboolean *stopping, gboolean *starting, gboolean *active) { if(rsc->children) { slist_iter(child, resource_t, rsc->children, lpc, clone_update_pseudo_status(child, stopping, starting, active) ); return; } CRM_ASSERT(active != NULL); CRM_ASSERT(starting != NULL); CRM_ASSERT(stopping != NULL); 
if(rsc->running_on) { *active = TRUE; } slist_iter( action, action_t, rsc->actions, lpc, if(*starting && *stopping) { return; } else if(action->optional) { crm_debug_3("Skipping optional: %s", action->uuid); continue; } else if(action->pseudo == FALSE && action->runnable == FALSE){ crm_debug_3("Skipping unrunnable: %s", action->uuid); continue; } else if(safe_str_eq(RSC_STOP, action->task)) { crm_debug_2("Stopping due to: %s", action->uuid); *stopping = TRUE; } else if(safe_str_eq(RSC_START, action->task)) { if(action->runnable == FALSE) { crm_debug_3("Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, action->runnable, action->pseudo); } else { crm_debug_2("Starting due to: %s", action->uuid); crm_debug_3("%s run=%d, pseudo=%d", action->uuid, action->runnable, action->pseudo); *starting = TRUE; } } ); } static action_t * find_rsc_action(resource_t *rsc, const char *key, gboolean active_only, GListPtr *list) { action_t *match = NULL; GListPtr possible = NULL; GListPtr active = NULL; possible = find_actions(rsc->actions, key, NULL); if(active_only) { slist_iter(op, action_t, possible, lpc, if(op->optional == FALSE) { active = g_list_append(active, op); } ); if(active && g_list_length(active) == 1) { match = g_list_nth_data(active, 0); } if(list) { *list = active; active = NULL; } } else if(possible && g_list_length(possible) == 1) { match = g_list_nth_data(possible, 0); } if(list) { *list = possible; possible = NULL; } if(possible) { g_list_free(possible); } if(active) { g_list_free(active); } return match; } static void child_ordering_constraints(resource_t *rsc, pe_working_set_t *data_set) { char *key = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *last_stop = NULL; action_t *last_start = NULL; gboolean active_only = TRUE; /* change to false to get the old behavior */ clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if(clone_data->ordered == FALSE) { return; } slist_iter( child, resource_t, rsc->children, 
lpc, key = stop_key(child); stop = find_rsc_action(child, key, active_only, NULL); crm_free(key); key = start_key(child); start = find_rsc_action(child, key, active_only, NULL); crm_free(key); if(stop) { if(last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_implies_left); } last_stop = stop; } if(start) { if(last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_implies_left); } last_start = start; } ); } void clone_create_actions(resource_t *rsc, pe_working_set_t *data_set) { gboolean child_active = FALSE; gboolean child_starting = FALSE; gboolean child_stopping = FALSE; action_t *stop = NULL; action_t *stopped = NULL; action_t *start = NULL; action_t *started = NULL; resource_t *last_start_rsc = NULL; resource_t *last_stop_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug_2("Creating actions for %s", rsc->id); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status( child_rsc, &child_stopping, &child_starting, &child_active); if(is_set(child_rsc->flags, pe_rsc_starting)) { last_start_rsc = child_rsc; } if(is_set(child_rsc->flags, pe_rsc_stopping)) { last_stop_rsc = child_rsc; } ); /* start */ start = start_action(rsc, NULL, !child_starting); started = custom_action(rsc, started_key(rsc), RSC_STARTED, NULL, !child_starting, TRUE, data_set); start->pseudo = TRUE; start->runnable = TRUE; started->pseudo = TRUE; started->priority = INFINITY; if(child_active || child_starting) { started->runnable = TRUE; } child_ordering_constraints(rsc, data_set); child_starting_constraints(clone_data, rsc, NULL, last_start_rsc, data_set); clone_data->start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set); /* stop */ stop = stop_action(rsc, NULL, !child_stopping); stopped = custom_action(rsc, stopped_key(rsc), RSC_STOPPED, NULL, !child_stopping, TRUE, 
data_set); stop->pseudo = TRUE; stop->runnable = TRUE; stopped->pseudo = TRUE; stopped->runnable = TRUE; stopped->priority = INFINITY; child_stopping_constraints(clone_data, rsc, NULL, last_stop_rsc, data_set); clone_data->stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set); if(clone_data->stop_notify && clone_data->start_notify) { order_actions(clone_data->stop_notify->post_done, clone_data->start_notify->pre, pe_order_optional); } } void child_starting_constraints( clone_variant_data_t *clone_data, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { if(child == NULL && last == NULL) { crm_debug("%s has no active children", rsc->id); return; } if(child != NULL) { order_start_start( rsc, child, pe_order_runnable_left|pe_order_implies_left_printed); new_rsc_order(child, RSC_START, rsc, RSC_STARTED, pe_order_implies_right_printed, data_set); } if(FALSE && clone_data->ordered) { if(child == NULL) { /* last child start before global started */ new_rsc_order(last, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); } else if(last == NULL) { /* global start before first child start */ order_start_start( rsc, child, pe_order_implies_left); } else { /* child/child relative start */ order_start_start(last, child, pe_order_implies_left); } } } void child_stopping_constraints( clone_variant_data_t *clone_data, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { if(child == NULL && last == NULL) { crm_debug("%s has no active children", rsc->id); return; } if(child != NULL) { order_stop_stop(rsc, child, pe_order_shutdown|pe_order_implies_left_printed); new_rsc_order(child, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_right_printed, data_set); } if(FALSE && clone_data->ordered) { if(last == NULL) { /* first child stop before global stopped */ new_rsc_order(child, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); } else if(child == NULL) { /* global stop 
before last child stop */ order_stop_stop( rsc, last, pe_order_implies_left); } else { /* child/child relative stop */ order_stop_stop(child, last, pe_order_implies_left); } } } void clone_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { resource_t *last_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); native_internal_constraints(rsc, data_set); /* global stop before stopped */ new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); /* global start before started */ new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); /* global stopped before start */ new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->internal_constraints(child_rsc, data_set); child_starting_constraints( clone_data, rsc, child_rsc, last_rsc, data_set); child_stopping_constraints( clone_data, rsc, child_rsc, last_rsc, data_set); last_rsc = child_rsc; ); } resource_t* find_compatible_child( resource_t *local_child, resource_t *rsc, enum rsc_role_e filter, gboolean current) { node_t *local_node = NULL; node_t *node = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); local_node = local_child->fns->location(local_child, NULL, current); if(local_node == NULL) { crm_debug("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } slist_iter( child_rsc, resource_t, rsc->children, lpc, enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); node = child_rsc->fns->location(child_rsc, NULL, current); if(filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_debug_2("Filtered %s", child_rsc->id); continue; } if(node && local_node && node->details == local_node->details) { crm_info("Colocating %s with %s on %s", local_child->id, child_rsc->id, node->details->uname); return child_rsc; } ); crm_debug("Can't colocate 
child %s with %s", local_child->id, rsc->id); return NULL; } void clone_rsc_colocation_lh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { gboolean do_interleave = FALSE; resource_t *rsc = constraint->rsc_lh; clone_variant_data_t *clone_data = NULL; clone_variant_data_t *clone_data_rh = NULL; if(rsc == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if(constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } else { crm_debug_4("Processing constraints from %s", rsc->id); } get_clone_variant_data(clone_data, rsc); if(constraint->rsc_rh->variant == pe_clone || constraint->rsc_rh->variant == pe_master) { get_clone_variant_data( clone_data_rh, constraint->rsc_rh); if(clone_data->clone_node_max != clone_data_rh->clone_node_max) { crm_config_err("Cannot interleave "XML_CIB_TAG_INCARNATION " %s and %s because" " they do not support the same number of" " resources per node", constraint->rsc_lh->id, constraint->rsc_rh->id); /* only the LHS side needs to be labeled as interleave */ } else if(clone_data->interleave) { do_interleave = TRUE; } else if(constraint->score >= INFINITY) { GListPtr lhs = NULL, rhs = NULL; lhs = rsc_lh->allowed_nodes; slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if(chosen != NULL) { rhs = g_list_append(rhs, chosen); } ); rsc_lh->allowed_nodes = node_list_exclude(lhs, rhs); pe_free_shallow_adv(rhs, FALSE); pe_free_shallow(lhs); return; } } else if(constraint->score >= INFINITY) { crm_config_err("Manditory co-location of clones (%s) with other" " non-clone (%s) resources is not supported", rsc_lh->id, rsc_rh->id); return; } if(do_interleave) { resource_t *rh_child = NULL; slist_iter(lh_child, resource_t, rsc->children, lpc, CRM_ASSERT(lh_child != NULL); rh_child = find_compatible_child( lh_child, rsc_rh, RSC_ROLE_UNKNOWN, FALSE); if(rh_child == NULL) { crm_debug_2("No match found for %s", 
lh_child->id); continue; } crm_debug("Interleaving %s with %s", lh_child->id, rh_child->id); lh_child->cmds->rsc_colocation_lh( lh_child, rh_child, constraint); ); return; } slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_colocation_lh(child_rsc, constraint->rsc_rh, constraint); ); } void clone_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { clone_variant_data_t *clone_data = NULL; CRM_CHECK(rsc_lh != NULL, return); CRM_CHECK(rsc_lh->variant == pe_native, return); get_clone_variant_data(clone_data, rsc_rh); crm_debug_3("Processing constraint %s: %d", constraint->id, constraint->score); if(rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } else if(is_set(rsc_rh->flags, pe_rsc_provisional)) { crm_debug_3("%s is still provisional", rsc_rh->id); return; } else if(constraint->score >= INFINITY) { GListPtr lhs = NULL, rhs = NULL; lhs = rsc_lh->allowed_nodes; slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if(chosen != NULL) { rhs = g_list_append(rhs, chosen); } ); rsc_lh->allowed_nodes = node_list_exclude(lhs, rhs); pe_free_shallow_adv(rhs, FALSE); pe_free_shallow(lhs); return; } slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint); ); } /* Clone <-> Clone ordering S : Start(ed) S' : Stop(ped) P : Promote(d) D : Demote(d) Started == Demoted First A then B A:0 B:0 Old New Old New S' S' S S' S' S' S' - S' S S S+ S' S S' S S S' S S' S S' S' - S S S - S S S' S S' S' P S' S' S' S' - S' P P P+ S' P S' P P S' P S' P S' S' - P P P - P P S' P D D P D D D D - D P P P+ D P D P P D P D P D D - P P P - P P D P Clone <-> Primitive ordering S : Start(ed) S' : Stop(ped) P : Promote(d) D : Demote(d) F : False T : True F' : A good idea? 
Started == Demoted First A then B A:0 B Old New Old Create Constraint S' S' S F S' S' S' F' S S' S T S S' S' F S' S S T S' S S' T S S S F' S S S' T S' S' S F S' S' S' F' P S' S T P S' S' F S' P S T S' P S' T P P S F' P P S' F S' S' S F S' S' S' F' D S' S T D S' S' F S' D S T S' D S' T D D S F' D D S' T */ static gboolean detect_restart(resource_t *rsc) { gboolean restart = FALSE; /* Look for restarts */ action_t *start = NULL; char *key = start_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, NULL); crm_free(key); if(possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } if(start != NULL && start->optional == FALSE) { restart = TRUE; crm_debug_2("Detected a restart for %s", rsc->id); } /* Otherwise, look for moves */ if(restart == FALSE) { GListPtr old_hosts = NULL; GListPtr new_hosts = NULL; GListPtr intersection = NULL; rsc->fns->location(rsc, &old_hosts, TRUE); rsc->fns->location(rsc, &new_hosts, FALSE); intersection = node_list_and(old_hosts, new_hosts, FALSE); if(intersection == NULL) { restart = TRUE; /* Actually a move but the result is the same */ crm_debug_2("Detected a move for %s", rsc->id); } g_list_free(intersection); g_list_free(old_hosts); g_list_free(new_hosts); } return restart; } static void clone_rsc_order_lh_non_clone(resource_t *rsc, order_constraint_t *order, pe_working_set_t *data_set) { GListPtr hosts = NULL; GListPtr rh_hosts = NULL; GListPtr intersection = NULL; const char *reason = "unknown"; enum action_tasks task = start_rsc; enum rsc_role_e lh_role = RSC_ROLE_STARTED; int any_ordered = 0; gboolean down_stack = TRUE; crm_debug_2("Clone-to-* ordering: %s -> %s 0x%.6x", order->lh_action_task, order->rh_action_task, order->type); if(strstr(order->rh_action_task, "_"RSC_STOP"_0") || strstr(order->rh_action_task, "_"RSC_STOPPED"_0")) { task = stop_rsc; reason = "down activity"; lh_role = RSC_ROLE_STOPPED; order->rh_rsc->fns->location(order->rh_rsc, &rh_hosts, down_stack); } else 
if(strstr(order->rh_action_task, "_"RSC_DEMOTE"_0") || strstr(order->rh_action_task, "_"RSC_DEMOTED"_0")) { task = action_demote; reason = "demotion activity"; lh_role = RSC_ROLE_SLAVE; order->rh_rsc->fns->location(order->rh_rsc, &rh_hosts, down_stack); } else if(strstr(order->lh_action_task, "_"RSC_PROMOTE"_0") || strstr(order->lh_action_task, "_"RSC_PROMOTED"_0")) { task = action_promote; down_stack = FALSE; reason = "promote activity"; order->rh_rsc->fns->location(order->rh_rsc, &rh_hosts, down_stack); lh_role = RSC_ROLE_MASTER; } else if(strstr(order->rh_action_task, "_"RSC_START"_0") || strstr(order->rh_action_task, "_"RSC_STARTED"_0")) { task = start_rsc; down_stack = FALSE; reason = "up activity"; order->rh_rsc->fns->location(order->rh_rsc, &rh_hosts, down_stack); /* if(order->rh_rsc->variant > pe_clone) { */ /* lh_role = RSC_ROLE_SLAVE; */ /* } */ } else { crm_err("Unknown task: %s", order->rh_action_task); return; } /* slist_iter(h, node_t, rh_hosts, llpc, crm_info("RHH: %s", h->details->uname)); */ slist_iter( child_rsc, resource_t, rsc->children, lpc, gboolean create = FALSE; gboolean restart = FALSE; enum rsc_role_e lh_role_new = child_rsc->fns->state(child_rsc, FALSE); enum rsc_role_e lh_role_old = child_rsc->fns->state(child_rsc, TRUE); enum rsc_role_e child_role = child_rsc->fns->state(child_rsc, down_stack); crm_debug_4("Testing %s->%s for %s: %s vs. 
%s %s", order->lh_action_task, order->rh_action_task, child_rsc->id, role2text(lh_role), role2text(child_role), order->lh_action_task); if(rh_hosts == NULL) { crm_debug_3("Terminating search: %s.%d list is empty: no possible %s", order->rh_rsc->id, down_stack, reason); break; } if(lh_role_new == lh_role_old) { restart = detect_restart(child_rsc); if(restart == FALSE) { crm_debug_3("Ignoring %s->%s for %s: no relevant %s (no role change)", order->lh_action_task, order->rh_action_task, child_rsc->id, reason); continue; } } hosts = NULL; child_rsc->fns->location(child_rsc, &hosts, down_stack); intersection = node_list_and(hosts, rh_hosts, FALSE); /* slist_iter(h, node_t, hosts, llpc, crm_info("H: %s %s", child_rsc->id, h->details->uname)); */ if(intersection == NULL) { crm_debug_3("Ignoring %s->%s for %s: no relevant %s", order->lh_action_task, order->rh_action_task, child_rsc->id, reason); g_list_free(hosts); continue; } if(restart) { reason = "restart"; create = TRUE; } else if(down_stack) { if(lh_role_old > lh_role) { create = TRUE; } } else if(down_stack == FALSE) { if(lh_role_old < lh_role) { create = TRUE; } } else { any_ordered++; reason = "role"; crm_debug_4("Role: %s->%s for %s: %s vs. 
%s %s", order->lh_action_task, order->rh_action_task, child_rsc->id, role2text(lh_role_old), role2text(lh_role), order->lh_action_task); } if(create) { char *task = order->lh_action_task; any_ordered++; crm_debug("Enforcing %s->%s for %s on %s: found %s", order->lh_action_task, order->rh_action_task, child_rsc->id, ((node_t*)intersection->data)->details->uname, reason); order->lh_action_task = convert_non_atomic_task(task, child_rsc, TRUE, FALSE); child_rsc->cmds->rsc_order_lh(child_rsc, order, data_set); crm_free(order->lh_action_task); order->lh_action_task = task; } crm_debug_3("Processed %s->%s for %s on %s: %s", order->lh_action_task, order->rh_action_task, child_rsc->id, ((node_t*)intersection->data)->details->uname, reason); /* slist_iter(h, node_t, hosts, llpc, */ /* crm_info("H: %s %s", child_rsc->id, h->details->uname)); */ g_list_free(intersection); g_list_free(hosts); ); g_list_free(rh_hosts); if(any_ordered == 0 && down_stack == FALSE) { order->lh_action_task = convert_non_atomic_task(order->lh_action_task, rsc, TRUE, TRUE); native_rsc_order_lh(rsc, order, data_set); } order->type = pe_order_optional; } void clone_rsc_order_lh(resource_t *rsc, order_constraint_t *order, pe_working_set_t *data_set) { resource_t *r1 = NULL; resource_t *r2 = NULL; gboolean do_interleave = FALSE; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug_4("%s->%s", order->lh_action_task, order->rh_action_task); if(order->rh_rsc == NULL) { order->lh_action_task = convert_non_atomic_task(order->lh_action_task, rsc, FALSE, TRUE); native_rsc_order_lh(rsc, order, data_set); return; } r1 = uber_parent(rsc); r2 = uber_parent(order->rh_rsc); if(r1 == r2) { native_rsc_order_lh(rsc, order, data_set); return; } if(order->rh_rsc->variant > pe_group) { clone_variant_data_t *clone_data_rh = NULL; get_clone_variant_data(clone_data_rh, order->rh_rsc); if(clone_data->clone_node_max != clone_data_rh->clone_node_max) { crm_config_err("Cannot interleave 
"XML_CIB_TAG_INCARNATION " %s and %s because they do not support the same" " number of resources per node", rsc->id, order->rh_rsc->id); /* only the LHS side needs to be labeled as interleave */ } else if(clone_data->interleave) { do_interleave = TRUE; } } if(order->rh_rsc == NULL) { do_interleave = FALSE; } if(do_interleave) { resource_t *lh_child = NULL; resource_t *rh_saved = order->rh_rsc; gboolean current = FALSE; if(strstr(order->lh_action_task, "_stop_0") || strstr(order->lh_action_task, "_demote_0")) { current = TRUE; } slist_iter( rh_child, resource_t, rh_saved->children, lpc, CRM_ASSERT(rh_child != NULL); lh_child = find_compatible_child(rh_child, rsc, RSC_ROLE_UNKNOWN, current); if(lh_child == NULL) { crm_debug_2("No match found for %s", rh_child->id); continue; } crm_notice("Interleaving %s with %s", lh_child->id, rh_child->id); order->rh_rsc = rh_child; lh_child->cmds->rsc_order_lh(lh_child, order, data_set); order->rh_rsc = rh_saved; ); } else { #if 0 if(order->type != pe_order_optional) { crm_debug("Upgraded ordering constraint %d - 0x%.6x", order->id, order->type); native_rsc_order_lh(rsc, order, data_set); } #endif if(order->rh_rsc->variant < pe_clone) { clone_rsc_order_lh_non_clone(rsc, order, data_set); } else if(order->type & pe_order_implies_left) { if(rsc->variant == order->rh_rsc->variant) { crm_debug_2("Clone-to-clone ordering: %s -> %s 0x%.6x", order->lh_action_task, order->rh_action_task, order->type); /* stop instances on the same nodes as stopping RHS instances */ slist_iter( child_rsc, resource_t, rsc->children, lpc, native_rsc_order_lh(child_rsc, order, data_set); ); } else { /* stop everything */ slist_iter( child_rsc, resource_t, rsc->children, lpc, native_rsc_order_lh(child_rsc, order, data_set); ); } } } if(do_interleave == FALSE || clone_data->ordered) { order->lh_action_task = convert_non_atomic_task(order->lh_action_task, rsc, FALSE, TRUE); native_rsc_order_lh(rsc, order, data_set); } if(is_set(rsc->flags, pe_rsc_notify)) { 
order->type = pe_order_optional; order->lh_action_task = convert_non_atomic_task(order->lh_action_task, rsc, TRUE, TRUE); native_rsc_order_lh(rsc, order, data_set); } } static void clone_rsc_order_rh_non_clone( resource_t *lh_p, action_t *lh_action, resource_t *rsc, order_constraint_t *order) { GListPtr hosts = NULL; GListPtr lh_hosts = NULL; GListPtr intersection = NULL; const char *reason = "unknown"; gboolean restart = FALSE; gboolean down_stack = TRUE; enum rsc_role_e rh_role = RSC_ROLE_STARTED; enum action_tasks task = start_rsc; enum rsc_role_e lh_role_new = lh_p->fns->state(lh_p, FALSE); enum rsc_role_e lh_role_old = lh_p->fns->state(lh_p, TRUE); if(strstr(order->lh_action_task, "_"RSC_STOP"_0") || strstr(order->lh_action_task, "_"RSC_STOPPED"_0")) { task = stop_rsc; reason = "down activity"; rh_role = RSC_ROLE_STOPPED; lh_p->fns->location(lh_p, &lh_hosts, down_stack); /* These actions are not possible for non-clones } else if(strstr(order->lh_action_task, "_"RSC_DEMOTE"_0") || strstr(order->lh_action_task, "_"RSC_DEMOTED"_0")) { task = action_demote; rh_role = RSC_ROLE_SLAVE; reason = "demotion activity"; lh_p->fns->location(lh_p, &lh_hosts, down_stack); } else if(strstr(order->lh_action_task, "_"RSC_PROMOTE"_0") || strstr(order->lh_action_task, "_"RSC_PROMOTED"_0")) { task = action_promote; down_stack = FALSE; reason = "promote activity"; lh_p->fns->location(lh_p, &lh_hosts, down_stack); rh_role = RSC_ROLE_MASTER; */ } else if(strstr(order->lh_action_task, "_"RSC_START"_0") || strstr(order->lh_action_task, "_"RSC_STARTED"_0")) { task = start_rsc; down_stack = FALSE; reason = "up activity"; lh_p->fns->location(lh_p, &lh_hosts, down_stack); } else { crm_err("Unknown action: %s", order->lh_action_task); return; } if(lh_role_new == lh_role_old) { restart = detect_restart(lh_action->rsc); if(FALSE && restart == FALSE) { crm_debug_3("Ignoring %s->%s for %s: no relevant %s (no role change)", lh_action->task, order->lh_action_task, lh_p->id, reason); goto cleanup; 
} } /* slist_iter(h, node_t, lh_hosts, llpc, crm_info("LHH: %s", h->details->uname)); */ slist_iter( child_rsc, resource_t, rsc->children, lpc, gboolean create = FALSE; enum rsc_role_e child_role = child_rsc->fns->state(child_rsc, down_stack); crm_debug_4("Testing %s->%s for %s: %s vs. %s %s", lh_action->task, order->lh_action_task, child_rsc->id, role2text(rh_role), role2text(child_role), order->lh_action_task); if(lh_hosts == NULL) { crm_debug_3("Terminating search: %s.%d list is empty: no possible %s", order->rh_rsc->id, down_stack, reason); break; } hosts = NULL; child_rsc->fns->location(child_rsc, &hosts, down_stack); intersection = node_list_and(hosts, lh_hosts, FALSE); if(intersection == NULL) { crm_debug_3("Ignoring %s->%s for %s: no relevant %s", lh_action->task, order->lh_action_task, child_rsc->id, reason); g_list_free(hosts); continue; } /* slist_iter(h, node_t, hosts, llpc, crm_info("H: %s %s", child_rsc->id, h->details->uname)); */ if(restart) { reason = "restart"; create = TRUE; } else if(down_stack && lh_role_old >= rh_role) { create = TRUE; } else if(down_stack == FALSE && lh_role_old <= rh_role) { create = TRUE; } else { reason = "role"; } if(create) { enum pe_ordering type = order->type; child_rsc->cmds->rsc_order_rh(lh_action, child_rsc, order); order->type = pe_order_optional; native_rsc_order_rh(lh_action, rsc, order); order->type = type; } crm_debug_3("Processed %s->%s for %s on %s: found %s%s", lh_action->task, order->lh_action_task, child_rsc->id, ((node_t*)intersection->data)->details->uname, reason, create?" 
- enforced":""); /* slist_iter(h, node_t, hosts, llpc, */ /* crm_info("H: %s %s", child_rsc->id, h->details->uname)); */ g_list_free(intersection); g_list_free(hosts); ); cleanup: g_list_free(lh_hosts); } void clone_rsc_order_rh( action_t *lh_action, resource_t *rsc, order_constraint_t *order) { enum pe_ordering type = order->type; clone_variant_data_t *clone_data = NULL; resource_t *lh_p = uber_parent(lh_action->rsc); get_clone_variant_data(clone_data, rsc); crm_debug_2("%s->%s", order->lh_action_task, order->rh_action_task); if(safe_str_eq(CRM_OP_PROBED, lh_action->uuid)) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_order_rh(lh_action, child_rsc, order); ); if(rsc->fns->state(rsc, TRUE) < RSC_ROLE_STARTED && rsc->fns->state(rsc, FALSE) > RSC_ROLE_STOPPED) { order->type |= pe_order_implies_right; } } else if(lh_p && lh_p != rsc && lh_p->variant < pe_clone) { clone_rsc_order_rh_non_clone(lh_p, lh_action, rsc, order); return; } native_rsc_order_rh(lh_action, rsc, order); order->type = type; } void clone_rsc_location(resource_t *rsc, rsc_to_node_t *constraint) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug_3("Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_location(child_rsc, constraint); ); } void clone_expand(resource_t *rsc, pe_working_set_t *data_set) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug_2("Processing actions from %s", rsc->id); if(clone_data->start_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify); expand_notification_data(clone_data->start_notify); create_notifications(rsc, clone_data->start_notify, data_set); } if(clone_data->stop_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify); expand_notification_data(clone_data->stop_notify); 
create_notifications(rsc, clone_data->stop_notify, data_set); } if(clone_data->promote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify); expand_notification_data(clone_data->promote_notify); create_notifications(rsc, clone_data->promote_notify, data_set); } if(clone_data->demote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify); expand_notification_data(clone_data->demote_notify); create_notifications(rsc, clone_data->demote_notify, data_set); } /* Now that the notifcations have been created we can expand the children */ slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->expand(child_rsc, data_set)); native_expand(rsc, data_set); /* The notifications are in the graph now, we can destroy the notify_data */ free_notification_data(clone_data->demote_notify); free_notification_data(clone_data->stop_notify); free_notification_data(clone_data->start_notify); free_notification_data(clone_data->promote_notify); } static gint sort_rsc_id(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); return strcmp(resource1->id, resource2->id); } static resource_t *find_instance_on(resource_t *rsc, node_t *node) { slist_iter(child, resource_t, rsc->children, lpc, GListPtr known_list = NULL; rsc_known_on(child, &known_list); slist_iter(known, node_t, known_list, lpc2, if(node->details == known->details) { g_list_free(known_list); return child; } ); g_list_free(known_list); ); return NULL; } gboolean clone_create_probe(resource_t *rsc, node_t *node, action_t *complete, gboolean force, pe_working_set_t *data_set) { gboolean any_created = FALSE; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); rsc->children = g_list_sort(rsc->children, sort_rsc_id); if(rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return 
FALSE; } if(is_not_set(rsc->flags, pe_rsc_unique) && clone_data->clone_node_max == 1) { /* only look for one copy */ resource_t *child = NULL; /* Try whoever we probed last time */ child = find_instance_on(rsc, node); if(child) { return child->cmds->create_probe( child, node, complete, force, data_set); } /* Try whoever we plan on starting there */ slist_iter( child_rsc, resource_t, rsc->children, lpc, node_t *local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if(local_node == NULL) { continue; } if(local_node->details == node->details) { return child_rsc->cmds->create_probe( child_rsc, node, complete, force, data_set); } ); /* Fall back to the first clone instance */ child = rsc->children->data; return child->cmds->create_probe(child, node, complete, force, data_set); } slist_iter( child_rsc, resource_t, rsc->children, lpc, if(child_rsc->cmds->create_probe( child_rsc, node, complete, force, data_set)) { any_created = TRUE; } if(any_created && is_not_set(rsc->flags, pe_rsc_unique) && clone_data->clone_node_max == 1) { /* only look for one copy (clone :0) */ break; } ); return any_created; } diff --git a/pengine/constraints.c b/pengine/constraints.c index cb498e3807..d989c18560 100644 --- a/pengine/constraints.c +++ b/pengine/constraints.c @@ -1,1097 +1,1138 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +enum pe_order_kind +{ + pe_order_kind_optional, + pe_order_kind_mandatory, + pe_order_kind_serialize, +}; + +enum pe_ordering get_flags( + const char *id, enum pe_order_kind kind, + const char *action_first, const char *action_then, gboolean invert); + gboolean unpack_constraints(xmlNode * xml_constraints, pe_working_set_t *data_set) { xmlNode *lifetime = NULL; xml_child_iter( xml_constraints, xml_obj, const char *id = crm_element_value(xml_obj, XML_ATTR_ID); if(id == NULL) { crm_config_err("Constraint <%s...> must have an id", crm_element_name(xml_obj)); continue; } crm_debug_3("Processing constraint %s %s", crm_element_name(xml_obj),id); lifetime = first_named_child(xml_obj, "lifetime"); if(test_ruleset(lifetime, NULL, data_set->now) == FALSE) { crm_info("Constraint %s %s is not active", crm_element_name(xml_obj), id); } else if(safe_str_eq(XML_CONS_TAG_RSC_ORDER, crm_element_name(xml_obj))) { unpack_rsc_order(xml_obj, data_set); } else if(safe_str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj))) { unpack_rsc_colocation(xml_obj, data_set); } else if(safe_str_eq(XML_CONS_TAG_RSC_LOCATION, crm_element_name(xml_obj))) { unpack_rsc_location(xml_obj, data_set); } else { pe_err("Unsupported constraint type: %s", crm_element_name(xml_obj)); } ); return TRUE; } static const char * invert_action(const char *action) { if(safe_str_eq(action, RSC_START)) { return RSC_STOP; } else if(safe_str_eq(action, RSC_STOP)) { return RSC_START; } else if(safe_str_eq(action, RSC_PROMOTE)) { return RSC_DEMOTE; } else if(safe_str_eq(action, RSC_DEMOTE)) { return RSC_PROMOTE; } else if(safe_str_eq(action, RSC_PROMOTED)) { return RSC_DEMOTED; } else 
if(safe_str_eq(action, RSC_DEMOTED)) { return RSC_PROMOTED; } else if(safe_str_eq(action, RSC_STARTED)) { return RSC_STOPPED; } else if(safe_str_eq(action, RSC_STOPPED)) { return RSC_STARTED; } crm_config_warn("Unknown action: %s", action); return NULL; } +static enum pe_order_kind get_ordering_type(xmlNode *xml_obj) +{ + enum pe_order_kind kind_e = pe_order_kind_mandatory; + const char *kind = crm_element_value(xml_obj, XML_ORDER_ATTR_KIND); + + if(kind == NULL) { + const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); + kind_e = pe_order_kind_mandatory; + + if(score) { + int score_i = char2score(score); + if(score_i == 0) { + kind_e = pe_order_kind_optional; + } + + /* } else if(rsc_then->variant == pe_native && rsc_first->variant > pe_group) { */ + /* kind_e = pe_order_kind_optional; */ + } + + } else if(safe_str_eq(kind, "Mandatory")) { + kind_e = pe_order_kind_mandatory; + + } else if(safe_str_eq(kind, "Optional")) { + kind_e = pe_order_kind_optional; + + } else if(safe_str_eq(kind, "Serialize")) { + kind_e = pe_order_kind_serialize; + + } else { + const char *id = crm_element_value(xml_obj, XML_ATTR_ID); + crm_config_err("Constraint %s: Unknown type '%s'", id, kind); + } + return kind_e; +} + static gboolean unpack_simple_rsc_order(xmlNode * xml_obj, pe_working_set_t *data_set) { - int score_i = 0; int order_id = 0; resource_t *rsc_then = NULL; resource_t *rsc_first = NULL; gboolean invert_bool = TRUE; + enum pe_order_kind kind = pe_order_kind_mandatory; enum pe_ordering cons_weight = pe_order_optional; const char *id_first = NULL; const char *id_then = NULL; const char *action_then = NULL; const char *action_first = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); - const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); crm_str_to_boolean(invert, &invert_bool); if(xml_obj == NULL) { crm_config_err("No constraint object to process."); return 
FALSE; } else if(id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } id_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN); id_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST); action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION); action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION); if(action_first == NULL) { action_first = RSC_START; } if(action_then == NULL) { action_then = action_first; } if(id_then == NULL || id_first == NULL) { crm_config_err("Constraint %s needs two sides lh: %s rh: %s", id, crm_str(id_then), crm_str(id_first)); return FALSE; } rsc_then = pe_find_resource(data_set->resources, id_then); rsc_first = pe_find_resource(data_set->resources, id_first); if(rsc_then == NULL) { crm_config_err("Constraint %s: no resource found for name '%s'", id, id_then); return FALSE; } else if(rsc_first == NULL) { crm_config_err("Constraint %s: no resource found for name '%s'", id, id_first); return FALSE; } - if(score == NULL && rsc_then->variant == pe_native && rsc_first->variant > pe_group) { - score = "0"; - - } else if(score == NULL) { - score = "INFINITY"; - } - - score_i = char2score(score); cons_weight = pe_order_optional; - if(score_i == 0 && rsc_then->restart_type == pe_restart_restart) { - crm_debug_2("Upgrade : recovery - implies right"); - cons_weight |= pe_order_implies_right; - } + kind = get_ordering_type(xml_obj); - if(score_i < 0) { - crm_debug_2("Upgrade : implies left"); - cons_weight |= pe_order_implies_left; - - } else if(score_i > 0) { - crm_debug_2("Upgrade : implies right"); - cons_weight |= pe_order_implies_right; - if(safe_str_eq(action_then, RSC_START) - || safe_str_eq(action_then, RSC_PROMOTE)) { - crm_debug_2("Upgrade : runnable"); - cons_weight |= pe_order_runnable_left; - } + if(kind == pe_order_kind_optional && rsc_then->restart_type == pe_restart_restart) { + crm_debug_2("Upgrade : recovery - implies right"); + cons_weight |= 
pe_order_implies_right; } - - order_id = new_rsc_order(rsc_first, action_first, rsc_then, action_then, cons_weight, data_set); + + cons_weight |= get_flags(id, kind, action_first, action_then, FALSE); + order_id = new_rsc_order( + rsc_first, action_first, rsc_then, action_then, cons_weight, data_set); crm_debug_2("order-%d (%s): %s_%s before %s_%s flags=0x%.6x", order_id, id, rsc_first->id, action_first, rsc_then->id, action_then, cons_weight); if(invert_bool == FALSE) { return TRUE; + + } else if(invert && kind == pe_order_kind_serialize) { + crm_config_warn("Cannot invert serialized constraint set %s", id); + return TRUE; + + } else if(kind == pe_order_kind_serialize) { + return TRUE; } action_then = invert_action(action_then); action_first = invert_action(action_first); - - cons_weight = pe_order_optional; - if(score_i == 0 && rsc_then->restart_type == pe_restart_restart) { - crm_debug_2("Upgrade : recovery - implies left"); - cons_weight |= pe_order_implies_left; - } - - score_i *= -1; - if(score_i < 0) { - crm_debug_2("Upgrade : implies left"); - cons_weight |= pe_order_implies_left; - if(safe_str_eq(action_then, RSC_DEMOTE)) { - crm_debug_2("Upgrade : demote"); - cons_weight |= pe_order_demote; - } - - } else if(score_i > 0) { - crm_debug_2("Upgrade : implies right"); - cons_weight |= pe_order_implies_right; - if(safe_str_eq(action_then, RSC_START) - || safe_str_eq(action_then, RSC_PROMOTE)) { - crm_debug_2("Upgrade : runnable"); - cons_weight |= pe_order_runnable_left; - } - } - if(action_then == NULL || action_first == NULL) { crm_config_err("Cannot invert rsc_order constraint %s." 
" Please specify the inverse manually.", id); return TRUE; } - + + cons_weight = pe_order_optional; + if(kind == pe_order_kind_optional && rsc_then->restart_type == pe_restart_restart) { + crm_debug_2("Upgrade : recovery - implies left"); + cons_weight |= pe_order_implies_left; + } + + cons_weight |= get_flags(id, kind, action_first, action_then, TRUE); order_id = new_rsc_order( rsc_then, action_then, rsc_first, action_first, cons_weight, data_set); + crm_debug_2("order-%d (%s): %s_%s before %s_%s flags=0x%.6x", order_id, id, rsc_then->id, action_then, rsc_first->id, action_first, cons_weight); return TRUE; } gboolean unpack_rsc_location(xmlNode * xml_obj, pe_working_set_t *data_set) { gboolean empty = TRUE; const char *id_lh = crm_element_value(xml_obj, "rsc"); const char *id = crm_element_value(xml_obj, XML_ATTR_ID); resource_t *rsc_lh = pe_find_resource(data_set->resources, id_lh); const char *node = crm_element_value(xml_obj, "node"); const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); if(rsc_lh == NULL) { /* only a warn as BSC adds the constraint then the resource */ crm_config_warn("No resource (con=%s, rsc=%s)", id, id_lh); return FALSE; } if(node != NULL && score != NULL) { int score_i = char2score(score); node_t *match = pe_find_node(data_set->nodes, node); if(match) { rsc2node_new(id, rsc_lh, score_i, match, data_set); return TRUE; } else { return FALSE; } } xml_child_iter_filter( xml_obj, rule_xml, XML_TAG_RULE, empty = FALSE; crm_debug_2("Unpacking %s/%s", id, ID(rule_xml)); generate_location_rule(rsc_lh, rule_xml, data_set); ); if(empty) { crm_config_err("Invalid location constraint %s:" " rsc_location must contain at least one rule", ID(xml_obj)); } return TRUE; } static int get_node_score(const char *rule, const char *score, gboolean raw, node_t *node) { int score_f = 0; if(score == NULL) { pe_err("Rule %s: no score specified. 
Assuming 0.", rule); } else if(raw) { score_f = char2score(score); } else { const char *attr_score = g_hash_table_lookup( node->details->attrs, score); if(attr_score == NULL) { crm_debug("Rule %s: node %s did not have a value for %s", rule, node->details->uname, score); score_f = -INFINITY; } else { crm_debug("Rule %s: node %s had value %s for %s", rule, node->details->uname, attr_score, score); score_f = char2score(attr_score); } } return score_f; } rsc_to_node_t * generate_location_rule( resource_t *rsc, xmlNode *rule_xml, pe_working_set_t *data_set) { const char *rule_id = NULL; const char *score = NULL; const char *boolean = NULL; const char *role = NULL; GListPtr match_L = NULL; int score_f = 0; gboolean do_and = TRUE; gboolean accept = TRUE; gboolean raw_score = TRUE; rsc_to_node_t *location_rule = NULL; rule_xml = expand_idref(rule_xml, data_set->input); rule_id = crm_element_value(rule_xml, XML_ATTR_ID); boolean = crm_element_value(rule_xml, XML_RULE_ATTR_BOOLEAN_OP); role = crm_element_value(rule_xml, XML_RULE_ATTR_ROLE); crm_debug_2("Processing rule: %s", rule_id); if(role != NULL && text2role(role) == RSC_ROLE_UNKNOWN) { pe_err("Bad role specified for %s: %s", rule_id, role); return NULL; } score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE); if(score != NULL) { score_f = char2score(score); } else { score = crm_element_value( rule_xml, XML_RULE_ATTR_SCORE_ATTRIBUTE); if(score == NULL) { score = crm_element_value( rule_xml, XML_RULE_ATTR_SCORE_MANGLED); } if(score != NULL) { raw_score = FALSE; } } if(safe_str_eq(boolean, "or")) { do_and = FALSE; } location_rule = rsc2node_new(rule_id, rsc, 0, NULL, data_set); if(location_rule == NULL) { return NULL; } if(role != NULL) { crm_debug_2("Setting role filter: %s", role); location_rule->role_filter = text2role(role); if(location_rule->role_filter == RSC_ROLE_SLAVE) { /* Fold slave back into Started for simplicity * At the point Slave location constraints are evaluated, * all resources are still either 
stopped or started */ location_rule->role_filter = RSC_ROLE_STARTED; } } if(do_and) { match_L = node_list_dup(data_set->nodes, TRUE, FALSE); slist_iter( node, node_t, match_L, lpc, node->weight = get_node_score(rule_id, score, raw_score, node); ); } slist_iter( node, node_t, data_set->nodes, lpc, accept = test_rule( rule_xml, node->details->attrs, RSC_ROLE_UNKNOWN, data_set->now); crm_debug_2("Rule %s %s on %s", ID(rule_xml), accept?"passed":"failed", node->details->uname); score_f = get_node_score(rule_id, score, raw_score, node); /* if(accept && score_f == -INFINITY) { */ /* accept = FALSE; */ /* } */ if(accept) { node_t *local = pe_find_node_id( match_L, node->details->id); if(local == NULL && do_and) { continue; } else if(local == NULL) { local = node_copy(node); match_L = g_list_append(match_L, local); } if(do_and == FALSE) { local->weight = merge_weights( local->weight, score_f); } crm_debug_2("node %s now has weight %d", node->details->uname, local->weight); } else if(do_and && !accept) { /* remove it */ node_t *delete = pe_find_node_id( match_L, node->details->id); if(delete != NULL) { match_L = g_list_remove(match_L,delete); crm_debug_5("node %s did not match", node->details->uname); } crm_free(delete); } ); location_rule->node_list_rh = match_L; if(location_rule->node_list_rh == NULL) { crm_debug_2("No matching nodes for rule %s", rule_id); return NULL; } crm_debug_3("%s: %d nodes matched", rule_id, g_list_length(location_rule->node_list_rh)); return location_rule; } static gint sort_cons_priority_lh(gconstpointer a, gconstpointer b) { const rsc_colocation_t *rsc_constraint1 = (const rsc_colocation_t*)a; const rsc_colocation_t *rsc_constraint2 = (const rsc_colocation_t*)b; if(a == NULL) { return 1; } if(b == NULL) { return -1; } CRM_ASSERT(rsc_constraint1->rsc_lh != NULL); CRM_ASSERT(rsc_constraint1->rsc_rh != NULL); if(rsc_constraint1->rsc_lh->priority > rsc_constraint2->rsc_lh->priority) { return -1; } if(rsc_constraint1->rsc_lh->priority < 
rsc_constraint2->rsc_lh->priority) { return 1; } return strcmp(rsc_constraint1->rsc_lh->id, rsc_constraint2->rsc_lh->id); } static gint sort_cons_priority_rh(gconstpointer a, gconstpointer b) { const rsc_colocation_t *rsc_constraint1 = (const rsc_colocation_t*)a; const rsc_colocation_t *rsc_constraint2 = (const rsc_colocation_t*)b; if(a == NULL) { return 1; } if(b == NULL) { return -1; } CRM_ASSERT(rsc_constraint1->rsc_lh != NULL); CRM_ASSERT(rsc_constraint1->rsc_rh != NULL); if(rsc_constraint1->rsc_rh->priority > rsc_constraint2->rsc_rh->priority) { return -1; } if(rsc_constraint1->rsc_rh->priority < rsc_constraint2->rsc_rh->priority) { return 1; } return strcmp(rsc_constraint1->rsc_rh->id, rsc_constraint2->rsc_rh->id); } gboolean rsc_colocation_new(const char *id, const char *node_attr, int score, resource_t *rsc_lh, resource_t *rsc_rh, const char *state_lh, const char *state_rh, pe_working_set_t *data_set) { rsc_colocation_t *new_con = NULL; if(rsc_lh == NULL){ crm_config_err("No resource found for LHS %s", id); return FALSE; } else if(rsc_rh == NULL){ crm_config_err("No resource found for RHS of %s", id); return FALSE; } crm_malloc0(new_con, sizeof(rsc_colocation_t)); if(new_con == NULL) { return FALSE; } if(state_lh == NULL || safe_str_eq(state_lh, RSC_ROLE_STARTED_S)) { state_lh = RSC_ROLE_UNKNOWN_S; } if(state_rh == NULL || safe_str_eq(state_rh, RSC_ROLE_STARTED_S)) { state_rh = RSC_ROLE_UNKNOWN_S; } new_con->id = id; new_con->rsc_lh = rsc_lh; new_con->rsc_rh = rsc_rh; new_con->score = score; new_con->role_lh = text2role(state_lh); new_con->role_rh = text2role(state_rh); new_con->node_attribute = node_attr; if(node_attr == NULL) { node_attr = "#"XML_ATTR_UNAME; } crm_debug_3("%s ==> %s (%s %d)", rsc_lh->id, rsc_rh->id, node_attr, score); rsc_lh->rsc_cons = g_list_insert_sorted( rsc_lh->rsc_cons, new_con, sort_cons_priority_rh); rsc_rh->rsc_cons_lhs = g_list_insert_sorted( rsc_rh->rsc_cons_lhs, new_con, sort_cons_priority_lh); data_set->colocation_constraints 
= g_list_append( data_set->colocation_constraints, new_con); return TRUE; } /* LHS before RHS */ int new_rsc_order(resource_t *lh_rsc, const char *lh_task, resource_t *rh_rsc, const char *rh_task, enum pe_ordering type, pe_working_set_t *data_set) { char *lh_key = NULL; char *rh_key = NULL; CRM_CHECK(lh_rsc != NULL, return -1); CRM_CHECK(lh_task != NULL, return -1); CRM_CHECK(rh_rsc != NULL, return -1); CRM_CHECK(rh_task != NULL, return -1); lh_key = generate_op_key(lh_rsc->id, lh_task, 0); rh_key = generate_op_key(rh_rsc->id, rh_task, 0); return custom_action_order(lh_rsc, lh_key, NULL, rh_rsc, rh_key, NULL, type, data_set); } /* LHS before RHS */ int custom_action_order( resource_t *lh_rsc, char *lh_action_task, action_t *lh_action, resource_t *rh_rsc, char *rh_action_task, action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set) { order_constraint_t *order = NULL; if(lh_rsc == NULL && lh_action) { lh_rsc = lh_action->rsc; } if(rh_rsc == NULL && rh_action) { rh_rsc = rh_action->rsc; } if((lh_action == NULL && lh_rsc == NULL) || (rh_action == NULL && rh_rsc == NULL)){ crm_config_err("Invalid inputs %p.%p %p.%p", lh_rsc, lh_action, rh_rsc, rh_action); crm_free(lh_action_task); crm_free(rh_action_task); return -1; } crm_malloc0(order, sizeof(order_constraint_t)); crm_debug_3("Creating ordering constraint %d", data_set->order_id); order->id = data_set->order_id++; order->type = type; order->lh_rsc = lh_rsc; order->rh_rsc = rh_rsc; order->lh_action = lh_action; order->rh_action = rh_action; order->lh_action_task = lh_action_task; order->rh_action_task = rh_action_task; data_set->ordering_constraints = g_list_append( data_set->ordering_constraints, order); if(lh_rsc != NULL && rh_rsc != NULL) { crm_debug_4("Created ordering constraint %d (%s):" " %s/%s before %s/%s", order->id, ordering_type2text(order->type), lh_rsc->id, lh_action_task, rh_rsc->id, rh_action_task); } else if(lh_rsc != NULL) { crm_debug_4("Created ordering constraint %d (%s):" " %s/%s 
before action %d (%s)", order->id, ordering_type2text(order->type), lh_rsc->id, lh_action_task, rh_action->id, rh_action_task); } else if(rh_rsc != NULL) { crm_debug_4("Created ordering constraint %d (%s):" " action %d (%s) before %s/%s", order->id, ordering_type2text(order->type), lh_action->id, lh_action_task, rh_rsc->id, rh_action_task); } else { crm_debug_4("Created ordering constraint %d (%s):" " action %d (%s) before action %d (%s)", order->id, ordering_type2text(order->type), lh_action->id, lh_action_task, rh_action->id, rh_action_task); } return order->id; } -static enum pe_ordering get_flags( - const char *id, int score, const char *action_1, const char *action_2) { +enum pe_ordering get_flags( + const char *id, enum pe_order_kind kind, + const char *action_first, const char *action_then, gboolean invert) { enum pe_ordering flags = pe_order_optional; - if(score < 0) { + if(invert && kind == pe_order_kind_mandatory) { crm_debug_2("Upgrade %s: implies left", id); flags |= pe_order_implies_left; - if(safe_str_eq(action_2, RSC_DEMOTE)) { + if(safe_str_eq(action_then, RSC_DEMOTE)) { crm_debug_2("Upgrade %s: demote", id); flags |= pe_order_demote; } - } else if(score > 0) { + } else if(kind == pe_order_kind_mandatory) { crm_debug_2("Upgrade %s: implies right", id); flags |= pe_order_implies_right; - if(safe_str_eq(action_1, RSC_START) - || safe_str_eq(action_1, RSC_PROMOTE)) { + if(safe_str_eq(action_first, RSC_START) + || safe_str_eq(action_first, RSC_PROMOTE)) { crm_debug_2("Upgrade %s: runnable", id); flags |= pe_order_runnable_left; } + + } else if(kind == pe_order_kind_serialize) { + flags |= pe_order_serialize_only; } + return flags; } static gboolean -unpack_order_set(xmlNode *set, int score, - action_t **begin, action_t **end, - action_t **inv_begin, action_t **inv_end, const char *symmetrical, pe_working_set_t *data_set) +unpack_order_set(xmlNode *set, enum pe_order_kind kind, + action_t **begin, action_t **end, action_t **inv_begin, action_t **inv_end, 
+ const char *symmetrical, pe_working_set_t *data_set) { + GListPtr set_iter = NULL; + GListPtr resources = NULL; + resource_t *last = NULL; resource_t *resource = NULL; - int local_score = score; + int local_kind = kind; gboolean sequential = FALSE; enum pe_ordering flags = pe_order_optional; char *key = NULL; const char *id = ID(set); const char *action = crm_element_value(set, "action"); const char *sequential_s = crm_element_value(set, "sequential"); - const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); + const char *kind_s = crm_element_value(set, XML_ORDER_ATTR_KIND); char *pseudo_id = NULL; char *end_id = NULL; char *begin_id = NULL; if(action == NULL) { action = RSC_START; } pseudo_id = crm_concat(id, action, '-'); end_id = crm_concat(pseudo_id, "end", '-'); begin_id = crm_concat(pseudo_id, "begin", '-'); *end = get_pseudo_op(end_id, data_set); *begin = get_pseudo_op(begin_id, data_set); - if(score_s) { - local_score = char2score(score_s); + if(kind_s) { + local_kind = get_ordering_type(set); } sequential = crm_is_true(sequential_s); - flags = get_flags(id, local_score, action, action); - + flags = get_flags(id, local_kind, action, action, FALSE); + xml_child_iter_filter( set, xml_rsc, XML_TAG_RESOURCE_REF, - + resource = pe_find_resource(data_set->resources, ID(xml_rsc)); + resources = g_list_append(resources, resource); + ); - key = generate_op_key(resource->id, action, 0); - custom_action_order(NULL, NULL, *begin, resource, key, NULL, - flags|pe_order_implies_left_printed, data_set); + set_iter = resources; + while(set_iter != NULL) { + resource = (resource_t *) set_iter->data; + set_iter = set_iter->next; key = generate_op_key(resource->id, action, 0); - custom_action_order(resource, key, NULL, NULL, NULL, *end, + + custom_action_order(NULL, NULL, *begin, resource, crm_strdup(key), NULL, + flags|pe_order_implies_left_printed, data_set); + + custom_action_order(resource, crm_strdup(key), NULL, NULL, NULL, *end, 
flags|pe_order_implies_right_printed, data_set); - if(sequential) { + if(local_kind == pe_order_kind_serialize) { + /* Serialize before everything that comes after */ + slist_iter( + then_rsc, resource_t, set_iter, lpc, + + char *then_key = generate_op_key(then_rsc->id, action, 0); + custom_action_order(resource, crm_strdup(key), NULL, then_rsc, then_key, NULL, + flags, data_set); + ); + + } else if(sequential) { if(last != NULL) { new_rsc_order(last, action, resource, action, flags, data_set); } last = resource; } + } + + if(crm_is_true(symmetrical) == FALSE) { + goto done; - ); + } else if(symmetrical && local_kind == pe_order_kind_serialize) { + crm_config_warn("Cannot invert serialized constraint set %s", id); + goto done; - if(crm_is_true(symmetrical) == FALSE) { + } else if(local_kind == pe_order_kind_serialize) { goto done; } + last = NULL; - local_score *= -1; action = invert_action(action); pseudo_id = crm_concat(id, action, '-'); end_id = crm_concat(pseudo_id, "end", '-'); begin_id = crm_concat(pseudo_id, "begin", '-'); *inv_end = get_pseudo_op(end_id, data_set); *inv_begin = get_pseudo_op(begin_id, data_set); - flags = get_flags(id, local_score, action, action); - - xml_child_iter_filter( - set, xml_rsc, XML_TAG_RESOURCE_REF, + flags = get_flags(id, local_kind, action, action, TRUE); - resource = pe_find_resource(data_set->resources, ID(xml_rsc)); + set_iter = resources; + while(set_iter != NULL) { + resource = (resource_t *) set_iter->data; + set_iter = set_iter->next; key = generate_op_key(resource->id, action, 0); - custom_action_order(NULL, NULL, *inv_begin, resource, key, NULL, + + custom_action_order(NULL, NULL, *inv_begin, resource, crm_strdup(key), NULL, flags|pe_order_implies_left_printed, data_set); - key = generate_op_key(resource->id, action, 0); - custom_action_order(resource, key, NULL, NULL, NULL, *inv_end, + custom_action_order(resource, crm_strdup(key), NULL, NULL, NULL, *inv_end, flags|pe_order_implies_right_printed, data_set); 
if(sequential) { if(last != NULL) { new_rsc_order(resource, action, last, action, flags, data_set); } last = resource; } - - ); + } done: + g_list_free(resources); crm_free(pseudo_id); return TRUE; } static gboolean order_rsc_sets( - const char *id, xmlNode *set1, xmlNode *set2, int score, pe_working_set_t *data_set) { + const char *id, xmlNode *set1, xmlNode *set2, enum pe_order_kind kind, pe_working_set_t *data_set) { resource_t *rsc_1 = NULL; resource_t *rsc_2 = NULL; const char *action_1 = crm_element_value(set1, "action"); const char *action_2 = crm_element_value(set2, "action"); const char *sequential_1 = crm_element_value(set1, "sequential"); const char *sequential_2 = crm_element_value(set2, "sequential"); - enum pe_ordering flags = get_flags(id, score, action_1, action_2); + enum pe_ordering flags = get_flags(id, kind, action_1, action_2, FALSE); if(crm_is_true(sequential_1)) { /* get the first one */ xml_child_iter_filter( set1, xml_rsc, XML_TAG_RESOURCE_REF, rsc_1 = pe_find_resource(data_set->resources, ID(xml_rsc)); break; ); } if(crm_is_true(sequential_2)) { /* get the last one */ const char *rid = NULL; xml_child_iter_filter( set2, xml_rsc, XML_TAG_RESOURCE_REF, rid = ID(xml_rsc); ); rsc_2 = pe_find_resource(data_set->resources, rid); } if(rsc_1 != NULL && rsc_2 != NULL) { new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); } else if(rsc_1 != NULL) { xml_child_iter_filter( set2, xml_rsc, XML_TAG_RESOURCE_REF, rsc_2 = pe_find_resource(data_set->resources, ID(xml_rsc)); new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); ); } else if(rsc_2 != NULL) { xml_child_iter_filter( set1, xml_rsc, XML_TAG_RESOURCE_REF, rsc_1 = pe_find_resource(data_set->resources, ID(xml_rsc)); new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); ); } else { xml_child_iter_filter( set1, xml_rsc, XML_TAG_RESOURCE_REF, rsc_1 = pe_find_resource(data_set->resources, ID(xml_rsc)); xml_child_iter_filter( set2, xml_rsc_2, XML_TAG_RESOURCE_REF, 
rsc_2 = pe_find_resource(data_set->resources, ID(xml_rsc_2)); new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); ); ); } return TRUE; } gboolean unpack_rsc_order(xmlNode *xml_obj, pe_working_set_t *data_set) { - int score_i = 0; gboolean any_sets = FALSE; action_t *set_end = NULL; action_t *set_begin = NULL; action_t *set_inv_end = NULL; action_t *set_inv_begin = NULL; xmlNode *last = NULL; action_t *last_end = NULL; action_t *last_begin = NULL; action_t *last_inv_end = NULL; action_t *last_inv_begin = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); - const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); + enum pe_order_kind kind = get_ordering_type(xml_obj); if(invert == NULL) { invert = "true"; } - if(score == NULL) { - score = "INFINITY"; - } - - score_i = char2score(score); - xml_child_iter_filter( - xml_obj, set, "resource_set", + xml_obj, set, XML_CONS_TAG_RSC_SET, any_sets = TRUE; - if(unpack_order_set(set, score_i, &set_begin, &set_end, + set = expand_idref(set, data_set->input); + if(unpack_order_set(set, kind, &set_begin, &set_end, &set_inv_begin, &set_inv_end, invert, data_set) == FALSE) { return FALSE; } else if(last) { const char *set_action = crm_element_value(set, "action"); const char *last_action = crm_element_value(last, "action"); - enum pe_ordering flags = get_flags(id, score_i, last_action, set_action); + enum pe_ordering flags = get_flags(id, kind, last_action, set_action, FALSE); order_actions(last_end, set_begin, flags); if(crm_is_true(invert)) { set_action = invert_action(set_action?set_action:RSC_START); last_action = invert_action(last_action?last_action:RSC_START); - score_i *= -1; - flags = get_flags(id, score_i, last_action, set_action); + flags = get_flags(id, kind, last_action, set_action, TRUE); order_actions(last_inv_begin, set_inv_end, flags); } - } else if(/* never called */last && order_rsc_sets(id, last, 
set, score_i, data_set) == FALSE) { + } else if(/* never called */last && order_rsc_sets(id, last, set, kind, data_set) == FALSE) { return FALSE; } last = set; last_end = set_end; last_begin = set_begin; last_inv_end = set_inv_end; last_inv_begin = set_inv_begin; ); if(any_sets == FALSE) { return unpack_simple_rsc_order(xml_obj, data_set); } return TRUE; } static gboolean unpack_colocation_set(xmlNode *set, int score, pe_working_set_t *data_set) { resource_t *with = NULL; resource_t *resource = NULL; const char *set_id = ID(set); const char *role = crm_element_value(set, "role"); const char *sequential = crm_element_value(set, "sequential"); int local_score = score; const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); if(score_s) { local_score = char2score(score_s); } if(crm_is_true(sequential)) { xml_child_iter_filter( set, xml_rsc, XML_TAG_RESOURCE_REF, resource = pe_find_resource(data_set->resources, ID(xml_rsc)); if(with != NULL) { crm_debug_2("Colocating %s with %s", resource->id, with->id); rsc_colocation_new(set_id, NULL, local_score, resource, with, role, role, data_set); } with = resource; ); } return TRUE; } static gboolean colocate_rsc_sets( const char *id, xmlNode *set1, xmlNode *set2, int score, pe_working_set_t *data_set) { resource_t *rsc_1 = NULL; resource_t *rsc_2 = NULL; const char *role_1 = crm_element_value(set1, "role"); const char *role_2 = crm_element_value(set2, "role"); const char *sequential_1 = crm_element_value(set1, "sequential"); const char *sequential_2 = crm_element_value(set2, "sequential"); if(crm_is_true(sequential_1)) { /* get the first one */ xml_child_iter_filter( set1, xml_rsc, XML_TAG_RESOURCE_REF, rsc_1 = pe_find_resource(data_set->resources, ID(xml_rsc)); break; ); } if(crm_is_true(sequential_2)) { /* get the last one */ const char *rid = NULL; xml_child_iter_filter( set2, xml_rsc, XML_TAG_RESOURCE_REF, rid = ID(xml_rsc); ); rsc_2 = pe_find_resource(data_set->resources, rid); } if(rsc_1 != NULL && rsc_2 != 
NULL) { rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); } else if(rsc_1 != NULL) { xml_child_iter_filter( set2, xml_rsc, XML_TAG_RESOURCE_REF, rsc_2 = pe_find_resource(data_set->resources, ID(xml_rsc)); rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); ); } else if(rsc_2 != NULL) { xml_child_iter_filter( set1, xml_rsc, XML_TAG_RESOURCE_REF, rsc_1 = pe_find_resource(data_set->resources, ID(xml_rsc)); rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); ); } else { xml_child_iter_filter( set1, xml_rsc, XML_TAG_RESOURCE_REF, rsc_1 = pe_find_resource(data_set->resources, ID(xml_rsc)); xml_child_iter_filter( set2, xml_rsc_2, XML_TAG_RESOURCE_REF, rsc_2 = pe_find_resource(data_set->resources, ID(xml_rsc_2)); rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); ); ); } return TRUE; } static gboolean unpack_simple_colocation(xmlNode *xml_obj, pe_working_set_t *data_set) { int score_i = 0; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); const char *id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); const char *id_rh = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); const char *state_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); const char *state_rh = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR); const char *symmetrical = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); resource_t *rsc_lh = pe_find_resource(data_set->resources, id_lh); resource_t *rsc_rh = pe_find_resource(data_set->resources, id_rh); if(rsc_lh == NULL) { crm_config_err("No resource (con=%s, rsc=%s)", id, id_lh); return FALSE; } else if(rsc_rh == NULL) { crm_config_err("No resource (con=%s, rsc=%s)", id, id_rh); return FALSE; } if(crm_is_true(symmetrical)) { crm_config_warn("The %s colocation constraint attribute 
has been removed." " It didn't do what you think it did anyway.", XML_CONS_ATTR_SYMMETRICAL); } if(score) { score_i = char2score(score); } rsc_colocation_new(id, attr, score_i, rsc_lh, rsc_rh, state_lh, state_rh, data_set); return TRUE; } gboolean unpack_rsc_colocation(xmlNode *xml_obj, pe_working_set_t *data_set) { int score_i = 0; xmlNode *last = NULL; gboolean any_sets = FALSE; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); if(score) { score_i = char2score(score); } xml_child_iter_filter( - xml_obj, set, "resource_set", + xml_obj, set, XML_CONS_TAG_RSC_SET, any_sets = TRUE; + set = expand_idref(set, data_set->input); if(unpack_colocation_set(set, score_i, data_set) == FALSE) { return FALSE; } else if(last && colocate_rsc_sets(id, last, set, score_i, data_set) == FALSE) { return FALSE; } last = set; ); if(any_sets == FALSE) { return unpack_simple_colocation(xml_obj, data_set); } return TRUE; } gboolean is_active(rsc_to_node_t *cons) { return TRUE; } diff --git a/pengine/master.c b/pengine/master.c index 7c903e4c6d..8ef7a36489 100644 --- a/pengine/master.c +++ b/pengine/master.c @@ -1,872 +1,872 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #define VARIANT_CLONE 1 #include -extern gint sort_clone_instance(gconstpointer a, gconstpointer b); +extern gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); extern int master_score(resource_t *rsc, node_t *node, int not_set_value); static void child_promoting_constraints( clone_variant_data_t *clone_data, enum pe_ordering type, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { if(child == NULL) { if(clone_data->ordered && last != NULL) { crm_debug_4("Ordered version (last node)"); /* last child promote before promoted started */ new_rsc_order(last, RSC_PROMOTE, rsc, RSC_PROMOTED, type, data_set); } return; } /* child promote before global promoted */ new_rsc_order(child, RSC_PROMOTE, rsc, RSC_PROMOTED, type, data_set); /* global promote before child promote */ new_rsc_order(rsc, RSC_PROMOTE, child, RSC_PROMOTE, type, data_set); if(clone_data->ordered) { crm_debug_4("Ordered version"); if(last == NULL) { /* global promote before first child promote */ last = rsc; } /* else: child/child relative promote */ order_start_start(last, child, type); new_rsc_order(last, RSC_PROMOTE, child, RSC_PROMOTE, type, data_set); } else { crm_debug_4("Un-ordered version"); } } static void child_demoting_constraints( clone_variant_data_t *clone_data, enum pe_ordering type, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { if(child == NULL) { if(clone_data->ordered && last != NULL) { crm_debug_4("Ordered version (last node)"); /* global demote before first child demote */ new_rsc_order(rsc, RSC_DEMOTE, last, RSC_DEMOTE, pe_order_implies_left, data_set); } return; } /* child demote before global demoted */ new_rsc_order(child, RSC_DEMOTE, 
rsc, RSC_DEMOTED, pe_order_implies_right_printed, data_set); /* global demote before child demote */ new_rsc_order(rsc, RSC_DEMOTE, child, RSC_DEMOTE, pe_order_implies_left_printed, data_set); if(clone_data->ordered && last != NULL) { crm_debug_4("Ordered version"); /* child/child relative demote */ new_rsc_order(child, RSC_DEMOTE, last, RSC_DEMOTE, type, data_set); } else if(clone_data->ordered) { crm_debug_4("Ordered version (1st node)"); /* first child stop before global stopped */ new_rsc_order(child, RSC_DEMOTE, rsc, RSC_DEMOTED, type, data_set); } else { crm_debug_4("Un-ordered version"); } } static void master_update_pseudo_status( resource_t *rsc, gboolean *demoting, gboolean *promoting) { if(rsc->children) { slist_iter(child, resource_t, rsc->children, lpc, master_update_pseudo_status(child, demoting, promoting) ); return; } CRM_ASSERT(demoting != NULL); CRM_ASSERT(promoting != NULL); slist_iter( action, action_t, rsc->actions, lpc, if(*promoting && *demoting) { return; } else if(action->optional) { continue; } else if(safe_str_eq(RSC_DEMOTE, action->task)) { *demoting = TRUE; } else if(safe_str_eq(RSC_PROMOTE, action->task)) { *promoting = TRUE; } ); } #define apply_master_location(list) \ slist_iter( \ cons, rsc_to_node_t, list, lpc2, \ cons_node = NULL; \ if(cons->role_filter == RSC_ROLE_MASTER) { \ crm_debug_2("Applying %s to %s", \ cons->id, child_rsc->id); \ cons_node = pe_find_node_id( \ cons->node_list_rh, chosen->details->id); \ } \ if(cons_node != NULL) { \ int new_priority = merge_weights( \ child_rsc->priority, cons_node->weight); \ crm_debug_2("\t%s: %d->%d (%d)", child_rsc->id, \ child_rsc->priority, new_priority, cons_node->weight); \ child_rsc->priority = new_priority; \ } \ ); static node_t * can_be_master(resource_t *rsc) { node_t *node = NULL; node_t *local_node = NULL; resource_t *parent = uber_parent(rsc); clone_variant_data_t *clone_data = NULL; int level = LOG_DEBUG_2; #if 0 enum rsc_role_e role = RSC_ROLE_UNKNOWN; role = 
rsc->fns->state(rsc, FALSE); crm_info("%s role: %s", rsc->id, role2text(role)); #endif if(rsc->children) { slist_iter( child, resource_t, rsc->children, lpc, if(can_be_master(child) == NULL) { do_crm_log_unlikely(level, "Child %s of %s can't be promoted", child->id, rsc->id); return NULL; } ); } node = rsc->fns->location(rsc, NULL, FALSE); if(node == NULL) { do_crm_log_unlikely(level, "%s cannot be master: not allocated", rsc->id); return NULL; } else if(is_not_set(rsc->flags, pe_rsc_managed)) { if(rsc->fns->state(rsc, TRUE) == RSC_ROLE_MASTER) { crm_notice("Forcing unmanaged master %s to remain promoted on %s", rsc->id, node->details->uname); } else { return NULL; } } else if(rsc->priority < 0) { do_crm_log_unlikely(level, "%s cannot be master: preference: %d", rsc->id, rsc->priority); return NULL; } else if(can_run_resources(node) == FALSE) { do_crm_log_unlikely(level, "Node cant run any resources: %s", node->details->uname); return NULL; } get_clone_variant_data(clone_data, parent); local_node = pe_find_node_id( parent->allowed_nodes, node->details->id); if(local_node == NULL) { crm_err("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); return NULL; } else if(local_node->count < clone_data->master_node_max || is_not_set(rsc->flags, pe_rsc_managed)) { return local_node; } else { do_crm_log_unlikely(level, "%s cannot be master on %s: node full", rsc->id, node->details->uname); } return NULL; } -static gint sort_master_instance(gconstpointer a, gconstpointer b) +static gint sort_master_instance(gconstpointer a, gconstpointer b, gpointer data_set) { int rc; enum rsc_role_e role1 = RSC_ROLE_UNKNOWN; enum rsc_role_e role2 = RSC_ROLE_UNKNOWN; const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); role1 = resource1->fns->state(resource1, TRUE); role2 = resource2->fns->state(resource2, TRUE); rc = sort_rsc_index(a, b); if( rc != 0 ) { 
return rc; } if(role1 > role2) { return -1; } else if(role1 < role2) { return 1; } - return sort_clone_instance(a, b); + return sort_clone_instance(a, b, data_set); } -static void master_promotion_order(resource_t *rsc) +static void master_promotion_order(resource_t *rsc, pe_working_set_t *data_set) { node_t *node = NULL; node_t *chosen = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if(clone_data->merged_master_weights) { return; } clone_data->merged_master_weights = TRUE; crm_debug_2("Merging weights for %s", rsc->id); slist_iter( child, resource_t, rsc->children, lpc, crm_debug_2("%s: %d", child->id, child->sort_index); ); dump_node_scores(LOG_DEBUG_3, rsc, "Before", rsc->allowed_nodes); slist_iter( child, resource_t, rsc->children, lpc, chosen = child->fns->location(child, NULL, FALSE); if(chosen == NULL || child->sort_index < 0) { crm_debug_3("Skipping %s", child->id); continue; } node = (node_t*)pe_find_node_id( rsc->allowed_nodes, chosen->details->id); CRM_ASSERT(node != NULL); /* adds in master preferences and rsc_location.role=Master */ node->weight = merge_weights(child->sort_index, node->weight); ); dump_node_scores(LOG_DEBUG_3, rsc, "Middle", rsc->allowed_nodes); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, /* (re-)adds location preferences of resources that the * master instance should/must be colocated with */ if(constraint->role_lh == RSC_ROLE_MASTER) { crm_debug_2("RHS: %s with %s: %d", constraint->rsc_lh->id, constraint->rsc_rh->id, constraint->score); rsc->allowed_nodes = constraint->rsc_rh->cmds->merge_weights( constraint->rsc_rh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, constraint->score/INFINITY, TRUE); } ); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, /* (re-)adds location preferences of resource that wish to be * colocated with the master instance */ if(constraint->role_rh == RSC_ROLE_MASTER) { crm_debug_2("LHS: %s with %s: %d", 
constraint->rsc_lh->id, constraint->rsc_rh->id, constraint->score); rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, constraint->score/INFINITY, TRUE); } ); dump_node_scores(LOG_DEBUG_3, rsc, "After", rsc->allowed_nodes); /* write them back and sort */ slist_iter( child, resource_t, rsc->children, lpc, chosen = child->fns->location(child, NULL, FALSE); if(chosen == NULL || child->sort_index < 0) { crm_debug_2("%s: %d", child->id, child->sort_index); continue; } node = (node_t*)pe_find_node_id( rsc->allowed_nodes, chosen->details->id); CRM_ASSERT(node != NULL); child->sort_index = node->weight; crm_debug_2("%s: %d", child->id, child->sort_index); ); - rsc->children = g_list_sort(rsc->children, sort_master_instance); + rsc->children = g_list_sort_with_data(rsc->children, sort_master_instance, data_set); } int master_score(resource_t *rsc, node_t *node, int not_set_value) { char *attr_name; char *name = rsc->id; const char *attr_value; int score = not_set_value, len = 0; if(rsc->children) { slist_iter( child, resource_t, rsc->children, lpc, int c_score = master_score(child, node, not_set_value); if(score == not_set_value) { score = c_score; } else { score += c_score; } ); return score; } if(rsc->fns->state(rsc, TRUE) < RSC_ROLE_STARTED) { return score; } if(rsc->running_on) { node_t *match = pe_find_node_id(rsc->allowed_nodes, node->details->id); if(match->weight < 0) { crm_debug_2("%s on %s has score: %d - ignoring master pref", rsc->id, match->details->uname, match->weight); return score; } } if(rsc->clone_name) { /* Use the name the lrm knows this resource as, * since that's what crm_master would have used too */ name = rsc->clone_name; } len = 8 + strlen(name); crm_malloc0(attr_name, len); sprintf(attr_name, "master-%s", name); crm_debug_3("looking for %s on %s", attr_name, node->details->uname); attr_value = g_hash_table_lookup( node->details->attrs, attr_name); 
if(attr_value == NULL) { crm_free(attr_name); len = 8 + strlen(rsc->long_name); crm_malloc0(attr_name, len); sprintf(attr_name, "master-%s", rsc->long_name); crm_debug_3("looking for %s on %s", attr_name, node->details->uname); attr_value = g_hash_table_lookup( node->details->attrs, attr_name); } if(attr_value != NULL) { crm_debug_2("%s[%s] = %s", attr_name, node->details->uname, crm_str(attr_value)); score = char2score(attr_value); } crm_free(attr_name); return score; } #define max(a, b) aapplied_master_prefs) { /* Make sure we only do this once */ return; } clone_data->applied_master_prefs = TRUE; slist_iter( child_rsc, resource_t, rsc->children, lpc, slist_iter( node, node_t, child_rsc->allowed_nodes, lpc, if(can_run_resources(node) == FALSE) { /* This node will never be promoted to master, * so don't apply the master score as that may * lead to clone shuffling */ continue; } score = master_score(child_rsc, node, 0); if(score > 0) { new_score = merge_weights(node->weight, score); if(new_score != node->weight) { crm_debug_2("\t%s: Updating preference for %s (%d->%d)", child_rsc->id, node->details->uname, node->weight, new_score); node->weight = new_score; } } new_score = max(child_rsc->priority, score); if(new_score != child_rsc->priority) { crm_debug_2("\t%s: Updating priority (%d->%d)", child_rsc->id, child_rsc->priority, new_score); child_rsc->priority = new_score; } ); ); } static void set_role_slave(resource_t *rsc, gboolean current) { if(current) { if(rsc->role == RSC_ROLE_STARTED) { rsc->role = RSC_ROLE_SLAVE; } } else { GListPtr allocated = NULL; rsc->fns->location(rsc, &allocated, FALSE); if(allocated) { rsc->next_role = RSC_ROLE_SLAVE; } else { rsc->next_role = RSC_ROLE_STOPPED; } g_list_free(allocated); } slist_iter( child_rsc, resource_t, rsc->children, lpc, set_role_slave(child_rsc, current); ); } static void set_role_master(resource_t *rsc) { if(rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_MASTER; } slist_iter( child_rsc, 
resource_t, rsc->children, lpc, set_role_master(child_rsc); ); } node_t * master_color(resource_t *rsc, pe_working_set_t *data_set) { int promoted = 0; node_t *chosen = NULL; node_t *cons_node = NULL; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); apply_master_prefs(rsc); clone_color(rsc, data_set); /* count now tracks the number of masters allocated */ slist_iter(node, node_t, rsc->allowed_nodes, lpc, node->count = 0; ); /* * assign priority */ slist_iter( child_rsc, resource_t, rsc->children, lpc, GListPtr list = NULL; crm_debug_2("Assigning priority for %s: %s", child_rsc->id, role2text(child_rsc->next_role)); if(child_rsc->fns->state(child_rsc, TRUE) == RSC_ROLE_STARTED) { set_role_slave(child_rsc, TRUE); } chosen = child_rsc->fns->location(child_rsc, &list, FALSE); if(g_list_length(list) > 1) { crm_config_err("Cannot promote non-colocated child %s", child_rsc->id); } g_list_free(list); if(chosen == NULL) { continue; } next_role = child_rsc->fns->state(child_rsc, FALSE); switch(next_role) { case RSC_ROLE_STARTED: case RSC_ROLE_UNKNOWN: CRM_CHECK(chosen != NULL, break); /* * Default to -1 if no value is set * * This allows master locations to be specified * based solely on rsc_location constraints, * but prevents anyone from being promoted if * neither a constraint nor a master-score is present */ child_rsc->priority = master_score(child_rsc, chosen, -1); break; case RSC_ROLE_SLAVE: case RSC_ROLE_STOPPED: child_rsc->priority = -INFINITY; break; case RSC_ROLE_MASTER: /* We will arrive here if we're re-creating actions after a stonith * OR target-role is set */ break; default: CRM_CHECK(FALSE/* unhandled */, crm_err("Unknown resource role: %d for %s", next_role, child_rsc->id)); } apply_master_location(child_rsc->rsc_location); apply_master_location(rsc->rsc_location); slist_iter( cons, rsc_colocation_t, child_rsc->rsc_cons, lpc2, child_rsc->cmds->rsc_colocation_lh(child_rsc, 
cons->rsc_rh, cons); ); child_rsc->sort_index = child_rsc->priority; crm_debug_2("Assigning priority for %s: %d", child_rsc->id, child_rsc->priority); if(next_role == RSC_ROLE_MASTER) { child_rsc->sort_index = INFINITY; } ); - master_promotion_order(rsc); + master_promotion_order(rsc, data_set); /* mark the first N as masters */ slist_iter( child_rsc, resource_t, rsc->children, lpc, chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if(show_scores) { fprintf(stdout, "%s promotion score on %s: %d\n", child_rsc->id, chosen?chosen->details->uname:"none", child_rsc->sort_index); } else { do_crm_log_unlikely(scores_log_level, "%s promotion score on %s: %d", child_rsc->id, chosen?chosen->details->uname:"none", child_rsc->sort_index); } chosen = NULL; /* nuke 'chosen' so that we don't promote more than the * required number of instances */ if(promoted < clone_data->master_max || is_not_set(rsc->flags, pe_rsc_managed)) { chosen = can_be_master(child_rsc); } crm_debug("%s master score: %d", child_rsc->id, child_rsc->priority); if(chosen == NULL) { set_role_slave(child_rsc, FALSE); continue; } chosen->count++; crm_info("Promoting %s (%s %s)", child_rsc->id, role2text(child_rsc->role), chosen->details->uname); set_role_master(child_rsc); promoted++; ); clone_data->masters_allocated = promoted; crm_info("%s: Promoted %d instances of a possible %d to master", rsc->id, promoted, clone_data->master_max); return NULL; } void master_create_actions(resource_t *rsc, pe_working_set_t *data_set) { action_t *action = NULL; action_t *action_complete = NULL; gboolean any_promoting = FALSE; gboolean any_demoting = FALSE; resource_t *last_promote_rsc = NULL; resource_t *last_demote_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug("Creating actions for %s", rsc->id); /* create actions as normal */ clone_create_actions(rsc, data_set); slist_iter( child_rsc, resource_t, rsc->children, lpc, gboolean child_promoting = FALSE; gboolean 
child_demoting = FALSE; crm_debug_2("Creating actions for %s", child_rsc->id); child_rsc->cmds->create_actions(child_rsc, data_set); master_update_pseudo_status( child_rsc, &child_demoting, &child_promoting); any_demoting = any_demoting || child_demoting; any_promoting = any_promoting || child_promoting; crm_debug_2("Created actions for %s: %d %d", child_rsc->id, child_promoting, child_demoting); ); /* promote */ action = promote_action(rsc, NULL, !any_promoting); action_complete = custom_action( rsc, promoted_key(rsc), RSC_PROMOTED, NULL, !any_promoting, TRUE, data_set); action->pseudo = TRUE; action->runnable = FALSE; action_complete->pseudo = TRUE; action_complete->runnable = FALSE; action_complete->priority = INFINITY; if(clone_data->masters_allocated > 0) { action->runnable = TRUE; action_complete->runnable = TRUE; } child_promoting_constraints(clone_data, pe_order_optional, rsc, NULL, last_promote_rsc, data_set); clone_data->promote_notify = create_notification_boundaries( rsc, RSC_PROMOTE, action, action_complete, data_set); /* demote */ action = demote_action(rsc, NULL, !any_demoting); action_complete = custom_action( rsc, demoted_key(rsc), RSC_DEMOTED, NULL, !any_demoting, TRUE, data_set); action_complete->priority = INFINITY; action->pseudo = TRUE; action->runnable = TRUE; action_complete->pseudo = TRUE; action_complete->runnable = TRUE; child_demoting_constraints(clone_data, pe_order_optional, rsc, NULL, last_demote_rsc, data_set); clone_data->demote_notify = create_notification_boundaries( rsc, RSC_DEMOTE, action, action_complete, data_set); if(clone_data->promote_notify) { /* If we ever wanted groups to have notifications we'd need to move this to native_internal_constraints() one day * Requires exposing *_notify */ order_actions(clone_data->stop_notify->post_done, clone_data->promote_notify->pre, pe_order_optional); order_actions(clone_data->start_notify->post_done, clone_data->promote_notify->pre, pe_order_optional); 
order_actions(clone_data->demote_notify->post_done, clone_data->promote_notify->pre, pe_order_optional); order_actions(clone_data->demote_notify->post_done, clone_data->start_notify->pre, pe_order_optional); order_actions(clone_data->demote_notify->post_done, clone_data->stop_notify->pre, pe_order_optional); } /* restore the correct priority */ slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->priority = rsc->priority; ); } void master_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { resource_t *last_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_internal_constraints(rsc, data_set); /* global stopped before start */ new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); /* global stopped before promote */ new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_PROMOTE, pe_order_optional, data_set); /* global demoted before start */ new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_START, pe_order_optional, data_set); /* global started before promote */ new_rsc_order(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_optional, data_set); /* global demoted before stop */ new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set); /* global demote before demoted */ new_rsc_order(rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, pe_order_optional, data_set); /* global demoted before promote */ new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_PROMOTE, pe_order_optional, data_set); slist_iter( child_rsc, resource_t, rsc->children, lpc, /* child demote before promote */ new_rsc_order(child_rsc, RSC_DEMOTE, child_rsc, RSC_PROMOTE, pe_order_optional, data_set); child_promoting_constraints(clone_data, pe_order_optional, rsc, child_rsc, last_rsc, data_set); child_demoting_constraints(clone_data, pe_order_optional, rsc, child_rsc, last_rsc, data_set); last_rsc = child_rsc; ); } static void node_list_update_one(GListPtr list, node_t *other, const char *attr, int score) { const char *value = NULL; 
if(other == NULL) { return; } else if(attr == NULL) { attr = "#"XML_ATTR_UNAME; } value = g_hash_table_lookup(other->details->attrs, attr); slist_iter(node, node_t, list, lpc, const char *tmp = g_hash_table_lookup(node->details->attrs, attr); if(safe_str_eq(value, tmp)) { crm_debug_2("%s: %d + %d", node->details->uname, node->weight, other->weight); node->weight = merge_weights(node->weight, score); } ); } void master_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc_rh); CRM_CHECK(rsc_rh != NULL, return); if(is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if(constraint->role_rh == RSC_ROLE_UNKNOWN) { crm_debug_3("Handling %s as a clone colocation", constraint->id); clone_rsc_colocation_rh(rsc_lh, rsc_rh, constraint); return; } CRM_CHECK(rsc_lh != NULL, return); CRM_CHECK(rsc_lh->variant == pe_native, return); crm_debug_2("Processing constraint %s: %d", constraint->id, constraint->score); if(constraint->role_rh == RSC_ROLE_UNKNOWN) { slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint); ); } else if(is_set(rsc_lh->flags, pe_rsc_provisional)) { GListPtr lhs = NULL, rhs = NULL; lhs = rsc_lh->allowed_nodes; slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, FALSE); crm_debug_3("Processing: %s", child_rsc->id); if(chosen != NULL && next_role == constraint->role_rh) { crm_debug_3("Applying: %s %s %s %d", child_rsc->id, role2text(next_role), chosen->details->uname, constraint->score); if(constraint->score < INFINITY) { node_list_update_one(rsc_lh->allowed_nodes, chosen, constraint->node_attribute, constraint->score); } rhs = g_list_append(rhs, chosen); } ); /* Only do this if its not a master-master colocation * Doing this unconditionally 
would prevent the slaves from being started */ if(constraint->role_lh != RSC_ROLE_MASTER || constraint->role_rh != RSC_ROLE_MASTER) { if(constraint->score > 0) { rsc_lh->allowed_nodes = node_list_exclude(lhs, rhs); pe_free_shallow(lhs); } } pe_free_shallow_adv(rhs, FALSE); } else if(constraint->role_lh == RSC_ROLE_MASTER) { resource_t *rh_child = find_compatible_child(rsc_lh, rsc_rh, constraint->role_rh, FALSE); if(rh_child == NULL && constraint->score >= INFINITY) { crm_debug_2("%s can't be promoted %s", rsc_lh->id, constraint->id); rsc_lh->priority = -INFINITY; } else if(rh_child != NULL) { int new_priority = merge_weights(rsc_lh->priority, constraint->score); crm_debug("Applying %s to %s", constraint->id, rsc_lh->id); crm_debug("\t%s: %d->%d", rsc_lh->id, rsc_lh->priority, new_priority); rsc_lh->priority = new_priority; } } return; } diff --git a/pengine/native.c b/pengine/native.c index 89ff52b51a..3954f4f47c 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,2158 +1,2194 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #define DELETE_THEN_REFRESH 1 /* The crmd will remove the resource from the CIB itself, making this redundant */ #define VARIANT_NATIVE 1 #include void native_rsc_colocation_rh_must(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void Recurring(resource_t *rsc, action_t *start, node_t *node, pe_working_set_t *data_set); void RecurringOp(resource_t *rsc, action_t *start, node_t *node, xmlNode *operation, pe_working_set_t *data_set); void pe_post_notify( resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set); void NoRoleChange (resource_t *rsc, node_t *current, node_t *next, pe_working_set_t *data_set); gboolean DeleteRsc (resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set); gboolean StopRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean StartRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean DemoteRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean PromoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean RoleError (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean NullOp (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, 
RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, RoleError, NullOp, PromoteRsc, }, /* Master */ { RoleError, RoleError, RoleError, DemoteRsc, NullOp, }, }; static gboolean -native_choose_node(resource_t *rsc) +have_enough_capacity(node_t *node, resource_t *rsc) +{ + GHashTableIter iter; + const char *key = NULL; + const char *value = NULL; + int required = 0; + int remaining = 0; + int rc = TRUE; + + g_hash_table_iter_init(&iter, rsc->utilization); + while (g_hash_table_iter_next(&iter, (gpointer)&key, (gpointer)&value)) { + required = crm_parse_int(value, "0"); + remaining = crm_parse_int(g_hash_table_lookup(node->details->utilization, key), "0"); + + if (required > remaining) { + crm_debug("Node %s has no enough %s for resource %s: required=%d remaining=%d", + node->details->uname, key, rsc->id, required, remaining); + rc = FALSE; + } + } + return rc; +} + +static gboolean +native_choose_node(resource_t *rsc, pe_working_set_t *data_set) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. 
remove color.chosen_node from all other colors */ + int alloc_details = scores_log_level+1; + GListPtr nodes = NULL; node_t *chosen = NULL; int lpc = 0; int multiple = 0; - int length = g_list_length(rsc->allowed_nodes); + int length = 0; + + if (safe_str_neq(data_set->placement_strategy, "default")) { + slist_iter( + node, node_t, data_set->nodes, lpc, + if (have_enough_capacity(node, rsc) == FALSE) { + crm_debug("Resource %s cannot be allocated to node %s: none of enough capacity", + rsc->id, node->details->uname); + resource_location(rsc, node, -INFINITY, "__limit_utilization_", data_set); + } + ); + dump_node_scores(alloc_details, rsc, "Post-utilization", rsc->allowed_nodes); + } + + length = g_list_length(rsc->allowed_nodes); if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to?TRUE:FALSE; } crm_debug_3("Choosing node for %s from %d candidates", rsc->id, length); if(rsc->allowed_nodes) { - rsc->allowed_nodes = g_list_sort(rsc->allowed_nodes, sort_node_weight); + rsc->allowed_nodes = g_list_sort_with_data(rsc->allowed_nodes, sort_node_weight, data_set); nodes = rsc->allowed_nodes; chosen = g_list_nth_data(nodes, 0); if(chosen && chosen->weight > 0 && can_run_resources(chosen)) { node_t *running = g_list_nth_data(rsc->running_on, 0); if(can_run_resources(running) == FALSE) { running = NULL; } for(lpc = 1; lpc < length; lpc++) { node_t *tmp = g_list_nth_data(nodes, lpc); if(tmp->weight == chosen->weight) { multiple++; if(running && tmp->details == running->details) { /* prefer the existing node if scores are equal */ chosen = tmp; } } } } } if(multiple > 1) { int log_level = LOG_INFO; char *score = score2char(chosen->weight); if(chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "%d nodes with equal score (%s) for" " running %s resources. 
Chose %s.", multiple, score, rsc->id, chosen->details->uname); crm_free(score); } return native_assign_node(rsc, nodes, chosen, FALSE); } int node_list_attr_score(GListPtr list, const char *attr, const char *value) { int best_score = -INFINITY; const char *best_node = NULL; if(attr == NULL) { attr = "#"XML_ATTR_UNAME; } slist_iter(node, node_t, list, lpc, int weight = node->weight; if(can_run_resources(node) == FALSE) { weight = -INFINITY; } if(weight > best_score || best_node == NULL) { const char *tmp = g_hash_table_lookup(node->details->attrs, attr); if(safe_str_eq(value, tmp)) { best_score = weight; best_node = node->details->uname; } } ); if(safe_str_neq(attr, "#"XML_ATTR_UNAME)) { crm_info("Best score for %s=%s was %s with %d", attr, value, best_node?best_node:"", best_score); } return best_score; } static void node_list_update(GListPtr list1, GListPtr list2, const char *attr, int factor) { int score = 0; if(attr == NULL) { attr = "#"XML_ATTR_UNAME; } slist_iter( node, node_t, list1, lpc, CRM_CHECK(node != NULL, continue); score = node_list_attr_score(list2, attr, g_hash_table_lookup(node->details->attrs, attr)); if(factor < 0 && score < 0) { /* Negative preference for a node with a negative score * should not become a positive preference * * TODO: Decide if we want to filter only if weight == -INFINITY * */ continue; } crm_debug_2("%s: %d + %d*%d", node->details->uname, node->weight, factor, score); node->weight = merge_weights(factor*score, node->weight); ); } GListPtr native_merge_weights( resource_t *rsc, const char *rhs, GListPtr nodes, const char *attr, int factor, gboolean allow_rollback) { GListPtr archive = NULL; int multiplier = 1; if(factor < 0) { multiplier = -1; } if(is_set(rsc->flags, pe_rsc_merging)) { crm_info("%s: Breaking dependancy loop at %s", rhs, rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); crm_debug_2("%s: Combining scores from %s", rhs, rsc->id); if(allow_rollback) { archive = node_list_dup(nodes, FALSE, FALSE); } 
node_list_update(nodes, rsc->allowed_nodes, attr, factor); if(can_run_any(nodes) == FALSE) { if(archive) { crm_info("%s: Rolling back scores from %s", rhs, rsc->id); pe_free_shallow_adv(nodes, TRUE); nodes = archive; } goto bail; } pe_free_shallow_adv(archive, TRUE); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, crm_info("%s: Rolling back scores from %s", rhs, rsc->id); nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rhs, nodes, constraint->node_attribute, multiplier*constraint->score/INFINITY, allow_rollback); ); bail: clear_bit(rsc->flags, pe_rsc_merging); return nodes; } node_t * native_color(resource_t *rsc, pe_working_set_t *data_set) { int alloc_details = scores_log_level+1; if(rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ crm_debug("Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->color(rsc->parent, data_set); } if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); print_resource(alloc_details, "Allocating: ", rsc, FALSE); dump_node_scores(alloc_details, rsc, "Pre-allloc", rsc->allowed_nodes); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, resource_t *rsc_rh = constraint->rsc_rh; crm_debug_2("%s: Pre-Processing %s (%s)", rsc->id, constraint->id, rsc_rh->id); rsc_rh->cmds->color(rsc_rh, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); ); dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, constraint->score/INFINITY, TRUE); ); print_resource(LOG_DEBUG_2, "Allocating: ", rsc, 
FALSE); if(rsc->next_role == RSC_ROLE_STOPPED) { crm_debug_2("Making sure %s doesn't get allocated", rsc->id); /* make sure it doesnt come up again */ resource_location( rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } dump_node_scores(show_scores?0:scores_log_level, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); if(is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } if(is_not_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; node_t *assign_to = NULL; if(rsc->running_on == NULL) { reason = "inactive"; } else if(rsc->role == RSC_ROLE_MASTER) { assign_to = rsc->running_on->data; reason = "master"; } else if(is_set(rsc->flags, pe_rsc_failed)) { reason = "failed"; } else { assign_to = rsc->running_on->data; reason = "active"; } crm_info("Unmanaged resource %s allocated to %s: %s", rsc->id, assign_to?assign_to->details->uname:"'nowhere'", reason); native_assign_node(rsc, NULL, assign_to, TRUE); } else if(is_set(data_set->flags, pe_flag_stop_everything)) { crm_debug("Forcing %s to stop", rsc->id); native_assign_node(rsc, NULL, NULL, TRUE); } else if(is_set(rsc->flags, pe_rsc_provisional) - && native_choose_node(rsc) ) { + && native_choose_node(rsc, data_set) ) { crm_debug_3("Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if(rsc->allocated_to == NULL) { if(is_not_set(rsc->flags, pe_rsc_orphan)) { pe_warn("Resource %s cannot run anywhere", rsc->id); } else if(rsc->running_on != NULL) { crm_info("Stopping orphan resource %s", rsc->id); } } else { crm_debug("Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); return rsc->allocated_to; } static gboolean is_op_dup( resource_t *rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = 
NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", value = crm_element_value(operation, "name"); if(safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if(value == NULL) { value = "0"; } if(safe_str_neq(value, interval)) { continue; } if(id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } ); return dup; } void RecurringOp(resource_t *rsc, action_t *start, node_t *node, xmlNode *operation, pe_working_set_t *data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; crm_debug_2("Creating recurring action %s for %s in role %s", ID(operation), rsc->id, role2text(rsc->next_role)); if(node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if(interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if(is_op_dup(rsc, name, interval)) { return; } key = generate_op_key(rsc->id, name, interval_ms); if(find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ return; } if(start != NULL) { crm_debug_3("Marking %s %s due to %s", key, start->optional?"optional":"manditory", start->uuid); is_optional = start->optional; } else { crm_debug_2("Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if(possible_matches == NULL) { is_optional = FALSE; crm_debug_3("Marking %s manditory: not active", key); } else { g_list_free(possible_matches); } value = crm_element_value(operation, "role"); if((rsc->next_role == RSC_ROLE_MASTER && 
value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if(is_optional) { char *local_key = crm_strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* its running : cancel it */ mon = custom_action( rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); crm_free(mon->task); mon->task = crm_strdup(RSC_CANCEL); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch(rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if(rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if(rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if(local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result , key, value?value:role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); crm_free(key); key = NULL; return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if(is_optional) { crm_debug_2("%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if(start == NULL || start->runnable == FALSE) { crm_debug("%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); mon->runnable = FALSE; } else if(node == NULL || node->details->online == FALSE || node->details->unclean) { crm_debug("%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); mon->runnable = FALSE; } else if(mon->optional == FALSE) { crm_notice(" Start recurring %s (%llus) for %s on %s", mon->task, interval_ms/1000, rsc->id, crm_str(node_uname)); } if(rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(EXECRA_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); crm_free(running_master); } if(node == 
NULL || is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, crm_strdup(key), mon, pe_order_implies_right|pe_order_runnable_left, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { custom_action_order( rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } else if(rsc->role == RSC_ROLE_MASTER) { custom_action_order( rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } } } void Recurring(resource_t *rsc, action_t *start, node_t *node, pe_working_set_t *data_set) { if(is_not_set(data_set->flags, pe_flag_maintenance_mode)) { xml_child_iter_filter( rsc->ops_xml, operation, "op", RecurringOp(rsc, start, node, operation, data_set); ); } } void native_create_actions(resource_t *rsc, pe_working_set_t *data_set) { action_t *start = NULL; node_t *chosen = NULL; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; crm_debug_2("Createing actions for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); chosen = rsc->allocated_to; if(chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; } else if(rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STOPPED; } get_rsc_attributes(rsc->parameters, rsc, chosen, data_set); if(g_list_length(rsc->running_on) > 1) { if(rsc->recovery_type == recovery_stop_start) { pe_proc_warn("Attempting recovery of resource %s", rsc->id); if(rsc->role == RSC_ROLE_MASTER) { DemoteRsc(rsc, NULL, FALSE, data_set); } StopRsc(rsc, NULL, FALSE, data_set); rsc->role = RSC_ROLE_STOPPED; } } else if(rsc->running_on != NULL) { node_t *current = rsc->running_on->data; NoRoleChange(rsc, current, chosen, data_set); } else if(rsc->role == RSC_ROLE_STOPPED && rsc->next_role == RSC_ROLE_STOPPED) { char *key = start_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, NULL); slist_iter( action, action_t, possible_matches, lpc, 
action->optional = TRUE; /* action->pseudo = TRUE; */ ); g_list_free(possible_matches); crm_debug_2("Stopping a stopped resource"); crm_free(key); goto do_recurring; } else if(rsc->role != RSC_ROLE_STOPPED) { /* A cheap trick to account for the fact that Master/Slave groups may not be * completely running when we set their role to Slave */ crm_debug_2("Resetting %s.role = %s (was %s)", rsc->id, role2text(RSC_ROLE_STOPPED), role2text(rsc->role)); rsc->role = RSC_ROLE_STOPPED; } role = rsc->role; while(role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; crm_debug_2("Executing: %s->%s (%s)", role2text(role), role2text(next_role), rsc->id); if(rsc_action_matrix[role][next_role]( rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } do_recurring: if(rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); } } void native_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { int type = pe_order_optional; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); if(rsc->variant == pe_native) { type |= pe_order_implies_right; } if(rsc->parent == NULL || rsc->parent->variant == pe_group) { type |= pe_order_restart; } new_rsc_order(rsc, RSC_STOP, rsc, RSC_START, type, data_set); new_rsc_order(rsc, RSC_DEMOTE, rsc, RSC_STOP, pe_order_demote_stop, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, pe_order_optional, data_set); if(is_not_set(rsc->flags, pe_rsc_managed)) { crm_debug_3("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if(rsc->variant == pe_native && safe_str_neq(class, "stonith")) { custom_action_order( rsc, stop_key(rsc), NULL, NULL, crm_strdup(all_stopped->task), all_stopped, 
pe_order_implies_right|pe_order_runnable_left, data_set); } } void native_rsc_colocation_lh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { if(rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if(constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } crm_debug_2("Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } static gboolean filter_colocation_constraint( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { int level = LOG_DEBUG_4; if(constraint->score == 0){ return FALSE; } if(constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { do_crm_log_unlikely(level, "LH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { do_crm_log_unlikely(level, "RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { do_crm_log_unlikely(level, "LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { do_crm_log_unlikely(level, "RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } return TRUE; } static void colocation_match( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { const char *tmp = NULL; const char *value = NULL; gboolean do_check = FALSE; const char *attribute = "#id"; if(constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if(rsc_rh->allocated_to) { value = 
g_hash_table_lookup( rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if(constraint->score < 0) { /* nothing to do: * anti-colocation with something thats not running */ return; } slist_iter( node, node_t, rsc_lh->allowed_nodes, lpc, tmp = g_hash_table_lookup(node->details->attrs, attribute); if(do_check && safe_str_eq(tmp, value)) { if(constraint->score < INFINITY) { crm_debug_2("%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights( constraint->score, node->weight); } } else if(do_check == FALSE || constraint->score >= INFINITY) { crm_debug_2("%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, do_check?"failed":"unallocated"); node->weight = merge_weights(-constraint->score, node->weight); } ); } void native_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { crm_debug_2("%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0?"":"Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); if(filter_colocation_constraint(rsc_lh, rsc_rh, constraint) == FALSE) { return; } if(is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if(is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return; } details_rh = rsc_rh->allocated_to?rsc_rh->allocated_to->details:NULL; details_lh = rsc_lh->allocated_to?rsc_lh->allocated_to->details:NULL; if(constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. 
%s", rsc_lh->id, rsc_rh->id, details_lh?details_lh->uname:"n/a", details_rh?details_rh->uname:"n/a"); } else if(constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh?details_rh->uname:"n/a"); } return; } else { colocation_match(rsc_lh, rsc_rh, constraint); } } static GListPtr find_actions_by_task(GListPtr actions, resource_t *rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if(list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *tmp = NULL; char *task = NULL; int interval = 0; if(parse_op_key(original_key, &tmp, &task, &interval)) { key = generate_op_key(rsc->id, task, interval); /* crm_err("looking up %s instead of %s", key, original_key); */ /* slist_iter(action, action_t, actions, lpc, */ /* crm_err(" - %s", action->uuid)); */ list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } crm_free(key); crm_free(tmp); crm_free(task); } return list; } void native_rsc_order_lh(resource_t *lh_rsc, order_constraint_t *order, pe_working_set_t *data_set) { GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; crm_debug_3("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if(lh_action != NULL) { lh_actions = g_list_append(NULL, lh_action); } else if(lh_action == NULL) { lh_actions = find_actions_by_task( lh_rsc->actions, lh_rsc, order->lh_action_task); } if(lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; crm_debug_4("No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); parse_op_key( order->lh_action_task, &rsc_id, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); 
lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); if(lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) { lh_action->pseudo = TRUE; lh_action->runnable = TRUE; } lh_actions = g_list_append(NULL, lh_action); crm_free(op_type); crm_free(rsc_id); } slist_iter( lh_action_iter, action_t, lh_actions, lpc, if(rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if(rh_rsc) { rh_rsc->cmds->rsc_order_rh( lh_action_iter, rh_rsc, order); } else if(order->rh_action) { order_actions( lh_action_iter, order->rh_action, order->type); } ); pe_free_shallow_adv(lh_actions, FALSE); } void native_rsc_order_rh( action_t *lh_action, resource_t *rsc, order_constraint_t *order) { GListPtr rh_actions = NULL; action_t *rh_action = NULL; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_debug_3("Processing RH of ordering constraint %d", order->id); if(rh_action != NULL) { rh_actions = g_list_append(NULL, rh_action); } else if(rsc != NULL) { rh_actions = find_actions_by_task( rsc->actions, rsc, order->rh_action_task); } if(rh_actions == NULL) { crm_debug_4("No RH-Side (%s/%s) found for constraint..." 
" ignoring", rsc->id,order->rh_action_task); if(lh_action) { crm_debug_4("LH-Side was: %s", lh_action->uuid); } return; } slist_iter( rh_action_iter, action_t, rh_actions, lpc, if(lh_action) { order_actions(lh_action, rh_action_iter, order->type); } else if(order->type & pe_order_implies_right) { rh_action_iter->runnable = FALSE; crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, order->type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, order->type); } ); pe_free_shallow_adv(rh_actions, FALSE); } void native_rsc_location(resource_t *rsc, rsc_to_node_t *constraint) { GListPtr or_list; crm_debug_2("Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if(constraint == NULL) { pe_err("Constraint is NULL"); return; } else if(rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } else if(constraint->role_filter > 0 && constraint->role_filter != rsc->next_role) { crm_debug("Constraint (%s) is not active (role : %s)", constraint->id, role2text(constraint->role_filter)); return; } else if(is_active(constraint) == FALSE) { crm_debug_2("Constraint (%s) is not active", constraint->id); return; } if(constraint->node_list_rh == NULL) { crm_debug_2("RHS of constraint %s is NULL", constraint->id); return; } or_list = node_list_or( rsc->allowed_nodes, constraint->node_list_rh, FALSE); pe_free_shallow(rsc->allowed_nodes); rsc->allowed_nodes = or_list; slist_iter(node, node_t, or_list, lpc, crm_debug_3("%s + %s : %d", rsc->id, node->details->uname, node->weight); ); } void native_expand(resource_t *rsc, pe_working_set_t *data_set) { crm_debug_3("Processing actions from %s", rsc->id); slist_iter( action, action_t, rsc->actions, lpc, crm_debug_4("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); ); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->expand(child_rsc, data_set); ); } void 
LogActions(resource_t *rsc, pe_working_set_t *data_set)
{
    /*
     * Emit a crm_notice() summary of what is planned for rsc
     * (Leave/Migrate/Move/Recover/Stop/Restart/Demote/Start/Promote).
     * Resources with children only recurse into each child.
     *
     * 'current' is the first entry of rsc->running_on (may be NULL),
     * 'next' is rsc->allocated_to (may be NULL).  Every message that
     * prints a node name is guarded against the corresponding pointer
     * being NULL.
     */
    node_t *next = NULL;
    node_t *current = NULL;
    gboolean moving = FALSE;

    if(rsc->children) {
        slist_iter(
            child_rsc, resource_t, rsc->children, lpc,
            LogActions(child_rsc, data_set);
            );
        return;
    }

    next = rsc->allocated_to;
    if(rsc->running_on) {
        current = rsc->running_on->data;
        if(rsc->role == RSC_ROLE_STOPPED) {
            /*
             * This can occur when resources are being recovered
             * We fiddle with the current role in native_create_actions()
             */
            rsc->role = RSC_ROLE_STARTED;
        }
    }

    if(current == NULL && is_set(rsc->flags, pe_rsc_orphan)) {
        /* Don't log stopped orphans */
        return;
    }

    if(is_not_set(rsc->flags, pe_rsc_managed)
       || (current == NULL && next == NULL)) {
        crm_notice("Leave resource %s\t(%s%s)",
                   rsc->id, role2text(rsc->role),
                   is_not_set(rsc->flags, pe_rsc_managed)?" unmanaged":"");
        return;
    }

    if(current != NULL && next != NULL
       && safe_str_neq(current->details->id, next->details->id)) {
        moving = TRUE;
    }

    if(rsc->role == rsc->next_role) {
        action_t *start = NULL;
        char *key = start_key(rsc);
        GListPtr possible_matches = find_actions(rsc->actions, key, next);
        crm_free(key);

        if(possible_matches) {
            start = possible_matches->data;
            g_list_free(possible_matches);
        }

        key = generate_op_key(rsc->id, CRMD_ACTION_MIGRATED, 0);
        possible_matches = find_actions(rsc->actions, key, next);
        crm_free(key);

        CRM_CHECK(next != NULL,);
        if(next == NULL) {
            /* nothing sensible to report without a target node */

        } else if(possible_matches) {
            /* 'current' is not guaranteed to be set on this path */
            crm_notice("Migrate resource %s\t(%s %s -> %s)",
                       rsc->id, role2text(rsc->role),
                       current?current->details->uname:"n/a",
                       next->details->uname);

        } else if(start == NULL || start->optional) {
            crm_notice("Leave resource %s\t(%s %s)",
                       rsc->id, role2text(rsc->role), next->details->uname);

        } else if(moving && current) {
            crm_notice("Move resource %s\t(%s %s -> %s)",
                       rsc->id, role2text(rsc->role),
                       current->details->uname, next->details->uname);

        } else if(is_set(rsc->flags, pe_rsc_failed)) {
            crm_notice("Recover resource %s\t(%s %s)",
                       rsc->id, role2text(rsc->role), next->details->uname);

        } else if(start && start->runnable == FALSE) {
            crm_notice("Stop resource %s\t(%s %s)",
                       rsc->id, role2text(rsc->role), next->details->uname);

        } else {
            crm_notice("Restart resource %s\t(%s %s)",
                       rsc->id, role2text(rsc->role), next->details->uname);
        }

        /* Released on every path, including next == NULL;
         * an empty (NULL) list is fine here */
        g_list_free(possible_matches);
        return;
    }

    if(rsc->role > RSC_ROLE_SLAVE && rsc->role > rsc->next_role) {
        CRM_CHECK(current != NULL,);
        if(current != NULL) {
            crm_notice("Demote %s\t(%s -> %s %s)", rsc->id,
                       role2text(rsc->role), role2text(rsc->next_role),
                       current->details->uname);
        }
    }

    if(rsc->next_role == RSC_ROLE_STOPPED || moving) {
        CRM_CHECK(current != NULL,);
        slist_iter(node, node_t, rsc->running_on, lpc,
                   crm_notice("Stop resource %s\t(%s)", rsc->id, node->details->uname));
    }

    if(rsc->role == RSC_ROLE_STOPPED || moving) {
        CRM_CHECK(next != NULL,);
        if(next != NULL) {
            crm_notice("Start %s\t(%s)", rsc->id, next->details->uname);
        }
    }

    if(rsc->next_role > RSC_ROLE_SLAVE && rsc->role < rsc->next_role) {
        CRM_CHECK(next != NULL,);
        /* Same guard as the Start/Demote branches: the check above
         * does not return, so 'next' may still be NULL here */
        if(next != NULL) {
            crm_notice("Promote %s\t(%s -> %s %s)", rsc->id,
                       role2text(rsc->role), role2text(rsc->next_role),
                       next->details->uname);
        }
    }
}

void
NoRoleChange(resource_t *rsc, node_t *current, node_t *next,
             pe_working_set_t *data_set)
{
    action_t *stop = NULL;
    action_t *start = NULL;
    GListPtr possible_matches = NULL;

    crm_debug_2("Executing: %s (role=%s)", rsc->id, role2text(rsc->next_role));

    if(current == NULL || next == NULL) {
        return;
    }

    if(is_set(rsc->flags, pe_rsc_failed)
       || safe_str_neq(current->details->id, next->details->id)) {

        if(rsc->next_role > RSC_ROLE_STARTED) {
            gboolean optional = TRUE;
            if(rsc->role == RSC_ROLE_MASTER) {
                optional = FALSE;
            }
            DemoteRsc(rsc, current, optional, data_set);
        }
        if(rsc->role == RSC_ROLE_MASTER) {
            DemoteRsc(rsc, current, FALSE, data_set);
        }
        StopRsc(rsc, current, FALSE, data_set);
        StartRsc(rsc, next, FALSE, data_set);
        if(rsc->next_role == RSC_ROLE_MASTER) {
            PromoteRsc(rsc, next, FALSE, data_set);
        }

        possible_matches = find_recurring_actions(rsc->actions, next);
        slist_iter(match, action_t,
possible_matches, lpc, if(match->optional == FALSE) { crm_debug("Fixing recurring action: %s", match->uuid); match->optional = TRUE; } ); g_list_free(possible_matches); } else if(is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, next, TRUE); if(start->runnable) { /* wait for StartRsc() to be called */ rsc->role = RSC_ROLE_STOPPED; } else { /* wait for StopRsc() to be called */ rsc->next_role = RSC_ROLE_STOPPED; } } else { stop = stop_action(rsc, current, TRUE); start = start_action(rsc, next, TRUE); stop->optional = start->optional; if(rsc->next_role > RSC_ROLE_STARTED) { DemoteRsc(rsc, current, start->optional, data_set); } StopRsc(rsc, current, start->optional, data_set); StartRsc(rsc, current, start->optional, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { PromoteRsc(rsc, next, start->optional, data_set); } if(start->runnable == FALSE) { rsc->next_role = RSC_ROLE_STOPPED; } } } gboolean StopRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { action_t *stop = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); crm_debug_2("Executing: %s", rsc->id); if(rsc->next_role == RSC_ROLE_STOPPED && rsc->variant == pe_native && safe_str_eq(class, "stonith")) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order( NULL, crm_strdup(all_stopped->task), all_stopped, rsc, stop_key(rsc), NULL, pe_order_implies_left|pe_order_stonith_stop, data_set); } slist_iter( current, node_t, rsc->running_on, lpc, stop = stop_action(rsc, current, optional); if(is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } ); return TRUE; } gboolean StartRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { action_t *start = NULL; crm_debug_2("Executing: %s", rsc->id); start = start_action(rsc, next, TRUE); if(start->runnable && optional == FALSE) { start->optional = FALSE; } return TRUE; } gboolean 
PromoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { char *key = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; crm_debug_2("Executing: %s", rsc->id); CRM_CHECK(rsc->next_role == RSC_ROLE_MASTER, crm_err("Next role: %s", role2text(rsc->next_role)); return FALSE); CRM_CHECK(next != NULL, return FALSE); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(start, action_t, action_list, lpc, if(start->runnable == FALSE) { runnable = FALSE; } ); g_list_free(action_list); if(runnable) { promote_action(rsc, next, optional); return TRUE; } crm_debug("%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(promote, action_t, action_list, lpc, promote->runnable = FALSE; ); g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug_2("Executing: %s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ slist_iter( current, node_t, rsc->running_on, lpc, demote_action(rsc, current, optional); ); return TRUE; } gboolean RoleError(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug("Executing: %s", rsc->id); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug_2("Executing: %s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set) { action_t *delete = NULL; #if DELETE_THEN_REFRESH action_t *refresh = NULL; #endif if(is_set(rsc->flags, pe_rsc_failed)) { crm_debug_2("Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if(node == NULL) { crm_debug_2("Resource %s not deleted: NULL node", rsc->id); return FALSE; 
} else if(node->details->unclean || node->details->online == FALSE) { crm_debug_2("Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete = delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional?pe_order_implies_right:pe_order_implies_left, data_set); #if DELETE_THEN_REFRESH refresh = custom_action( NULL, crm_strdup(CRM_OP_LRM_REFRESH), CRM_OP_LRM_REFRESH, node, FALSE, TRUE, data_set); add_hash_param(refresh->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); order_actions(delete, refresh, pe_order_optional); #endif return TRUE; } #include <../lib/pengine/unpack.h> gboolean native_create_probe(resource_t *rsc, node_t *node, action_t *complete, gboolean force, pe_working_set_t *data_set) { char *key = NULL; char *target_rc = NULL; action_t *probe = NULL; node_t *running = NULL; resource_t *top = uber_parent(rsc); CRM_CHECK(node != NULL, return FALSE); if(rsc->children) { gboolean any_created = FALSE; slist_iter( child_rsc, resource_t, rsc->children, lpc, any_created = child_rsc->cmds->create_probe( child_rsc, node, complete, force, data_set) || any_created; ); return any_created; } if(is_set(rsc->flags, pe_rsc_orphan)) { crm_debug_2("Skipping orphan: %s", rsc->id); return FALSE; } running = pe_find_node_id(rsc->known_on, node->details->id); if(force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ crm_debug_3("Skipping active: %s", rsc->id); return FALSE; } if(running == NULL && is_set(top->flags, pe_rsc_unique) == FALSE) { /* Annoyingly we also need to check any other clone instances * Clumsy, but it will work. 
* * An alternative would be to update known_on for every peer * during process_rsc_state() */ char *clone_id = clone_zero(rsc->id); resource_t *peer = pe_find_resource(top->children, clone_id); while(peer && running == NULL) { running = pe_find_node_id(peer->known_on, node->details->id); if(force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ crm_debug_3("Skipping active clone: %s", rsc->id); crm_free(clone_id); return FALSE; } clone_id = increment_clone(clone_id); peer = pe_find_resource(data_set->resources, clone_id); } crm_free(clone_id); } key = generate_op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); probe->optional = FALSE; running = pe_find_node_id(rsc->running_on, node->details->id); if(running == NULL) { target_rc = crm_itoa(EXECRA_NOT_RUNNING); } else if(rsc->role == RSC_ROLE_MASTER) { target_rc = crm_itoa(EXECRA_RUNNING_MASTER); } if(target_rc != NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, target_rc); crm_free(target_rc); } crm_debug("Probing %s on %s (%s)", rsc->id, node->details->uname, role2text(rsc->role)); order_actions(probe, complete, pe_order_implies_right); return TRUE; } static void native_start_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { node_t *target = stonith_op?stonith_op->node:NULL; if(is_stonith) { char *key = start_key(rsc); action_t *ready = get_pseudo_op(STONITH_UP, data_set); crm_debug_2("Ordering %s action before stonith events", key); custom_action_order( rsc, key, NULL, NULL, crm_strdup(ready->task), ready, pe_order_optional, data_set); } else { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); slist_iter(action, action_t, rsc->actions, lpc2, if(action->needs == rsc_req_stonith) { order_actions(stonith_done, action, pe_order_implies_left); } else if(target != NULL && 
safe_str_eq(action->task, RSC_START) && NULL == pe_find_node_id( rsc->known_on, target->details->id)) { /* if known == NULL, then we dont know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * to with the resource * * its analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explaination is that the * DC died and took its status with it */ crm_info("Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_implies_left|pe_order_runnable_left); } ); } } static void native_stop_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { char *key = NULL; GListPtr action_list = NULL; resource_t *top = uber_parent(rsc); key = stop_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); crm_free(key); /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ slist_iter( action, action_t, action_list, lpc2, resource_t *parent = NULL; if(action->node->details->online && action->node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) { continue; } if(is_set(rsc->flags, pe_rsc_failed)) { crm_warn("Stop of failed resource %s is" " implicit after %s is fenced", rsc->id, action->node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; action->implied_by_stonith = TRUE; if(is_stonith == FALSE) { action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); order_actions(stonith_op, action, pe_order_optional); order_actions(stonith_op, parent_stop, pe_order_optional); } if(is_set(rsc->flags, 
pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ notify_data_t *n_data = create_notification_boundaries(rsc, RSC_STOP, NULL, stonith_op, data_set); crm_info("Creating secondary notification for %s", action->uuid); collect_notification_data(rsc, TRUE, FALSE, n_data); g_hash_table_insert(n_data->keys, crm_strdup("notify_stop_resource"), crm_strdup(rsc->id)); g_hash_table_insert(n_data->keys, crm_strdup("notify_stop_uname"), crm_strdup(action->node->details->uname)); create_notifications(uber_parent(rsc), n_data, data_set); free_notification_data(n_data); } /* find the top-most resource */ parent = rsc->parent; while(parent != NULL && parent->parent != NULL) { parent = parent->parent; } if(parent) { crm_debug_2("Re-creating actions for %s", parent->id); parent->cmds->create_actions(parent, data_set); /* make sure we dont mess anything up in create_actions */ CRM_CHECK(action->pseudo, action->pseudo = TRUE); CRM_CHECK(action->runnable, action->runnable = TRUE); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependancy and therefore must (at least for now) be disabled. 
Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). TODO: Break the "A requires B" dependancy in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,crm_strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ ); g_list_free(action_list); key = demote_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); crm_free(key); slist_iter( action, action_t, action_list, lpc2, if(action->node->details->online == FALSE || is_set(rsc->flags, pe_rsc_failed)) { crm_info("Demote of failed resource %s is" " implict after %s is fenced", rsc->id, action->node->details->uname); /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; if(is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_optional); } } ); g_list_free(action_list); } void complex_stonith_ordering( resource_t *rsc, action_t *stonith_op, pe_working_set_t *data_set) { gboolean is_stonith = FALSE; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->stonith_ordering( child_rsc, stonith_op, data_set); ); return; } if(is_not_set(rsc->flags, pe_rsc_managed)) { crm_debug_3("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if(stonith_op != NULL && safe_str_eq(class, "stonith")) { is_stonith = TRUE; } /* Start constraints */ native_start_constraints(rsc, stonith_op, is_stonith, data_set); /* Stop constraints */ native_stop_constraints(rsc, stonith_op, is_stonith, data_set); } -#define ALLOW_WEAK_MIGRATION 0 - enum stack_activity { stack_stable = 0, stack_starting = 1, stack_stopping = 2, 
stack_middle = 4, }; static enum stack_activity find_clone_activity_on(resource_t *rsc, resource_t *target, node_t *node, const char *type) { int mode = stack_stable; action_t *active = NULL; if(target->children) { slist_iter( child, resource_t, target->children, lpc, mode |= find_clone_activity_on(rsc, child, node, type); ); return mode; } active = find_first_action(target->actions, NULL, CRMD_ACTION_START, NULL); if(active && active->optional == FALSE && active->pseudo == FALSE) { crm_debug("%s: found scheduled %s action (%s)", rsc->id, active->uuid, type); mode |= stack_starting; } active = find_first_action(target->actions, NULL, CRMD_ACTION_STOP, node); if(active && active->optional == FALSE && active->pseudo == FALSE) { crm_debug("%s: found scheduled %s action (%s)", rsc->id, active->uuid, type); mode |= stack_stopping; } return mode; } static enum stack_activity check_stack_element(resource_t *rsc, resource_t *other_rsc, const char *type) { resource_t *other_p = uber_parent(other_rsc); if(other_rsc == NULL || other_rsc == rsc) { return stack_stable; } else if(other_p->variant == pe_native) { crm_notice("Cannot migrate %s due to dependancy on %s (%s)", rsc->id, other_rsc->id, type); return stack_middle; } else if(other_rsc == rsc->parent) { int mode = 0; slist_iter(constraint, rsc_colocation_t, other_rsc->rsc_cons, lpc, if(constraint->score > 0) { mode |= check_stack_element(rsc, constraint->rsc_rh, type); } ); return mode; } else if(other_p->variant == pe_group) { crm_notice("Cannot migrate %s due to dependancy on group %s (%s)", rsc->id, other_rsc->id, type); return stack_middle; } /* else: >= clone */ /* ## Assumption A depends on clone(B) ## Resource Activity During Move N1 N2 N3 --- --- --- t0 A.stop t1 B.stop B.stop t2 B.start B.start t3 A.start ## Resource Activity During Migration N1 N2 N3 --- --- --- t0 B.start B.start t1 A.stop (1) t2 A.start (2) t3 B.stop B.stop Node 1: Rewritten to be a migrate-to operation Node 2: Rewritten to be a migrate-from 
operation # Constraints The following constraints already exist in the system. The 'ok' and 'fail' column refers to whether they still hold for migration. a) A.stop -> A.start - ok b) B.stop -> B.start - fail c) A.stop -> B.stop - ok d) B.start -> A.start - ok e) B.stop -> A.start - fail f) A.stop -> B.start - fail ## Scenarios B unchanged - ok B stopping only - fail - possible after fixing 'e' B starting only - fail - possible after fixing 'f' B stoping and starting - fail - constraint 'b' is unfixable B restarting only on N2 - fail - as-per previous only rarer */ /* Only allow migration when the clone is either stable, only starting or only stopping */ return find_clone_activity_on(rsc, other_rsc, NULL, type); } static gboolean at_stack_bottom(resource_t *rsc) { char *key = NULL; action_t *start = NULL; action_t *other = NULL; int mode = stack_stable; GListPtr action_list = NULL; key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); crm_debug_3("%s: processing", rsc->id); CRM_CHECK(action_list != NULL, return FALSE); start = action_list->data; g_list_free(action_list); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, resource_t *target = constraint->rsc_rh; crm_debug_4("Checking %s: %s == %s (%d)", constraint->id, rsc->id, target->id, constraint->score); if(constraint->score > 0) { mode |= check_stack_element(rsc, target, "coloc"); if(mode & stack_middle) { return FALSE; } else if((mode & stack_stopping) && (mode & stack_starting)) { crm_notice("Cannot migrate %s due to colocation activity (last was %s)", rsc->id, target->id); return FALSE; } } ); slist_iter( other_w, action_wrapper_t, start->actions_before, lpc, other = other_w->action; -#if ALLOW_WEAK_MIGRATION - if((other_w->type & pe_order_implies_right) == 0) { - crm_debug_3("%s: depends on %s (optional ordering)", + if(other_w->type & pe_order_serialize_only) { + crm_debug_3("%s: depends on %s (serialize ordering)", rsc->id, other->uuid); continue; } -#endif 
crm_debug_2("%s: Checking %s ordering", rsc->id, other->uuid); if(other->optional == FALSE) { mode |= check_stack_element(rsc, other->rsc, "order"); if(mode & stack_middle) { return FALSE; } else if((mode & stack_stopping) && (mode & stack_starting)) { crm_notice("Cannot migrate %s due to ordering activity (last was %s)", rsc->id, other->rsc->id); return FALSE; } } ); return TRUE; } void complex_migrate_reload(resource_t *rsc, pe_working_set_t *data_set) { char *key = NULL; int level = LOG_DEBUG; GListPtr action_list = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *other = NULL; action_t *action = NULL; const char *value = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->migrate_reload(child_rsc, data_set); ); other = NULL; return; } else if(rsc->variant > pe_native) { return; } do_crm_log_unlikely(level+1, "Processing %s", rsc->id); if(is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || rsc->next_role < RSC_ROLE_STARTED || g_list_length(rsc->running_on) != 1) { do_crm_log_unlikely( level+1, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags); return; } value = g_hash_table_lookup(rsc->meta, XML_OP_ATTR_ALLOW_MIGRATE); if(crm_is_true(value)) { set_bit(rsc->flags, pe_rsc_can_migrate); } if(rsc->next_role > RSC_ROLE_SLAVE) { clear_bit(rsc->flags, pe_rsc_can_migrate); do_crm_log_unlikely( level+1, "%s: resource role: role=%s", rsc->id, role2text(rsc->next_role)); } key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { do_crm_log_unlikely(level, "%s: no start action", rsc->id); return; } start = action_list->data; g_list_free(action_list); if(is_not_set(rsc->flags, pe_rsc_can_migrate) && start->allow_reload_conversion == FALSE) { do_crm_log_unlikely(level+1, "%s: no need to continue", rsc->id); return; } key = stop_key(rsc); action_list = 
find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { do_crm_log_unlikely(level, "%s: no stop action", rsc->id); return; } stop = action_list->data; g_list_free(action_list); action = start; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { do_crm_log_unlikely(level, "%s: %s", rsc->id, action->task); return; } action = stop; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { do_crm_log_unlikely(level, "%s: %s", rsc->id, action->task); return; } if(is_set(rsc->flags, pe_rsc_can_migrate)) { if(start->node == NULL || stop->node == NULL || stop->node->details == start->node->details) { clear_bit(rsc->flags, pe_rsc_can_migrate); } else if(at_stack_bottom(rsc) == FALSE) { clear_bit(rsc->flags, pe_rsc_can_migrate); } } if(is_set(rsc->flags, pe_rsc_can_migrate)) { crm_info("Migrating %s from %s to %s", rsc->id, stop->node->details->uname, start->node->details->uname); crm_free(stop->uuid); crm_free(stop->task); stop->task = crm_strdup(RSC_MIGRATE); stop->uuid = generate_op_key(rsc->id, stop->task, 0); add_hash_param(stop->meta, "migrate_source", stop->node->details->uname); add_hash_param(stop->meta, "migrate_target", start->node->details->uname); /* Create the correct ordering ajustments based on find_clone_activity_on(); */ slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, resource_t *target = constraint->rsc_rh; crm_info("Repairing %s: %s == %s (%d)", constraint->id, rsc->id, target->id, constraint->score); if(constraint->score > 0) { int mode = check_stack_element(rsc, target, "coloc"); action_t *clone_stop = find_first_action(target->actions, NULL, RSC_STOP, NULL); action_t *clone_start = find_first_action(target->actions, NULL, RSC_STARTED, NULL); CRM_ASSERT(clone_stop != NULL); CRM_ASSERT(clone_start != NULL); CRM_ASSERT((mode & stack_middle) == 0); CRM_ASSERT(((mode & stack_stopping) && (mode & stack_starting)) == 0); if(mode & 
stack_stopping) { action_t *clone_stop = find_first_action(target->actions, NULL, RSC_STOP, NULL); action_t *clone_start = find_first_action(target->actions, NULL, RSC_STARTED, NULL); crm_debug("Creating %s.start -> %s.stop ordering", rsc->id, target->id); order_actions(start, clone_stop, pe_order_optional); slist_iter( other_w, action_wrapper_t, start->actions_before, lpc2, /* Needed if the clone's started pseudo-action ever gets printed in the graph */ if(other_w->action == clone_start) { crm_debug("Breaking %s -> %s ordering", other_w->action->uuid, start->uuid); other_w->type = pe_order_none; } ); } else if(mode & stack_starting) { crm_debug("Creating %s.started -> %s.stop ordering", target->id, rsc->id); order_actions(clone_start, stop, pe_order_optional); slist_iter( other_w, action_wrapper_t, clone_stop->actions_before, lpc2, /* Needed if the clone's stop pseudo-action ever gets printed in the graph */ if(other_w->action == stop) { crm_debug("Breaking %s -> %s ordering", other_w->action->uuid, clone_stop->uuid); other_w->type = pe_order_none; } ); } } ); crm_free(start->uuid); crm_free(start->task); start->task = crm_strdup(RSC_MIGRATED); start->uuid = generate_op_key(rsc->id, start->task, 0); add_hash_param(start->meta, "migrate_source_uuid", stop->node->details->id); add_hash_param(start->meta, "migrate_source", stop->node->details->uname); add_hash_param(start->meta, "migrate_target", start->node->details->uname); /* Anything that needed stop to complete, now also needs start to have completed */ slist_iter( other_w, action_wrapper_t, stop->actions_after, lpc, other = other_w->action; if(other->optional || other->rsc != NULL) { continue; } crm_debug("Ordering %s before %s (stop)", start->uuid, other_w->action->uuid); order_actions(start, other, other_w->type); ); /* Stop also needs anything that the start needed to have completed too */ slist_iter( other_w, action_wrapper_t, start->actions_before, lpc, other = other_w->action; if(other->rsc == NULL) { /* 
nothing */ } else if(other->optional || other->rsc == rsc || other->rsc == rsc->parent) { continue; } crm_debug("Ordering %s before %s (start)", other_w->action->uuid, stop->uuid); order_actions(other, stop, other_w->type); ); } else if(start && stop && start->allow_reload_conversion && stop->node->details == start->node->details) { action_t *rewrite = NULL; start->pseudo = TRUE; /* easier than trying to delete it from the graph */ action = NULL; key = promote_key(rsc); action_list = find_actions(rsc->actions, key, NULL); if(action_list) { action = action_list->data; } if(action && action->optional == FALSE) { action->pseudo = TRUE; } g_list_free(action_list); crm_free(key); action = NULL; key = demote_key(rsc); action_list = find_actions(rsc->actions, key, NULL); if(action_list) { action = action_list->data; } g_list_free(action_list); crm_free(key); if(action && action->optional == FALSE) { rewrite = action; stop->pseudo = TRUE; } else { rewrite = stop; } crm_info("Rewriting %s of %s on %s as a reload", rewrite->task, rsc->id, stop->node->details->uname); crm_free(rewrite->uuid); crm_free(rewrite->task); rewrite->task = crm_strdup("reload"); rewrite->uuid = generate_op_key(rsc->id, rewrite->task, 0); } else { do_crm_log_unlikely(level+1, "%s nothing to do", rsc->id); } } diff --git a/pengine/pengine.h b/pengine/pengine.h index 8128bd55c2..cb399e7a86 100644 --- a/pengine/pengine.h +++ b/pengine/pengine.h @@ -1,173 +1,174 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PENGINE__H #define PENGINE__H typedef struct rsc_to_node_s rsc_to_node_t; typedef struct rsc_colocation_s rsc_colocation_t; typedef struct lrm_agent_s lrm_agent_t; typedef struct order_constraint_s order_constraint_t; #include #include #include #include #include #include #include #include enum pe_stop_fail { pesf_block, pesf_stonith, pesf_ignore }; enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_implies_left = 0x01, /* was: _mandatory */ pe_order_implies_right = 0x02, /* was: _recover */ pe_order_runnable_left = 0x10, /* needs the LHS side to be runnable */ pe_order_runnable_right = 0x20, /* needs the RHS side to be runnable */ pe_order_optional = 0x100, /* pure ordering, nothing implied */ pe_order_stonith_stop = 0x200, /* only applies if the action is non-pseudo */ pe_order_restart = 0x400, /* stop-start constraint */ pe_order_demote = 0x800, /* stop-start constraint */ pe_order_shutdown = 0x1000, /* combines with pe_order_restart to make a complex resource shut down */ pe_order_demote_stop = 0x2000, /* upgrades to implies_left if the resource is a master */ pe_order_complex_left = 0x10000, /* upgrades to implies left */ pe_order_complex_right = 0x20000, /* upgrades to implies right */ pe_order_implies_left_printed = 0x40000, /* Like implies left but only ensures the action is printed, not manditory */ pe_order_implies_right_printed = 0x80000, /* Like implies right but only ensures the action is printed, not manditory */ - pe_order_test = 0x100000 /* test marker */ + pe_order_serialize_only = 0x100000, /* serialize */ + pe_order_test = 0x200000 /* test marker */ }; struct rsc_colocation_s { const char *id; const char *node_attribute; resource_t *rsc_lh; resource_t *rsc_rh; int role_lh; int role_rh; int score; }; struct rsc_to_node_s { 
const char *id; resource_t *rsc_lh; enum rsc_role_e role_filter; GListPtr node_list_rh; /* node_t* */ }; struct order_constraint_s { int id; enum pe_ordering type; void *lh_opaque; resource_t *lh_rsc; action_t *lh_action; char *lh_action_task; void *rh_opaque; resource_t *rh_rsc; action_t *rh_action; char *rh_action_task; /* (soon to be) variant specific */ /* int lh_rsc_incarnation; */ /* int rh_rsc_incarnation; */ }; enum pe_link_state { pe_link_not_dumped, pe_link_dumped, pe_link_dup, }; typedef struct action_wrapper_s action_wrapper_t; struct action_wrapper_s { enum pe_ordering type; enum pe_link_state state; action_t *action; }; extern gboolean stage0(pe_working_set_t *data_set); extern gboolean probe_resources(pe_working_set_t *data_set); extern gboolean stage2(pe_working_set_t *data_set); extern gboolean stage3(pe_working_set_t *data_set); extern gboolean stage4(pe_working_set_t *data_set); extern gboolean stage5(pe_working_set_t *data_set); extern gboolean stage6(pe_working_set_t *data_set); extern gboolean stage7(pe_working_set_t *data_set); extern gboolean stage8(pe_working_set_t *data_set); extern gboolean summary(GListPtr resources); extern gboolean pe_msg_dispatch(IPC_Channel *sender, void *user_data); extern gboolean process_pe_message( xmlNode *msg, xmlNode *xml_data, IPC_Channel *sender); extern gboolean unpack_constraints( xmlNode *xml_constraints, pe_working_set_t *data_set); extern gboolean update_action_states(GListPtr actions); extern gboolean shutdown_constraints( node_t *node, action_t *shutdown_op, pe_working_set_t *data_set); extern gboolean stonith_constraints( node_t *node, action_t *stonith_op, pe_working_set_t *data_set); extern int custom_action_order( resource_t *lh_rsc, char *lh_task, action_t *lh_action, resource_t *rh_rsc, char *rh_task, action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set); extern int new_rsc_order(resource_t *lh_rsc, const char *lh_task, resource_t *rh_rsc, const char *rh_task, enum pe_ordering 
type, pe_working_set_t *data_set); #define order_start_start(rsc1,rsc2, type) \ new_rsc_order(rsc1, CRMD_ACTION_START, rsc2, CRMD_ACTION_START, type, data_set) #define order_stop_stop(rsc1, rsc2, type) \ new_rsc_order(rsc1, CRMD_ACTION_STOP, rsc2, CRMD_ACTION_STOP, type, data_set) extern void graph_element_from_action( action_t *action, pe_working_set_t *data_set); extern gboolean show_scores; extern int scores_log_level; extern const char* transition_idle_timeout; #endif diff --git a/pengine/regression.sh b/pengine/regression.sh index 76ad020583..9f65f0b18d 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -1,336 +1,343 @@ #!/bin/bash # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # if [ -x /usr/bin/valgrind ]; then export G_SLICE=always-malloc VALGRIND_CMD="valgrind -q --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=./ptest.supp" fi . regression.core.sh create_mode="true" echo Generating test outputs for these tests... # do_test file description echo Done. echo "" echo Performing the following tests... 
create_mode="false" echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test novell-251689 "Resource definition change + target_role=stopped" do_test bug-lf-2106 "Restart all anonymous clone instances after config change" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : master" do_test target-2 "Target Role : invalid" echo "" do_test date-1 "Dates" -d "2005-020" do_test date-2 "Date Spec - Pass" -d "2005-020T12:30" do_test date-3 "Date Spec - Fail" -d "2005-020T11:30" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test probe-2 "Correctly re-probe cloned groups" do_test standby "Standby" do_test comments "Comments" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (manditory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " do_test bug-lf-2171 "Prevent group start when clone is stopped" do_test order-clone 
"Clone ordering should be able to prevent startup of dependant clones" do_test order-sets "Ordering for resource sets" +do_test order-serialize "Serialize resources without inhibiting migration" +do_test order-serialize-set "Serialize a set of resources without inhibiting migration" echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" do_test coloc-attr "Colocation based on node attributes" do_test coloc-negative-group "Negative colocation with a group" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test 
quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-stop "Migration in a stopping stack" do_test migrate-start "Migration in a starting stack" do_test migrate-stop_start "Migration in a restarting stack" do_test migrate-stop-complex "Migration in a complex stopping stack" do_test migrate-start-complex "Migration in a complex starting stack" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" do_test migrate-5 "Primitive migration with a clone" #echo "" #do_test complex1 "Complex " echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" 
do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "-ve group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" do_test clone-max-zero "Orphan processing with clone-max=0" do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" do_test bug-lf-2160 "Dont shuffle clones due to colocation" do_test bug-lf-2213 "clone-node-max enforcement for cloned groups" do_test bug-lf-2153 "Clone ordering constraints" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote 
: notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Dont retry failed demote actions" do_test master-failed-demote-2 "Dont retry failed demote actions (notify=false)" do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does" do_test master-reattach "Re-attach to a running master" do_test master-allow-start "Don't include master score if it would prevent allocation" do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints" do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly" do_test master-role "Prevent target-role from promoting more than master-max instances" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - 
interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependancy restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" echo "" do_test 594 "OSDL #594" do_test 662 "OSDL #662" do_test 696 "OSDL #696" do_test 726 "OSDL #726" do_test 735 "OSDL #735" do_test 764 "OSDL #764" do_test 797 "OSDL #797" do_test 829 "OSDL #829" do_test 994 "OSDL #994" do_test 994-2 "OSDL #994 - with a dependant resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test stonith-3 "Stonith startup" do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Dont promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node" do_test bug-lf-1920 "Correctly handle probes that find active resources" do_test bnc-515172 "Location constraint with multiple expressions" echo "" do_test systemhealth1 "System Health () #1" do_test systemhealth2 "System Health () #2" 
do_test systemhealth3 "System Health () #3" do_test systemhealthn1 "System Health (None) #1" do_test systemhealthn2 "System Health (None) #2" do_test systemhealthn3 "System Health (None) #3" do_test systemhealthm1 "System Health (Migrate On Red) #1" do_test systemhealthm2 "System Health (Migrate On Red) #2" do_test systemhealthm3 "System Health (Migrate On Red) #3" do_test systemhealtho1 "System Health (Only Green) #1" do_test systemhealtho2 "System Health (Only Green) #2" do_test systemhealtho3 "System Health (Only Green) #3" do_test systemhealthp1 "System Health (Progessive) #1" do_test systemhealthp2 "System Health (Progessive) #2" do_test systemhealthp3 "System Health (Progessive) #3" +echo "" +do_test utilization "Placement Strategy - utilization" +do_test minimal "Placement Strategy - minimal" +do_test balanced "Placement Strategy - balanced" + echo "" test_results diff --git a/pengine/test10/balanced.dot b/pengine/test10/balanced.dot new file mode 100644 index 0000000000..2003f9bd88 --- /dev/null +++ b/pengine/test10/balanced.dot @@ -0,0 +1,19 @@ +digraph "g" { +"probe_complete host1" -> "probe_complete" [ style = bold] +"probe_complete host1" [ style=bold color="green" fontcolor="black" ] +"probe_complete host2" -> "probe_complete" [ style = bold] +"probe_complete host2" [ style=bold color="green" fontcolor="black" ] +"probe_complete" -> "rsc1_start_0 host2" [ style = bold] +"probe_complete" -> "rsc2_start_0 host1" [ style = bold] +"probe_complete" [ style=bold color="green" fontcolor="orange" ] +"rsc1_monitor_0 host1" -> "probe_complete host1" [ style = bold] +"rsc1_monitor_0 host1" [ style=bold color="green" fontcolor="black" ] +"rsc1_monitor_0 host2" -> "probe_complete host2" [ style = bold] +"rsc1_monitor_0 host2" [ style=bold color="green" fontcolor="black" ] +"rsc1_start_0 host2" [ style=bold color="green" fontcolor="black" ] +"rsc2_monitor_0 host1" -> "probe_complete host1" [ style = bold] +"rsc2_monitor_0 host1" [ style=bold color="green" 
fontcolor="black" ] +"rsc2_monitor_0 host2" -> "probe_complete host2" [ style = bold] +"rsc2_monitor_0 host2" [ style=bold color="green" fontcolor="black" ] +"rsc2_start_0 host1" [ style=bold color="green" fontcolor="black" ] +} diff --git a/pengine/test10/balanced.exp b/pengine/test10/balanced.exp new file mode 100644 index 0000000000..57d170b43b --- /dev/null +++ b/pengine/test10/balanced.exp @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/balanced.scores b/pengine/test10/balanced.scores new file mode 100644 index 0000000000..68ed45e9c5 --- /dev/null +++ b/pengine/test10/balanced.scores @@ -0,0 +1,5 @@ +Allocation scores: +native_color: rsc1 allocation score on host1: 0 +native_color: rsc1 allocation score on host2: 0 +native_color: rsc2 allocation score on host1: 0 +native_color: rsc2 allocation score on host2: 0 diff --git a/pengine/test10/balanced.xml b/pengine/test10/balanced.xml new file mode 100644 index 0000000000..bc852762b6 --- /dev/null +++ b/pengine/test10/balanced.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/minimal.dot b/pengine/test10/minimal.dot new file mode 100644 index 0000000000..db35911cd3 --- /dev/null +++ b/pengine/test10/minimal.dot @@ -0,0 +1,19 @@ +digraph "g" { +"probe_complete host1" -> "probe_complete" [ style = bold] +"probe_complete host1" [ style=bold color="green" fontcolor="black" ] +"probe_complete host2" -> "probe_complete" [ style = bold] +"probe_complete host2" [ style=bold color="green" fontcolor="black" ] +"probe_complete" -> "rsc1_start_0 host1" [ style = bold] +"probe_complete" -> "rsc2_start_0 host1" [ style = bold] +"probe_complete" [ style=bold color="green" fontcolor="orange" ] +"rsc1_monitor_0 host1" -> 
"probe_complete host1" [ style = bold] +"rsc1_monitor_0 host1" [ style=bold color="green" fontcolor="black" ] +"rsc1_monitor_0 host2" -> "probe_complete host2" [ style = bold] +"rsc1_monitor_0 host2" [ style=bold color="green" fontcolor="black" ] +"rsc1_start_0 host1" [ style=bold color="green" fontcolor="black" ] +"rsc2_monitor_0 host1" -> "probe_complete host1" [ style = bold] +"rsc2_monitor_0 host1" [ style=bold color="green" fontcolor="black" ] +"rsc2_monitor_0 host2" -> "probe_complete host2" [ style = bold] +"rsc2_monitor_0 host2" [ style=bold color="green" fontcolor="black" ] +"rsc2_start_0 host1" [ style=bold color="green" fontcolor="black" ] +} diff --git a/pengine/test10/minimal.exp b/pengine/test10/minimal.exp new file mode 100644 index 0000000000..44af56af3b --- /dev/null +++ b/pengine/test10/minimal.exp @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/minimal.scores b/pengine/test10/minimal.scores new file mode 100644 index 0000000000..68ed45e9c5 --- /dev/null +++ b/pengine/test10/minimal.scores @@ -0,0 +1,5 @@ +Allocation scores: +native_color: rsc1 allocation score on host1: 0 +native_color: rsc1 allocation score on host2: 0 +native_color: rsc2 allocation score on host1: 0 +native_color: rsc2 allocation score on host2: 0 diff --git a/pengine/test10/minimal.xml b/pengine/test10/minimal.xml new file mode 100644 index 0000000000..54b74c37d9 --- /dev/null +++ b/pengine/test10/minimal.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/order-serialize-set.dot b/pengine/test10/order-serialize-set.dot new file mode 100644 index 0000000000..175dc8e3d0 --- /dev/null +++ b/pengine/test10/order-serialize-set.dot @@ -0,0 +1,82 @@ +digraph "g" { +"all_stopped" -> 
"xen-b-fencing_stop_0 xen-a" [ style = bold] +"all_stopped" [ style=bold color="green" fontcolor="orange" ] +"base_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"base_migrate_from_0 xen-b" -> "base_monitor_10000 xen-b" [ style = bold] +"base_migrate_from_0 xen-b" -> "xen-set-start-end" [ style = bold] +"base_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"base_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"base_migrate_to_0 xen-a" -> "base_migrate_from_0 xen-b" [ style = bold] +"base_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"base_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"core-101_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "base_migrate_from_0 xen-b" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "base_migrate_to_0 xen-a" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "core-101_monitor_10000 xen-b" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "core-200_migrate_from_0 xen-b" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "core-200_migrate_to_0 xen-a" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "edge_migrate_from_0 xen-b" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "edge_migrate_to_0 xen-a" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "xen-set-start-end" [ style = bold] +"core-101_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"core-101_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"core-101_migrate_to_0 xen-a" -> "core-101_migrate_from_0 xen-b" [ style = bold] +"core-101_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"core-101_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"core-200_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"core-200_migrate_from_0 xen-b" -> "base_migrate_from_0 xen-b" [ style = bold] +"core-200_migrate_from_0 xen-b" -> "base_migrate_to_0 xen-a" [ style = bold] 
+"core-200_migrate_from_0 xen-b" -> "core-200_monitor_10000 xen-b" [ style = bold] +"core-200_migrate_from_0 xen-b" -> "edge_migrate_from_0 xen-b" [ style = bold] +"core-200_migrate_from_0 xen-b" -> "edge_migrate_to_0 xen-a" [ style = bold] +"core-200_migrate_from_0 xen-b" -> "xen-set-start-end" [ style = bold] +"core-200_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"core-200_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"core-200_migrate_to_0 xen-a" -> "core-200_migrate_from_0 xen-b" [ style = bold] +"core-200_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"core-200_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"db_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"db_migrate_from_0 xen-b" -> "base_migrate_from_0 xen-b" [ style = bold] +"db_migrate_from_0 xen-b" -> "base_migrate_to_0 xen-a" [ style = bold] +"db_migrate_from_0 xen-b" -> "core-101_migrate_from_0 xen-b" [ style = bold] +"db_migrate_from_0 xen-b" -> "core-101_migrate_to_0 xen-a" [ style = bold] +"db_migrate_from_0 xen-b" -> "core-200_migrate_from_0 xen-b" [ style = bold] +"db_migrate_from_0 xen-b" -> "core-200_migrate_to_0 xen-a" [ style = bold] +"db_migrate_from_0 xen-b" -> "db_monitor_10000 xen-b" [ style = bold] +"db_migrate_from_0 xen-b" -> "edge_migrate_from_0 xen-b" [ style = bold] +"db_migrate_from_0 xen-b" -> "edge_migrate_to_0 xen-a" [ style = bold] +"db_migrate_from_0 xen-b" -> "xen-set-start-end" [ style = bold] +"db_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"db_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"db_migrate_to_0 xen-a" -> "db_migrate_from_0 xen-b" [ style = bold] +"db_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"db_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"edge_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"edge_migrate_from_0 xen-b" -> "base_migrate_from_0 xen-b" [ style = bold] +"edge_migrate_from_0 
xen-b" -> "base_migrate_to_0 xen-a" [ style = bold] +"edge_migrate_from_0 xen-b" -> "edge_monitor_10000 xen-b" [ style = bold] +"edge_migrate_from_0 xen-b" -> "xen-set-start-end" [ style = bold] +"edge_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"edge_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"edge_migrate_to_0 xen-a" -> "edge_migrate_from_0 xen-b" [ style = bold] +"edge_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"edge_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-a-fencing_monitor_60000 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-a-fencing_start_0 xen-b" -> "xen-a-fencing_monitor_60000 xen-b" [ style = bold] +"xen-a-fencing_start_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-a-fencing_stop_0 xen-b" -> "xen-a-fencing_start_0 xen-b" [ style = bold] +"xen-a-fencing_stop_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-b-fencing_stop_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"xen-set-start-begin" -> "base_migrate_from_0 xen-b" [ style = bold] +"xen-set-start-begin" -> "base_migrate_to_0 xen-a" [ style = bold] +"xen-set-start-begin" -> "core-101_migrate_from_0 xen-b" [ style = bold] +"xen-set-start-begin" -> "core-101_migrate_to_0 xen-a" [ style = bold] +"xen-set-start-begin" -> "core-200_migrate_from_0 xen-b" [ style = bold] +"xen-set-start-begin" -> "core-200_migrate_to_0 xen-a" [ style = bold] +"xen-set-start-begin" -> "db_migrate_from_0 xen-b" [ style = bold] +"xen-set-start-begin" -> "db_migrate_to_0 xen-a" [ style = bold] +"xen-set-start-begin" -> "edge_migrate_from_0 xen-b" [ style = bold] +"xen-set-start-begin" -> "edge_migrate_to_0 xen-a" [ style = bold] +"xen-set-start-begin" [ style=bold color="green" fontcolor="orange" ] +"xen-set-start-end" [ style=bold color="green" fontcolor="orange" ] +} diff --git a/pengine/test10/order-serialize-set.exp b/pengine/test10/order-serialize-set.exp new file mode 100644 index 
0000000000..88396521fc --- /dev/null +++ b/pengine/test10/order-serialize-set.exp @@ -0,0 +1,392 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/order-serialize-set.scores b/pengine/test10/order-serialize-set.scores new file mode 100644 index 0000000000..3ab363b0a7 --- /dev/null +++ b/pengine/test10/order-serialize-set.scores @@ -0,0 +1,21 @@ +Allocation scores: +native_color: xen-a-fencing allocation score on xen-b: 1000 +native_color: xen-a-fencing allocation score on xen-a: -1000000 +native_color: xen-b-fencing allocation score on xen-b: -1000000 +native_color: xen-b-fencing allocation score on xen-a: 1000 +native_color: db allocation score on xen-b: 0 +native_color: db allocation score on xen-a: 6000 +native_color: dbreplica allocation score on xen-b: 6000 +native_color: dbreplica allocation score on xen-a: 0 +native_color: core-101 allocation score on xen-b: 0 +native_color: core-101 allocation score on xen-a: 6000 +native_color: core-200 allocation score on xen-b: 0 +native_color: core-200 allocation score on xen-a: 6000 +native_color: sysadmin allocation score on xen-b: 6000 +native_color: sysadmin allocation score on xen-a: 0 +native_color: edge allocation score on xen-b: 0 +native_color: edge allocation 
score on xen-a: 6000 +native_color: base allocation score on xen-b: 0 +native_color: base allocation score on xen-a: 6000 +native_color: Email_Alerting allocation score on xen-b: 1000 +native_color: Email_Alerting allocation score on xen-a: 0 diff --git a/pengine/test10/order-serialize-set.xml b/pengine/test10/order-serialize-set.xml new file mode 100644 index 0000000000..eb5f7bf32e --- /dev/null +++ b/pengine/test10/order-serialize-set.xml @@ -0,0 +1,336 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/order-serialize.dot b/pengine/test10/order-serialize.dot new file mode 100644 index 0000000000..fb5907abdf --- /dev/null +++ b/pengine/test10/order-serialize.dot @@ -0,0 +1,49 @@ +digraph "g" { +"all_stopped" -> "xen-b-fencing_stop_0 xen-a" [ style = bold] +"all_stopped" [ style=bold color="green" fontcolor="orange" ] +"base_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"base_migrate_from_0 xen-b" -> "base_monitor_10000 xen-b" [ style = bold] +"base_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"base_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"base_migrate_to_0 xen-a" -> "base_migrate_from_0 xen-b" [ style = bold] +"base_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"base_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] 
+"core-101_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "core-101_monitor_10000 xen-b" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "core-200_migrate_from_0 xen-b" [ style = bold] +"core-101_migrate_from_0 xen-b" -> "core-200_migrate_to_0 xen-a" [ style = bold] +"core-101_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"core-101_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"core-101_migrate_to_0 xen-a" -> "core-101_migrate_from_0 xen-b" [ style = bold] +"core-101_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"core-101_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"core-200_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"core-200_migrate_from_0 xen-b" -> "core-200_monitor_10000 xen-b" [ style = bold] +"core-200_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"core-200_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"core-200_migrate_to_0 xen-a" -> "core-200_migrate_from_0 xen-b" [ style = bold] +"core-200_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"core-200_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"db_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"db_migrate_from_0 xen-b" -> "db_monitor_10000 xen-b" [ style = bold] +"db_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"db_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"db_migrate_to_0 xen-a" -> "db_migrate_from_0 xen-b" [ style = bold] +"db_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"db_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"edge_migrate_from_0 xen-b" -> "all_stopped" [ style = bold] +"edge_migrate_from_0 xen-b" -> "base_migrate_from_0 xen-b" [ style = bold] +"edge_migrate_from_0 xen-b" -> "base_migrate_to_0 xen-a" [ style = bold] +"edge_migrate_from_0 xen-b" -> "edge_monitor_10000 xen-b" [ style = bold] 
+"edge_migrate_from_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"edge_migrate_to_0 xen-a" -> "all_stopped" [ style = bold] +"edge_migrate_to_0 xen-a" -> "edge_migrate_from_0 xen-b" [ style = bold] +"edge_migrate_to_0 xen-a" [ style=bold color="green" fontcolor="black" ] +"edge_monitor_10000 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-a-fencing_monitor_60000 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-a-fencing_start_0 xen-b" -> "xen-a-fencing_monitor_60000 xen-b" [ style = bold] +"xen-a-fencing_start_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-a-fencing_stop_0 xen-b" -> "xen-a-fencing_start_0 xen-b" [ style = bold] +"xen-a-fencing_stop_0 xen-b" [ style=bold color="green" fontcolor="black" ] +"xen-b-fencing_stop_0 xen-a" [ style=bold color="green" fontcolor="black" ] +} diff --git a/pengine/test10/order-serialize.exp b/pengine/test10/order-serialize.exp new file mode 100644 index 0000000000..6271054857 --- /dev/null +++ b/pengine/test10/order-serialize.exp @@ -0,0 +1,279 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/order-serialize.scores b/pengine/test10/order-serialize.scores new file mode 100644 index 0000000000..3ab363b0a7 --- /dev/null +++ b/pengine/test10/order-serialize.scores @@ -0,0 +1,21 @@ +Allocation scores: +native_color: xen-a-fencing allocation score on xen-b: 1000 +native_color: xen-a-fencing allocation score on xen-a: -1000000 +native_color: xen-b-fencing 
allocation score on xen-b: -1000000 +native_color: xen-b-fencing allocation score on xen-a: 1000 +native_color: db allocation score on xen-b: 0 +native_color: db allocation score on xen-a: 6000 +native_color: dbreplica allocation score on xen-b: 6000 +native_color: dbreplica allocation score on xen-a: 0 +native_color: core-101 allocation score on xen-b: 0 +native_color: core-101 allocation score on xen-a: 6000 +native_color: core-200 allocation score on xen-b: 0 +native_color: core-200 allocation score on xen-a: 6000 +native_color: sysadmin allocation score on xen-b: 6000 +native_color: sysadmin allocation score on xen-a: 0 +native_color: edge allocation score on xen-b: 0 +native_color: edge allocation score on xen-a: 6000 +native_color: base allocation score on xen-b: 0 +native_color: base allocation score on xen-a: 6000 +native_color: Email_Alerting allocation score on xen-b: 1000 +native_color: Email_Alerting allocation score on xen-a: 0 diff --git a/pengine/test10/order-serialize.xml b/pengine/test10/order-serialize.xml new file mode 100644 index 0000000000..950400d05e --- /dev/null +++ b/pengine/test10/order-serialize.xml @@ -0,0 +1,331 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/utilization.dot b/pengine/test10/utilization.dot new file mode 100644 index 0000000000..77b065b4e1 --- /dev/null +++ 
b/pengine/test10/utilization.dot @@ -0,0 +1,17 @@ +digraph "g" { +"probe_complete host1" -> "probe_complete" [ style = bold] +"probe_complete host1" [ style=bold color="green" fontcolor="black" ] +"probe_complete host2" -> "probe_complete" [ style = bold] +"probe_complete host2" [ style=bold color="green" fontcolor="black" ] +"probe_complete" -> "rsc2_start_0 host2" [ style = bold] +"probe_complete" [ style=bold color="green" fontcolor="orange" ] +"rsc1_monitor_0 host1" -> "probe_complete host1" [ style = bold] +"rsc1_monitor_0 host1" [ style=bold color="green" fontcolor="black" ] +"rsc1_monitor_0 host2" -> "probe_complete host2" [ style = bold] +"rsc1_monitor_0 host2" [ style=bold color="green" fontcolor="black" ] +"rsc2_monitor_0 host1" -> "probe_complete host1" [ style = bold] +"rsc2_monitor_0 host1" [ style=bold color="green" fontcolor="black" ] +"rsc2_monitor_0 host2" -> "probe_complete host2" [ style = bold] +"rsc2_monitor_0 host2" [ style=bold color="green" fontcolor="black" ] +"rsc2_start_0 host2" [ style=bold color="green" fontcolor="black" ] +} diff --git a/pengine/test10/utilization.exp b/pengine/test10/utilization.exp new file mode 100644 index 0000000000..1da017cd39 --- /dev/null +++ b/pengine/test10/utilization.exp @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/utilization.scores b/pengine/test10/utilization.scores new file mode 100644 index 0000000000..e55b799d76 --- /dev/null +++ b/pengine/test10/utilization.scores @@ -0,0 +1,5 @@ +Allocation scores: +native_color: rsc2 allocation score on host1: 0 +native_color: rsc2 allocation score on host2: 0 +native_color: rsc1 allocation score on host1: 0 +native_color: rsc1 allocation score on host2: 0 diff --git a/pengine/test10/utilization.xml b/pengine/test10/utilization.xml new file mode 100644 index 
0000000000..e4583b09bc --- /dev/null +++ b/pengine/test10/utilization.xml @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/utils.c b/pengine/utils.c index bd843d711d..0bf437d569 100644 --- a/pengine/utils.c +++ b/pengine/utils.c @@ -1,533 +1,620 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include void print_rsc_to_node(const char *pre_text, rsc_to_node_t *cons, gboolean details) { if(cons == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%s Constraint %s (%p) - %d nodes:", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", "rsc_to_node", cons->id, cons, g_list_length(cons->node_list_rh)); if(details == FALSE) { crm_debug_4("\t%s (node placement rule)", safe_val3(NULL, cons, rsc_lh, id)); slist_iter( node, node_t, cons->node_list_rh, lpc, print_node("\t\t-->", node, FALSE) ); } } void print_rsc_colocation(const char *pre_text, rsc_colocation_t *cons, gboolean details) { if(cons == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%s Constraint %s (%p):", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", XML_CONS_TAG_RSC_DEPEND, 
cons->id, cons); if(details == FALSE) { crm_debug_4("\t%s --> %s, %d", safe_val3(NULL, cons, rsc_lh, id), safe_val3(NULL, cons, rsc_rh, id), cons->score); } } void pe_free_ordering(GListPtr constraints) { GListPtr iterator = constraints; while(iterator != NULL) { order_constraint_t *order = iterator->data; iterator = iterator->next; crm_free(order->lh_action_task); crm_free(order->rh_action_task); crm_free(order); } if(constraints != NULL) { g_list_free(constraints); } } void pe_free_rsc_to_node(GListPtr constraints) { GListPtr iterator = constraints; while(iterator != NULL) { rsc_to_node_t *cons = iterator->data; iterator = iterator->next; pe_free_shallow(cons->node_list_rh); crm_free(cons); } if(constraints != NULL) { g_list_free(constraints); } } rsc_to_node_t * rsc2node_new(const char *id, resource_t *rsc, int node_weight, node_t *foo_node, pe_working_set_t *data_set) { rsc_to_node_t *new_con = NULL; if(rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } else if(foo_node == NULL) { CRM_CHECK(node_weight == 0, return NULL); } crm_malloc0(new_con, sizeof(rsc_to_node_t)); if(new_con != NULL) { new_con->id = id; new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; if(foo_node != NULL) { node_t *copy = node_copy(foo_node); copy->weight = node_weight; new_con->node_list_rh = g_list_append(NULL, copy); } data_set->placement_constraints = g_list_append( data_set->placement_constraints, new_con); rsc->rsc_location = g_list_append(rsc->rsc_location, new_con); } return new_con; } const char * ordering_type2text(enum pe_ordering type) { const char *result = ""; if(type & pe_order_implies_left) { /* was: mandatory */ result = "right_implies_left"; } else if(type & pe_order_implies_right) { /* was: recover */ result = "left_implies_right"; } else if(type & pe_order_optional) { /* pure ordering, nothing implied */ result = "optional"; } else if(type & pe_order_runnable_left) { 
result = "runnable"; /* } else { */ /* crm_err("Unknown ordering type: %.3x", type); */ } return result; } gboolean can_run_resources(const node_t *node) { if(node == NULL) { return FALSE; } if(node->details->online == FALSE || node->details->shutdown || node->details->unclean || node->details->standby) { crm_debug_2("%s: online=%d, unclean=%d, standby=%d", node->details->uname, node->details->online, node->details->unclean, node->details->standby); return FALSE; } return TRUE; } + +/* rc < 0 if 'node1' has more capacity remaining + * rc > 0 if 'node1' has less capacity remaining + */ +static int +compare_capacity(const node_t *node1, const node_t *node2) +{ + GHashTableIter iter; + const char *key = NULL; + const char *value = NULL; + int node1_capacity = 0; + int node2_capacity = 0; + int result = 0; + + g_hash_table_iter_init(&iter, node1->details->utilization); + while (g_hash_table_iter_next(&iter, (gpointer)&key, (gpointer)&value)) { + node1_capacity = crm_parse_int(value, "0"); + node2_capacity = crm_parse_int(g_hash_table_lookup(node2->details->utilization, key), "0"); + + if (node1_capacity > node2_capacity) { + result += -1; + } else if (node1_capacity < node2_capacity) { + result += 1; + } + } + + g_hash_table_iter_init(&iter, node2->details->utilization); + while (g_hash_table_iter_next(&iter, (gpointer)&key, (gpointer)&value)) { + if (g_hash_table_lookup_extended(node1->details->utilization, key, NULL, NULL)) { + continue; + } + + node1_capacity = 0; + node2_capacity = crm_parse_int(value, "0"); + + if (node1_capacity > node2_capacity) { + result += -1; + } else if (node1_capacity < node2_capacity) { + result += 1; + } + } + + return result; +} + /* return -1 if 'a' is more preferred * return 1 if 'b' is more preferred */ -gint sort_node_weight(gconstpointer a, gconstpointer b) +gint sort_node_weight(gconstpointer a, gconstpointer b, gpointer data) { int level = LOG_DEBUG_3; const node_t *node1 = (const node_t*)a; const node_t *node2 = (const 
node_t*)b; + const pe_working_set_t *data_set = (const pe_working_set_t*)data; int node1_weight = 0; int node2_weight = 0; + + int result = 0; if(a == NULL) { return 1; } if(b == NULL) { return -1; } node1_weight = node1->weight; node2_weight = node2->weight; if(can_run_resources(node1) == FALSE) { node1_weight = -INFINITY; } if(can_run_resources(node2) == FALSE) { node2_weight = -INFINITY; } if(node1_weight > node2_weight) { do_crm_log_unlikely(level, "%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if(node1_weight < node2_weight) { do_crm_log_unlikely(level, "%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } do_crm_log_unlikely(level, "%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); + + if (safe_str_eq(data_set->placement_strategy, "minimal")) { + goto equal; + } + + if (safe_str_eq(data_set->placement_strategy, "balanced")) { + result = compare_capacity(node1, node2); + if (result != 0) { + return result; + } + } /* now try to balance resources across the cluster */ if(node1->details->num_resources < node2->details->num_resources) { do_crm_log_unlikely(level, "%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if(node1->details->num_resources > node2->details->num_resources) { do_crm_log_unlikely(level, "%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } +equal: do_crm_log_unlikely(level, "%s = %s", node1->details->uname, node2->details->uname); return 0; } +/* Specify 'allocate' to TRUE when allocating + * Otherwise to FALSE when deallocating + */ +static void +calculate_utilization(node_t *node, resource_t *rsc, gboolean allocate) +{ + GHashTableIter iter; + const 
char *key = NULL; + const char *value = NULL; + const char *capacity = NULL; + char *remain_capacity = NULL; + + g_hash_table_iter_init(&iter, rsc->utilization); + while (g_hash_table_iter_next(&iter, (gpointer)&key, (gpointer)&value)) { + capacity = g_hash_table_lookup(node->details->utilization, key); + if (capacity) { + if (allocate) { + remain_capacity = crm_itoa(crm_parse_int(capacity, "0") - crm_parse_int(value, "0")); + } else { + remain_capacity = crm_itoa(crm_parse_int(capacity, "0") + crm_parse_int(value, "0")); + } + g_hash_table_replace(node->details->utilization, crm_strdup(key), remain_capacity); + } + } +} gboolean native_assign_node(resource_t *rsc, GListPtr nodes, node_t *chosen, gboolean force) { CRM_ASSERT(rsc->variant == pe_native); clear_bit(rsc->flags, pe_rsc_provisional); if(chosen == NULL) { crm_debug("Could not allocate a node for %s", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; return FALSE; } else if(force == FALSE && (can_run_resources(chosen) == FALSE || chosen->weight < 0)) { crm_debug("All nodes for resource %s are unavailable" ", unclean or shutting down (%s: %d, %d)", rsc->id, chosen->details->uname, can_run_resources(chosen), chosen->weight); rsc->next_role = RSC_ROLE_STOPPED; return FALSE; } /* todo: update the old node for each resource to reflect its * new resource count */ if(rsc->allocated_to) { node_t *old = rsc->allocated_to; old->details->allocated_rsc = g_list_remove( old->details->allocated_rsc, rsc); old->details->num_resources--; old->count--; + calculate_utilization(old, rsc, FALSE); } crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id); crm_free(rsc->allocated_to); rsc->allocated_to = node_copy(chosen); chosen->details->allocated_rsc = g_list_append(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; + calculate_utilization(chosen, rsc, TRUE); return TRUE; } char * convert_non_atomic_task(char *old_uuid, resource_t *rsc, gboolean allow_notify, gboolean free_original) 
{ int interval = 0; char *uuid = NULL; char *rid = NULL; char *raw_task = NULL; int task = no_action; crm_debug_3("Processing %s", old_uuid); if(old_uuid == NULL) { return NULL; } else if(strstr(old_uuid, "notify") != NULL) { goto done; /* no conversion */ } else if(rsc->variant < pe_group) { goto done; /* no conversion */ } CRM_ASSERT(parse_op_key(old_uuid, &rid, &raw_task, &interval)); if(interval > 0) { goto done; /* no conversion */ } task = text2task(raw_task); switch(task) { case stop_rsc: case start_rsc: case action_notify: case action_promote: case action_demote: break; case stopped_rsc: case started_rsc: case action_notified: case action_promoted: case action_demoted: task--; break; case monitor_rsc: case shutdown_crm: case stonith_node: task = no_action; break; default: crm_err("Unknown action: %s", raw_task); task = no_action; break; } if(task != no_action) { if(is_set(rsc->flags, pe_rsc_notify) && allow_notify) { uuid = generate_notify_key(rid, "confirmed-post", task2text(task+1)); } else { uuid = generate_op_key(rid, task2text(task+1), 0); } crm_debug_2("Converted %s -> %s", old_uuid, uuid); } done: if(uuid == NULL) { uuid = crm_strdup(old_uuid); } if(free_original) { crm_free(old_uuid); } crm_free(raw_task); crm_free(rid); return uuid; } void order_actions( action_t *lh_action, action_t *rh_action, enum pe_ordering order) { action_wrapper_t *wrapper = NULL; GListPtr list = NULL; crm_debug_3("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); log_action(LOG_DEBUG_4, "LH (order_actions)", lh_action, FALSE); log_action(LOG_DEBUG_4, "RH (order_actions)", rh_action, FALSE); crm_malloc0(wrapper, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_append(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_right; */ /* order ^= pe_order_implies_right; */ crm_malloc0(wrapper, sizeof(action_wrapper_t)); wrapper->action = lh_action; 
wrapper->type = order; list = rh_action->actions_before; list = g_list_append(list, wrapper); rh_action->actions_before = list; } void log_action(unsigned int log_level, const char *pre_text, action_t *action, gboolean details) { const char *node_uname = NULL; const char *node_uuid = NULL; if(action == NULL) { do_crm_log_unlikely(log_level, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(action->pseudo) { node_uname = NULL; node_uuid = NULL; } else if(action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; node_uuid = NULL; } switch(text2task(action->task)) { case stonith_node: case shutdown_crm: do_crm_log_unlikely(log_level, "%s%s%sAction %d: %s%s%s%s%s%s", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", action->pseudo?"Pseduo ":action->optional?"Optional ":action->runnable?action->processed?"":"(Provisional) ":"!!Non-Startable!! ", action->id, action->uuid, node_uname?"\ton ":"", node_uname?node_uname:"", node_uuid?"\t\t(":"", node_uuid?node_uuid:"", node_uuid?")":""); break; default: do_crm_log_unlikely(log_level, "%s%s%sAction %d: %s %s%s%s%s%s%s", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", action->optional?"Optional ":action->pseudo?"Pseduo ":action->runnable?action->processed?"":"(Provisional) ":"!!Non-Startable!! 
", action->id, action->uuid, safe_val3("", action, rsc, id), node_uname?"\ton ":"", node_uname?node_uname:"", node_uuid?"\t\t(":"", node_uuid?node_uuid:"", node_uuid?")":""); break; } if(details) { do_crm_log_unlikely(log_level+1, "\t\t====== Preceeding Actions"); slist_iter( other, action_wrapper_t, action->actions_before, lpc, log_action(log_level+1, "\t\t", other->action, FALSE); ); do_crm_log_unlikely(log_level+1, "\t\t====== Subsequent Actions"); slist_iter( other, action_wrapper_t, action->actions_after, lpc, log_action(log_level+1, "\t\t", other->action, FALSE); ); do_crm_log_unlikely(log_level+1, "\t\t====== End"); } else { do_crm_log_unlikely(log_level, "\t\t(seen=%d, before=%d, after=%d)", action->seen_count, g_list_length(action->actions_before), g_list_length(action->actions_after)); } } action_t *get_pseudo_op(const char *name, pe_working_set_t *data_set) { action_t *op = NULL; const char *op_s = name; GListPtr possible_matches = NULL; possible_matches = find_actions(data_set->actions, name, NULL); if(possible_matches != NULL) { if(g_list_length(possible_matches) > 1) { pe_warn("Action %s exists %d times", name, g_list_length(possible_matches)); } op = g_list_nth_data(possible_matches, 0); g_list_free(possible_matches); } else { op = custom_action(NULL, crm_strdup(op_s), op_s, NULL, TRUE, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; } return op; } gboolean can_run_any(GListPtr nodes) { if(nodes == NULL) { return FALSE; } slist_iter( node, node_t, nodes, lpc, if(can_run_resources(node) && node->weight >= 0) { return TRUE; } ); return FALSE; } diff --git a/pengine/utils.h b/pengine/utils.h index d1de56edc7..22fd120e4c 100644 --- a/pengine/utils.h +++ b/pengine/utils.h @@ -1,70 +1,70 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at 
your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PENGINE_AUTILS__H #define PENGINE_AUTILS__H /* Constraint helper functions */ extern rsc_colocation_t *invert_constraint(rsc_colocation_t *constraint); extern rsc_to_node_t *copy_constraint(rsc_to_node_t *constraint); extern void print_rsc_to_node( const char *pre_text, rsc_to_node_t *cons, gboolean details); extern void print_rsc_colocation( const char *pre_text, rsc_colocation_t *cons, gboolean details); extern rsc_to_node_t *rsc2node_new( const char *id, resource_t *rsc, int weight, node_t *node, pe_working_set_t *data_set); extern void pe_free_rsc_to_node(GListPtr constraints); extern void pe_free_ordering(GListPtr constraints); extern const char *ordering_type2text(enum pe_ordering type); extern gboolean rsc_colocation_new( const char *id, const char *node_attr, int score, resource_t *rsc_lh, resource_t *rsc_rh, const char *state_lh, const char *state_rh, pe_working_set_t *data_set); extern rsc_to_node_t *generate_location_rule( resource_t *rsc, xmlNode *location_rule, pe_working_set_t *data_set); -extern gint sort_node_weight(gconstpointer a, gconstpointer b); +extern gint sort_node_weight(gconstpointer a, gconstpointer b, gpointer data_set); extern gboolean can_run_resources(const node_t *node); extern gboolean native_assign_node(resource_t *rsc, GListPtr candidates, node_t *chosen, gboolean force); extern char *convert_non_atomic_task(char *old_uuid, resource_t *rsc, gboolean allow_notify, gboolean free_original); extern void order_actions(action_t *lh_action, action_t 
*rh_action, enum pe_ordering order); extern void log_action(unsigned int log_level, const char *pre_text, action_t *action, gboolean details); extern action_t *get_pseudo_op(const char *name, pe_working_set_t *data_set); extern gboolean can_run_any(GListPtr nodes); extern resource_t *find_compatible_child( resource_t *local_child, resource_t *rsc, enum rsc_role_e filter, gboolean current); #define STONITH_UP "stonith_up" #define STONITH_DONE "stonith_complete" #define ALL_STOPPED "all_stopped" #endif diff --git a/tools/Makefile.am b/tools/Makefile.am index de81f1ee21..53805147c1 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,133 +1,143 @@ # # Copyright (C) 2004-2009 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in SUBDIRS = shell INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl COMMONLIBS = \ $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cib/libcib.la \ $(CURSESLIBS) $(CLUSTERLIBS) headerdir = $(pkgincludedir)/crm header_HEADERS = attrd.h EXTRA_DIST = $(sbin_SCRIPTS) halibdir = $(CRM_DAEMON_DIR) halib_SCRIPTS = haresources2cib.py hb2openais.sh halib_PROGRAMS = attrd pingd halib_PYTHON = crm_primitive.py hb2openais-helper.py -sbin_PROGRAMS = crmadmin cibadmin crm_node crm_attribute crm_resource crm_verify \ +sbin_PROGRAMS = crm_simulate crmadmin cibadmin crm_node crm_attribute crm_resource crm_verify \ crm_uuid crm_shadow attrd_updater crm_diff crm_mon iso8601 if BUILD_SERVICELOG sbin_PROGRAMS += notifyServicelogEvent endif if BUILD_OPENIPMI_SERICELOG sbin_PROGRAMS += ipmiservicelogd endif if BUILD_HELP man8_MANS = $(sbin_PROGRAMS:%=%.8) -%.8: % - echo Creating $@ - help2man --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/tools/$< endif sbin_SCRIPTS = crm crm_standby crm_master crm_failcount ## SOURCES #noinst_HEADERS = config.h control.h crmd.h noinst_HEADERS = crmadmin_SOURCES = crmadmin.c crmadmin_LDADD = $(COMMONLIBS) $(CLUSTERLIBS) \ $(top_builddir)/lib/pengine/libpe_status.la crm_uuid_SOURCES = crm_uuid.c crm_uuid_LDADD = $(top_builddir)/lib/common/libcrmcluster.la cibadmin_SOURCES = cibadmin.c cibadmin_LDADD = $(COMMONLIBS) crm_shadow_SOURCES = cib_shadow.c crm_shadow_LDADD = $(COMMONLIBS) crm_node_SOURCES = ccm_epoche.c crm_node_LDADD = $(COMMONLIBS) $(CLUSTERLIBS) \ $(top_builddir)/lib/common/libcrmcluster.la +crm_simulate_SOURCES = crm_inject.c +crm_simulate_CFLAGS = -I$(top_srcdir)/pengine + +crm_simulate_LDADD = $(COMMONLIBS) \ + $(top_builddir)/lib/pengine/libpe_status.la \ + $(top_builddir)/pengine/libpengine.la \ + $(top_builddir)/lib/cib/libcib.la \ + 
$(top_builddir)/lib/transition/libtransitioner.la + crm_diff_SOURCES = xml_diff.c crm_diff_LDADD = $(COMMONLIBS) crm_mon_SOURCES = crm_mon.c crm_mon_LDADD = $(COMMONLIBS) $(SNMPLIBS) $(ESMTPLIBS) -llrm \ $(top_builddir)/lib/pengine/libpe_status.la # Arguments could be made that this should live in crm/pengine crm_verify_SOURCES = crm_verify.c crm_verify_LDADD = $(COMMONLIBS) \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la crm_attribute_SOURCES = crm_attribute.c crm_attribute_LDADD = $(COMMONLIBS) crm_resource_SOURCES = crm_resource.c crm_resource_LDADD = $(COMMONLIBS) \ $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la iso8601_SOURCES = test.iso8601.c iso8601_LDADD = $(COMMONLIBS) attrd_SOURCES = attrd.c attrd_LDADD = $(COMMONLIBS) $(top_builddir)/lib/common/libcrmcluster.la pingd_SOURCES = pingd.c pingd_LDADD = $(COMMONLIBS) attrd_updater_SOURCES = attrd_updater.c attrd_updater_LDADD = $(COMMONLIBS) if BUILD_SERVICELOG notifyServicelogEvent_SOURCES = notifyServicelogEvent.c notifyServicelogEvent_CFLAGS = `pkg-config --cflags servicelog-1` notifyServicelogEvent_LDFLAGS = `pkg-config --libs servicelog-1` $(top_builddir)/lib/common/libcrmcommon.la endif if BUILD_OPENIPMI_SERICELOG ipmiservicelogd_SOURCES = ipmiservicelogd.c ipmiservicelogd_CFLAGS = `pkg-config --cflags OpenIPMI OpenIPMIposix servicelog-1` ipmiservicelogd_LDFLAGS = `pkg-config --libs OpenIPMI OpenIPMIposix servicelog-1` $(top_builddir)/lib/common/libcrmcommon.la endif +%.8: % + echo Creating $@ + help2man --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/tools/$< + clean-generic: rm -f *.log *.debug *.xml *~ install-exec-local: uninstall-local: .PHONY: install-exec-hook diff --git a/tools/crm_inject.c b/tools/crm_inject.c new file mode 100644 index 0000000000..b664defe82 --- /dev/null +++ b/tools/crm_inject.c @@ -0,0 +1,1026 @@ +/* 
+ * Copyright (C) 2009 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +cib_t *global_cib = NULL; +GListPtr op_fail = NULL; +gboolean quiet = FALSE; + +#define node_template "//"XML_CIB_TAG_STATE"[@uname='%s']" +#define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" +#define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" +/* #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" */ + +#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + +#define quiet_log(fmt, args...) 
do { \ + if(quiet == FALSE) { \ + printf(fmt , ##args); \ + } \ + } while(0) + + +extern xmlNode * do_calculations( + pe_working_set_t *data_set, xmlNode *xml_input, ha_time_t *now); + +static xmlNode *find_resource(xmlNode *cib_node, const char *resource) +{ + char *xpath = NULL; + xmlNode *match = NULL; + const char *node = crm_element_value(cib_node, XML_ATTR_UNAME); + int max = strlen(rsc_template) + strlen(resource) + strlen(node) + 1; + crm_malloc0(xpath, max); + + snprintf(xpath, max, rsc_template, node, resource); + match = get_xpath_object(xpath, cib_node, LOG_DEBUG_2); + + crm_free(xpath); + return match; +} + +/* + * Return the node_state entry for 'node' as reported by an xpath query on + * cib_conn. If the first query reports cib_NOTEXISTS, a new node_state + * element is created in the status section and the query is repeated. + * Asserts that the final query succeeded. + */ +static xmlNode *inject_node(cib_t *cib_conn, char *node) +{ + int rc = cib_ok; + /* node_template is the format passed to snprintf() below, so size the buffer from it */ + int max = strlen(node_template) + strlen(node) + 1; + char *xpath = NULL; + xmlNode *cib_object = NULL; + crm_malloc0(xpath, max); + + snprintf(xpath, max, node_template, node); + rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath|cib_sync_call|cib_scope_local); + + if(rc == cib_NOTEXISTS) { + char *uuid = NULL; + + cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE); + determine_host(cib_conn, &node, &uuid); + crm_xml_add(cib_object, XML_ATTR_UUID, uuid); + crm_xml_add(cib_object, XML_ATTR_UNAME, node); + cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object, cib_sync_call|cib_scope_local); + + rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath|cib_sync_call|cib_scope_local); + } + + /* the query string is not needed past this point; release it as find_resource() does */ + crm_free(xpath); + CRM_ASSERT(rc == cib_ok); + return cib_object; +} + +static xmlNode *modify_node(cib_t *cib_conn, char *node, gboolean up) +{ + xmlNode *cib_node = inject_node(cib_conn, node); + if(up) { + crm_xml_add(cib_node, XML_CIB_ATTR_HASTATE, ACTIVESTATUS); + crm_xml_add(cib_node, XML_CIB_ATTR_INCCM, XML_BOOLEAN_YES); + crm_xml_add(cib_node, XML_CIB_ATTR_CRMDSTATE, ONLINESTATUS); + crm_xml_add(cib_node, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_MEMBER); + crm_xml_add(cib_node, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_MEMBER); + + } else { +
crm_xml_add(cib_node, XML_CIB_ATTR_HASTATE, DEADSTATUS); + crm_xml_add(cib_node, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); + crm_xml_add(cib_node, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); + crm_xml_add(cib_node, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); + crm_xml_add(cib_node, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); + } + + crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name); + return cib_node; +} + +static xmlNode *inject_resource(xmlNode *cib_node, const char *resource, const char *rclass, const char *rtype, const char *rprovider) +{ + xmlNode *lrm = NULL; + xmlNode *container = NULL; + xmlNode *cib_resource = NULL; + + cib_resource = find_resource(cib_node, resource); + if(cib_resource != NULL) { + return cib_resource; + } + + /* One day, add query for class, provider, type */ + + if(rclass == NULL || rtype == NULL) { + fprintf(stderr, "Resource %s not found in the status section of %s." + " Please supply the class and type to continue\n", resource, ID(cib_node)); + return NULL; + + } else if(safe_str_neq(rclass, "ocf") + && safe_str_neq(rclass, "lsb")) { + fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass); + return NULL; + + } else if(safe_str_eq(rclass, "ocf") && rprovider == NULL) { + fprintf(stderr, "Please specify the provider for resource %s\n", resource); + return NULL; + } + + crm_info("Injecting new resource %s into %s '%s'", resource, xmlGetNodePath(cib_node), ID(cib_node)); + + lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); + if(lrm == NULL) { + const char *node_uuid = ID(cib_node); + lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); + crm_xml_add(lrm, XML_ATTR_ID, node_uuid); + } + + container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); + if(container == NULL) { + container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); + } + + cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); + crm_xml_add(cib_resource, XML_ATTR_ID, resource); + + crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); + 
crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); + crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype); + + return cib_resource; +} + +static lrm_op_t *create_op( + xmlNode *cib_resource, const char *task, int interval, int outcome) +{ + lrm_op_t *op = NULL; + crm_malloc0(op, sizeof(lrm_op_t)); + + op->app_name = crm_strdup(crm_system_name); + + op->rsc_id = crm_strdup(ID(cib_resource)); + op->interval = interval; + op->op_type = crm_strdup(task); + + op->rc = outcome; + op->op_status = 0; + op->params = NULL; /* TODO: Fill me in */ + + op->call_id = 0; + xml_child_iter(cib_resource, xop, + int tmp = 0; + crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); + if(tmp > op->call_id) { + op->call_id = tmp; + } + ); + op->call_id++; + + return op; +} + +static xmlNode *inject_op(xmlNode *cib_resource, lrm_op_t *op, int target_rc) +{ + return create_operation_update(cib_resource, op, CRM_FEATURE_SET, target_rc, crm_system_name); +} + +static gboolean exec_pseudo_action(crm_graph_t *graph, crm_action_t *action) +{ + action->confirmed = TRUE; + update_graph(graph, action); + return TRUE; +} + +static gboolean exec_rsc_action(crm_graph_t *graph, crm_action_t *action) +{ + int rc = 0; + lrm_op_t *op = NULL; + int target_outcome = 0; + + const char *rtype = NULL; + const char *rclass = NULL; + const char *resource = NULL; + const char *rprovider = NULL; + const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); + + xmlNode *cib_op = NULL; + xmlNode *cib_node = NULL; + xmlNode *cib_object = NULL; + xmlNode *cib_resource = NULL; + xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); + + char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); + + if(safe_str_eq(crm_element_value(action->xml, "operation"), "probe_complete")) { + crm_notice("Skipping %s op for %s", crm_element_value(action->xml, "operation"), node); + goto done; + } + + if(action_rsc == NULL) { + crm_log_xml_err(action->xml, "Bad"); + 
return FALSE; + } + + resource = ID(action_rsc); + rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS); + rtype = crm_element_value(action_rsc, XML_ATTR_TYPE); + rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER); + + if(target_rc_s != NULL) { + target_outcome = crm_parse_int(target_rc_s, "0"); + } + + CRM_ASSERT(global_cib->cmds->query(global_cib, NULL, &cib_object, cib_sync_call|cib_scope_local) == cib_ok); + + cib_node = inject_node(global_cib, node); + CRM_ASSERT(cib_node != NULL); + + cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); + CRM_ASSERT(cib_resource != NULL); + + op = convert_graph_action(cib_resource, action, 0, target_outcome); + printf(" * Executing action %d: %s_%s_%d on %s\n", action->id, resource, op->op_type, op->interval, node); + + slist_iter(spec, char, op_fail, lpc, + + char *key = NULL; + crm_malloc0(key, strlen(spec)); + snprintf(key, strlen(spec), "%s_%s_%d@%s=", resource, op->op_type, op->interval, node); + + if(strncasecmp(key, spec, strlen(key)) == 0) { + rc = sscanf(spec, "%*[^=]=%d", &op->rc); + + action->failed = TRUE; + graph->abort_priority = INFINITY; + printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc); + + break; + } + ); + + cib_op = inject_op(cib_resource, op, target_outcome); + + rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + + done: + action->confirmed = TRUE; + update_graph(graph, action); + return TRUE; +} + +static gboolean exec_crmd_action(crm_graph_t *graph, crm_action_t *action) +{ + action->confirmed = TRUE; + update_graph(graph, action); + return TRUE; +} + +#define STATUS_PATH_MAX 512 +/* + * Simulate fencing of the action's target: mark the node as down via + * modify_node(), replace its entry in the CIB status section, then delete + * its LRM history and transient attributes. Always confirms the action. + */ +static gboolean exec_stonith_action(crm_graph_t *graph, crm_action_t *action) +{ + int rc = 0; + char xpath[STATUS_PATH_MAX]; + char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); + xmlNode *cib_node = modify_node(global_cib, target, FALSE); +
/* check the node entry before touching it, not after */ + CRM_ASSERT(cib_node != NULL); + crm_xml_add(cib_node, XML_ATTR_ORIGIN, __FUNCTION__); + + printf(" * Fencing %s\n", target); + rc = global_cib->cmds->replace(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + + snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM); + rc = global_cib->cmds->delete(global_cib, xpath, NULL, cib_xpath|cib_sync_call|cib_scope_local); + + snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_TAG_TRANSIENT_NODEATTRS); + rc = global_cib->cmds->delete(global_cib, xpath, NULL, cib_xpath|cib_sync_call|cib_scope_local); + + /* release the copy made by crm_element_value_copy(); it is not used below */ + crm_free(target); + + action->confirmed = TRUE; + update_graph(graph, action); + return TRUE; +} + +static char * +add_list_element(char *list, const char *value) +{ + int len = 0; + int last = 0; + + if(value == NULL) { + return list; + } + if(list) { + last = strlen(list); + } + len = last + 2; /* +1 space, +1 EOS */ + len += strlen(value); + crm_realloc(list, len); + sprintf(list + last, " %s", value); + return list; +} + +static void print_cluster_status(pe_working_set_t *data_set) +{ + char *online_nodes = NULL; + char *offline_nodes = NULL; + + slist_iter(node, node_t, data_set->nodes, lpc2, + const char *node_mode = NULL; + + if(node->details->unclean) { + if(node->details->online && node->details->unclean) { + node_mode = "UNCLEAN (online)"; + + } else if(node->details->pending) { + node_mode = "UNCLEAN (pending)"; + + } else { + node_mode = "UNCLEAN (offline)"; + } + + } else if(node->details->pending) { + node_mode = "pending"; + + } else if(node->details->standby_onfail && node->details->online) { + node_mode = "standby (on-fail)"; + + } else if(node->details->standby) { + if(node->details->online) { + node_mode = "standby"; + } else { + node_mode = "OFFLINE (standby)"; + } + + } else if(node->details->online) { + node_mode = "online"; + online_nodes = add_list_element(online_nodes, node->details->uname); + continue; + + }
else { + node_mode = "OFFLINE"; + offline_nodes = add_list_element(offline_nodes, node->details->uname); + continue; + } + + if(safe_str_eq(node->details->uname, node->details->id)) { + printf("Node %s: %s\n", + node->details->uname, node_mode); + } else { + printf("Node %s (%s): %s\n", + node->details->uname, node->details->id, + node_mode); + } + ); + + if(online_nodes) { + printf("Online: [%s ]\n", online_nodes); + crm_free(online_nodes); + } + if(offline_nodes) { + printf("OFFLINE: [%s ]\n", offline_nodes); + crm_free(offline_nodes); + } + + fprintf(stdout, "\n"); + slist_iter(rsc, resource_t, data_set->resources, lpc, + if(is_set(rsc->flags, pe_rsc_orphan) + && rsc->role == RSC_ROLE_STOPPED) { + continue; + } + rsc->fns->print(rsc, NULL, pe_print_printf, stdout); + ); + fprintf(stdout, "\n"); +} + +static void +run_simulation(pe_working_set_t *data_set) +{ + crm_graph_t *transition = NULL; + enum transition_status graph_rc = -1; + + crm_graph_functions_t exec_fns = + { + exec_pseudo_action, + exec_rsc_action, + exec_crmd_action, + exec_stonith_action, + }; + + set_graph_functions(&exec_fns); + + quiet_log("\nExecuting cluster transition:\n"); + transition = unpack_graph(data_set->graph, crm_system_name); + print_graph(LOG_DEBUG, transition); + + do { + graph_rc = run_graph(transition); + + } while(graph_rc == transition_active); + + if(graph_rc != transition_complete) { + fprintf(stderr, "Transition failed: %s\n", transition_status(graph_rc)); + print_graph(LOG_ERR, transition); + } + destroy_graph(transition); + CRM_CHECK(graph_rc == transition_complete, fprintf(stderr, "An invalid transition was produced")); + + if(quiet == FALSE) { + xmlNode *cib_object = NULL; + ha_time_t *a_date = data_set->now; + int rc = global_cib->cmds->query(global_cib, NULL, &cib_object, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + + quiet_log("\nRevised cluster status:\n"); + set_working_set_defaults(data_set); + data_set->input = cib_object; + data_set->now = 
a_date; + + cluster_status(data_set); + print_cluster_status(data_set); + } +} + +static char * +create_action_name(action_t *action) +{ + char *action_name = NULL; + const char *action_host = NULL; + if(action->node) { + action_host = action->node->details->uname; + action_name = crm_concat(action->uuid, action_host, ' '); + + } else if(action->pseudo) { + action_name = crm_strdup(action->uuid); + + } else { + action_host = ""; + action_name = crm_concat(action->uuid, action_host, ' '); + } + if(safe_str_eq(action->task, RSC_CANCEL)) { + char *tmp_action_name = action_name; + action_name = crm_concat("Cancel", tmp_action_name, ' '); + crm_free(tmp_action_name); + } + + return action_name; +} + +static void +create_dotfile(pe_working_set_t *data_set, const char *dot_file, gboolean all_actions) +{ + FILE *dot_strm = fopen(dot_file, "w"); + if(dot_strm == NULL) { + crm_perror(LOG_ERR,"Could not open %s for writing", dot_file); + return; + } + + fprintf(dot_strm, " digraph \"g\" {\n"); + slist_iter( + action, action_t, data_set->actions, lpc, + + const char *style = "filled"; + const char *font = "black"; + const char *color = "black"; + const char *fill = NULL; + char *action_name = create_action_name(action); + crm_debug_3("Action %d: %p", action->id, action); + + if(action->pseudo) { + font = "orange"; + } + + style = "dashed"; + if(action->dumped) { + style = "bold"; + color = "green"; + + } else if(action->rsc != NULL + && is_not_set(action->rsc->flags, pe_rsc_managed)) { + color = "purple"; + if(all_actions == FALSE) { + goto dont_write; + } + + } else if(action->optional) { + color = "blue"; + if(all_actions == FALSE) { + goto dont_write; + } + + } else { + color = "red"; + CRM_CHECK(action->runnable == FALSE, ;); + } + + action->dumped = TRUE; + fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\" %s%s]\n", + action_name, style, color, font, fill?"fillcolor=":"", fill?fill:""); + dont_write: + crm_free(action_name); + ); + + + slist_iter( + 
action, action_t, data_set->actions, lpc, + slist_iter( + before, action_wrapper_t, action->actions_before, lpc2, + char *before_name = NULL; + char *after_name = NULL; + const char *style = "dashed"; + gboolean optional = TRUE; + if(before->state == pe_link_dumped) { + optional = FALSE; + style = "bold"; + } else if(action->pseudo + && (before->type & pe_order_stonith_stop)) { + continue; + } else if(before->state == pe_link_dup) { + continue; + } else if(before->type == pe_order_none) { + continue; + } else if(action->dumped && before->action->dumped) { + optional = FALSE; + } + + if(all_actions || optional == FALSE) { + before_name = create_action_name(before->action); + after_name = create_action_name(action); + fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", + before_name, after_name, style); + crm_free(before_name); + crm_free(after_name); + } + ); + ); + fprintf(dot_strm, "}\n"); + if(dot_strm != NULL) { + fflush(dot_strm); + fclose(dot_strm); + } +} + +/* + * Apply the requested synthetic events to the CIB behind global_cib: + * set the have-quorum attribute, mark nodes up or down, fail nodes, and + * inject operation results given as $rsc_$task_$interval@$node=$rc specs. + * Resources named in op_inject are looked up in data_set->resources, so + * data_set must already hold an unpacked cluster status. + */ +static void modify_configuration( + pe_working_set_t *data_set, + const char *quorum, GListPtr node_up, GListPtr node_down, GListPtr node_fail, GListPtr op_inject) +{ + int rc = cib_ok; + + xmlNode *cib_op = NULL; + xmlNode *cib_node = NULL; + xmlNode *cib_resource = NULL; + + lrm_op_t *op = NULL; + + if(quorum) { + xmlNode *top = create_xml_node(NULL, XML_TAG_CIB); + quiet_log(" + Setting quorum: %s\n", quorum); + /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */ + crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum); + + rc = global_cib->cmds->modify(global_cib, NULL, top, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + } + + slist_iter(node, char, node_up, lpc, + quiet_log(" + Bringing node %s online\n", node); + cib_node = modify_node(global_cib, node, TRUE); + CRM_ASSERT(cib_node != NULL); + + rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + ); + + slist_iter(node, char, node_down, lpc, + quiet_log(" +
Taking node %s offline\n", node); + cib_node = modify_node(global_cib, node, FALSE); + CRM_ASSERT(cib_node != NULL); + + rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + ); + + slist_iter(node, char, node_fail, lpc, + quiet_log(" + Failing node %s\n", node); + cib_node = modify_node(global_cib, node, TRUE); + /* check the node entry before touching it, not after */ + CRM_ASSERT(cib_node != NULL); + crm_xml_add(cib_node, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); + + rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + ); + + + slist_iter(spec, char, op_inject, lpc, + + int rc = 0; + int outcome = 0; + int interval = 0; + + char *key = NULL; + char *node = NULL; + char *task = NULL; + char *resource = NULL; + + const char *rtype = NULL; + const char *rclass = NULL; + const char *rprovider = NULL; + + resource_t *rsc = NULL; + quiet_log(" + Injecting %s into the configuration\n", spec); + + /* %[ also stores a terminating NUL, so a spec without '@' needs strlen(spec) + 1 bytes */ + crm_malloc0(key, strlen(spec) + 1); + crm_malloc0(node, strlen(spec) + 1); + rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); + CRM_CHECK(rc == 3, fprintf(stderr, "Invalid operation spec: %s. 
Only found %d fields\n", spec, rc); continue); + + parse_op_key(key, &resource, &task, &interval); + + rsc = pe_find_resource(data_set->resources, resource); + CRM_CHECK(rsc != NULL, fprintf(stderr, "Invalid resource name: %s\n", resource); continue); + + rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); + rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); + + cib_node = inject_node(global_cib, node); + CRM_ASSERT(cib_node != NULL); + + cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); + CRM_ASSERT(cib_resource != NULL); + + op = create_op(cib_resource, task, interval, outcome); + CRM_ASSERT(op != NULL); + + cib_op = inject_op(cib_resource, op, 0); + CRM_ASSERT(cib_op != NULL); + + rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + ); +} + +static void +setup_input(const char *input, const char *output) +{ + int rc = cib_ok; + cib_t *cib_conn = NULL; + xmlNode *cib_object = NULL; + + if(input == NULL) { + /* Use live CIB */ + cib_conn = cib_new(); + rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); + + if(rc == cib_ok) { + cib_object = get_cib_copy(cib_conn); + } + + cib_conn->cmds->signoff(cib_conn); + cib_delete(cib_conn); + cib_conn = NULL; + + if(cib_object == NULL) { + fprintf(stderr, "Live CIB query failed: empty result\n"); + exit(3); + } + + } else if(safe_str_eq(input, "-")) { + cib_object = filename2xml(NULL); + + } else { + cib_object = filename2xml(input); + } + + if(cli_config_update(&cib_object, NULL, FALSE) == FALSE) { + free_xml(cib_object); + exit(cib_STALE); + } + + if(validate_xml(cib_object, NULL, FALSE) != TRUE) { + free_xml(cib_object); + exit(cib_dtd_validation); + } + + if(output == NULL) { + char *pid = crm_itoa(getpid()); + output = get_shadow_file(pid); + crm_free(pid); + } + + rc = write_xml_file(cib_object, output, FALSE); +
free_xml(cib_object); + cib_object = NULL; + + if(rc < 0) { + fprintf(stderr, "Could not create '%s': %s\n", output, strerror(errno)); + exit(rc); + } + setenv("CIB_file", output, 1); +} + + +/* + * Command-line options handed to crm_set_options() in main(). + * Operation specs for --op-inject and --op-fail use the form + * $rsc_$task_$interval@$node=$rc, which is what the sscanf() and + * snprintf() calls in this file parse and build. + */ +static struct crm_option long_options[] = { + /* Top-level Options */ + {"help", 0, 0, '?', "\tThis text"}, + {"version", 0, 0, '$', "\tVersion information" }, + {"quiet", 0, 0, 'Q', "\tDisplay only essential output"}, + {"verbose", 0, 0, 'V', "\tIncrease debug output"}, + + {"-spacer-", 0, 0, '-', "\nOperations:"}, + {"run", 0, 0, 'R', "\tDetermine the cluster's response to the given configuration and status"}, + {"simulate", 0, 0, 'S', "Simulate the transition's execution and display the resulting cluster status"}, + {"in-place", 0, 0, 'X', "Simulate the transition's execution and store the result back to the input file"}, + {"show-scores", 0, 0, 's', "Show allocation scores"}, + + {"-spacer-", 0, 0, '-', "\nSynthetic Cluster Events:"}, + {"node-up", 1, 0, 'u', "\tBring a node online"}, + {"node-down", 1, 0, 'd', "\tTake a node offline"}, + {"node-fail", 1, 0, 'f', "\tMark a node as failed"}, + {"op-inject", 1, 0, 'i', "\t$rsc_$task_$interval@$node=$rc - Inject the specified task before running the simulation"}, + {"op-fail", 1, 0, 'F', "\t$rsc_$task_$interval@$node=$rc - Fail the specified task while running the simulation"}, + {"set-datetime", 1, 0, 't', "Set date/time"}, + {"quorum", 1, 0, 'q', "\tSpecify a value for quorum"}, + + {"-spacer-", 0, 0, '-', "\nOutput Options:"}, + + {"save-input", 1, 0, 'I', "\tSave the input configuration to the named file"}, + {"save-output", 1, 0, 'O', "Save the output configuration to the named file"}, + {"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"}, + {"save-dotfile", 1, 0, 'D', "Save the transition graph (DOT format) to the named file"}, + {"all-actions", 0, 0, 'a', "\tDisplay all possible actions in the DOT graph - even ones not part of the transition"}, + + {"-spacer-", 0, 0, '-', "\nData 
Source:"}, + {"live-check", 0, 0, 'L', "\tConnect to the CIB and use the current contents as input"}, + {"xml-file", 1, 0, 'x', "\tRetrieve XML from the named file"}, + {"xml-pipe", 0, 0, 'p', "\tRetrieve XML from stdin"}, + + {0, 0, 0, 0} +}; + +/* + * Entry point: parse the options, stage the input CIB through setup_input(), + * apply any synthetic events, then optionally run do_calculations() on the + * result, save the graph/dot files and simulate the transition. + * Returns 0 on success or the failing CIB/file return code. + */ +int +main(int argc, char ** argv) +{ + int rc = 0; + guint modified = 0; + + gboolean store = FALSE; + gboolean process = FALSE; + gboolean verbose = FALSE; + gboolean simulate = FALSE; + gboolean all_actions = FALSE; + + pe_working_set_t data_set; + ha_time_t *a_date = NULL; + + const char *xml_file = "-"; + const char *quorum = NULL; + const char *dot_file = NULL; + const char *graph_file = NULL; + const char *input_file = NULL; + const char *output_file = NULL; + + int flag = 0; + int index = 0; + int argerr = 0; + char *use_date = NULL; + + GListPtr node_up = NULL; + GListPtr node_down = NULL; + GListPtr node_fail = NULL; + GListPtr op_inject = NULL; + + xmlNode *input = NULL; + + crm_log_init("crm_inject", LOG_ERR, FALSE, FALSE, argc, argv); + /* t: and q: are listed in long_options and must be accepted in short form too */ + crm_set_options("?$VQx:Lpu:d:f:i:RSXD:G:I:O:saF:t:q:", "datasource operation [additional options]", + long_options, "Tool for simulating the cluster's response to events"); + + if(argc < 2) { + crm_help('?', LSB_EXIT_EINVAL); + } + + while (1) { + flag = crm_get_option(argc, argv, &index); + if (flag == -1) + break; + + switch(flag) { + case 'V': + verbose = TRUE; + alter_debug(DEBUG_INC); + cl_log_enable_stderr(TRUE); + break; + case '?': + case '$': + crm_help(flag, LSB_EXIT_OK); + break; + case 'p': + xml_file = "-"; + break; + case 'Q': + quiet = TRUE; + break; + case 'L': + xml_file = NULL; + break; + case 'x': + xml_file = optarg; + break; + case 'u': + modified++; + node_up = g_list_append(node_up, optarg); + break; + case 'd': + modified++; + node_down = g_list_append(node_down, optarg); + break; + case 'f': + modified++; + node_fail = g_list_append(node_fail, optarg); + break; + case 'i': + modified++; + op_inject = g_list_append(op_inject, optarg); + break; + case 'F': +
process = TRUE; + simulate = TRUE; + op_fail = g_list_append(op_fail, optarg); + break; + case 'q': + modified++; + quorum = optarg; + break; + case 't': + /* without this case --set-datetime fell through to default and was rejected */ + use_date = optarg; + break; + case 'a': + all_actions = TRUE; + break; + case 's': + process = TRUE; + show_scores = TRUE; + break; + case 'S': + process = TRUE; + simulate = TRUE; + break; + case 'X': + store = TRUE; + process = TRUE; + simulate = TRUE; + break; + case 'R': + process = TRUE; + break; + case 'D': + process = TRUE; + dot_file = optarg; + break; + case 'G': + process = TRUE; + graph_file = optarg; + break; + case 'I': + input_file = optarg; + break; + case 'O': + store = TRUE; + simulate = TRUE; + output_file = optarg; + break; + default: + ++argerr; + break; + } + } + + if (optind > argc) { + ++argerr; + } + + if (argerr) { + crm_help('?', LSB_EXIT_GENERIC); + } + + setup_input(xml_file, output_file); + + global_cib = cib_new(); + global_cib->cmds->signon(global_cib, crm_system_name, cib_command); + + if(use_date != NULL) { + a_date = parse_date(&use_date); + quiet_log(" + Setting effective cluster time: %s", use_date); + log_date(LOG_WARNING, "Set fake 'now' to", a_date, ha_log_date|ha_log_time); + } + + if(quiet == FALSE || modified) { + /* modify_configuration() reads data_set.resources, so the status has to be unpacked even when it is not printed */ + xmlNode *cib_object = NULL; + rc = global_cib->cmds->query(global_cib, NULL, &cib_object, cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == cib_ok); + + set_working_set_defaults(&data_set); + data_set.input = cib_object; + data_set.now = a_date; + + cluster_status(&data_set); + if(quiet == FALSE) { + quiet_log("\nCurrent cluster status:\n"); + print_cluster_status(&data_set); + if(process == FALSE && modified == FALSE) { + return 0; + } + } + } + + if(modified) { + quiet_log("Performing requested modifications\n"); + modify_configuration(&data_set, quorum, node_up, node_down, node_fail, op_inject); + } + + rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call); + if(rc != cib_ok) { + fprintf(stderr, "Could not connect to the CIB for input: %s\n", cib_error2string(rc)); + return rc; + } + + if(input_file != NULL) { + rc =
write_xml_file(input, input_file, FALSE); + if(rc < 0) { + fprintf(stderr, "Could not create '%s': %s\n", input_file, strerror(errno)); + return rc; + } + /* input is still handed to do_calculations() below, so it must not be freed here */ + } + + if(process || simulate) { + if(show_scores) { + printf("Allocation scores:\n"); + } + + do_calculations(&data_set, input, a_date); + + if(show_scores) { + printf("\n"); + } + + if(graph_file != NULL) { + char *msg_buffer = dump_xml_formatted(data_set.graph); + FILE *graph_strm = fopen(graph_file, "w"); + if(graph_strm == NULL) { + crm_perror(LOG_ERR,"Could not open %s for writing", graph_file); + + } else { + if(fprintf(graph_strm, "%s\n\n", msg_buffer) < 0) { + crm_perror(LOG_ERR,"Write to %s failed", graph_file); + } + fflush(graph_strm); + fclose(graph_strm); + } + crm_free(msg_buffer); + } + + if(dot_file != NULL) { + create_dotfile(&data_set, dot_file, all_actions); + } + + if(quiet == FALSE && verbose == FALSE) { + quiet_log("Transition Summary:\n"); + + crm_log_level = LOG_NOTICE; + cl_log_enable_stderr(TRUE); + slist_iter( + rsc, resource_t, data_set.resources, lpc, + LogActions(rsc, &data_set); + ); + + cl_log_enable_stderr(FALSE); + } + } + + if(simulate) { + run_simulation(&data_set); + } + + rc = global_cib->cmds->signoff(global_cib); + cib_delete(global_cib); + fflush(stderr); + + return 0; +} diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c index f921d60498..e4f2e780f1 100644 --- a/tools/ipmiservicelogd.c +++ b/tools/ipmiservicelogd.c @@ -1,631 +1,626 @@ /* * ipmiservicelogd.c * * A program that listens to IPMI events and writes them * out to servicelog. 
* * Author: International Business Machines, IBM * Mark Hamzy * Author: Intel Corporation * Jeff Zheng * * Copyright 2009 International Business Machines, IBM * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* gcc -o ipmiservicelogd -g `pkg-config --cflags --libs OpenIPMI OpenIPMIposix servicelog-1` ipmiservicelogd.c */ /* ./ipmiservicelogd smi 0 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define COMPLEX 1 static os_handler_t *os_hnd; char *getStringExecOutput (char *args[]); char *getSerialNumber (void); char *getProductName (void); static void con_usage (const char *name, const char *help, void *cb_data); static void usage (const char *progname); void ipmi2servicelog (struct sl_data_bmc *bmc_data); static int sensor_threshold_event_handler (ipmi_sensor_t *sensor, enum ipmi_event_dir_e dir, enum ipmi_thresh_e threshold, enum ipmi_event_value_dir_e high_low, enum ipmi_value_present_e value_present, unsigned int raw_value, double value, void *cb_data, ipmi_event_t *event); static int sensor_discrete_event_handler (ipmi_sensor_t *sensor, enum ipmi_event_dir_e dir, int offset, int severity, int prev_severity, void *cb_data, ipmi_event_t *event); static void sensor_change (enum ipmi_update_e op, ipmi_entity_t *ent, ipmi_sensor_t *sensor, void *cb_data); static void entity_change (enum ipmi_update_e op, ipmi_domain_t *domain, ipmi_entity_t *entity, void *cb_data); void setup_done (ipmi_domain_t *domain, int err, unsigned int conn_num, unsigned int port_num, int still_connected, void *user_data); char * getStringExecOutput (char *args[]) { int rc; pid_t pid; int pipefd[2]; rc = pipe2 (pipefd, 0); if (rc == -1) { crm_err ("Error: pipe errno = %d", errno); return NULL; } pid = fork (); if (0 < pid) { /* Parent */ int childExitStatus; char serialNumber[256]; ssize_t sizeRead; /* close write end of pipe */ rc = close (pipefd[1]); if (rc == -1) { crm_err ("Error: parent close (pipefd[1]) = %d", errno); } /* make 0 same as read-from end of pipe */ rc = dup2 (pipefd[0], 0); if (rc == -1) { crm_err ("Error: parent dup2 
(pipefd[0]) = %d", errno); } /* close excess fildes */ rc = close (pipefd[0]); if (rc == -1) { crm_err ("Error: parent close (pipefd[0]) = %d", errno); } waitpid (pid, &childExitStatus, 0); if (!WIFEXITED(childExitStatus)) { crm_err ("waitpid() exited with an error: status = %d", WEXITSTATUS(childExitStatus)); return NULL; } else if (WIFSIGNALED(childExitStatus)) { crm_err ("waitpid() exited due to a signal = %d", WTERMSIG(childExitStatus)); return NULL; } memset (serialNumber, 0, sizeof (serialNumber)); sizeRead = read (0, serialNumber, sizeof (serialNumber) - 1); if (sizeRead > 0) { char *end = serialNumber + strlen (serialNumber) - 1; char *retSerialNumber = NULL; while ( end > serialNumber && (*end == '\n' || *end == '\r' || *end == '\t' || *end == ' ') ) { *end = '\0'; end--; } retSerialNumber = malloc (strlen (serialNumber) + 1); if (retSerialNumber) { strcpy (retSerialNumber, serialNumber); } return retSerialNumber; } return NULL; } else if (pid == 0) { /* Child */ /* close read end of pipe */ rc = close (pipefd[0]); if (rc == -1) { crm_err ("Error: child close (pipefd[0]) = %d", errno); } /* make 1 same as write-to end of pipe */ rc = dup2 (pipefd[1], 1); if (rc == -1) { crm_err ("Error: child dup2 (pipefd[1]) = %d", errno); } /* close excess fildes */ rc = close (pipefd[1]); if (rc == -1) { crm_err ("Error: child close (pipefd[1]) = %d", errno); } rc = execvp (args[0], args); if (rc == -1) { crm_err ("Error: child execvp = %d", errno); } /* In case of error */ return NULL; } else { /* Error */ crm_err ("fork errno = %d", errno); return NULL; } return NULL; } char * getSerialNumber (void) { char *dmiArgs[] = { "dmidecode", "--string", "system-serial-number", NULL }; return getStringExecOutput (dmiArgs); } char * getProductName (void) { char *dmiArgs[] = { "dmidecode", "--string", "system-product-name", NULL }; return getStringExecOutput (dmiArgs); } static void con_usage (const char *name, const char *help, void *cb_data) { printf("\n%s%s", name, help); } 
static void usage(const char *progname) { printf("Usage:\n"); printf(" %s \n", progname); printf(" Where is one of:"); ipmi_parse_args_iter_help(con_usage, NULL); } void ipmi2servicelog (struct sl_data_bmc *bmc_data) { servicelog *slog = NULL; struct sl_event sl_event; uint64_t new_id = 0; struct utsname name; char *serial_number = NULL; char *product_name = NULL; int rc; if (uname (&name) == -1) { crm_err ("Error: uname failed"); return; } rc = servicelog_open (&slog, 0); /* flags is one of SL_FLAG_xxx */ if (!slog) { crm_err ("Error: servicelog_open failed, rc = %d", rc); return; } serial_number = getSerialNumber (); if (serial_number) { if (strlen (serial_number) > 20) { serial_number[20] = '\0'; } } product_name = getProductName (); if (product_name) { if (strlen (product_name) > 20) { product_name[20] = '\0'; } } memset (&sl_event, 0, sizeof (sl_event)); sl_event.next = NULL; /* only used if in a linked list */ sl_event.id = 0; /* unique identifier - filled in by API call */ sl_event.time_logged = time (NULL); sl_event.time_event = time (NULL); sl_event.time_last_update = time (NULL); sl_event.type = SL_TYPE_BMC; /* one of SL_TYPE_* */ sl_event.severity = SL_SEV_WARNING; /* one of SL_SEV_* */ sl_event.platform = name.machine; /* ppc64, etc */ sl_event.machine_serial = serial_number; sl_event.machine_model = product_name; /* it may not have the serial # within the first 20 chars */ sl_event.nodename = name.nodename; sl_event.refcode = "ipmi"; sl_event.description = "ipmi event"; sl_event.serviceable = 1; /* 1 or 0 */ sl_event.predictive = 0; /* 1 or 0 */ sl_event.disposition = SL_DISP_RECOVERABLE; /* one of SL_DISP_* */ sl_event.call_home_status = SL_CALLHOME_NONE; /* one of SL_CALLHOME_*, only valid if serviceable */ sl_event.closed = 1; /* 1 or 0, only valid if serviceable */ sl_event.repair = 0; /* id of repairing repair_action */ sl_event.callouts = NULL; sl_event.raw_data_len = 0; sl_event.raw_data = NULL; sl_event.addl_data = &bmc_data; /* pointer to an 
sl_data_* struct */ rc = servicelog_event_log (slog, &sl_event, &new_id); if (rc != 0) { crm_err ("Error: servicelog_event_log, rc = %d (\"%s\")", rc, servicelog_error (slog)); } else { crm_debug ("Sending to servicelog database"); } - if (serial_number) { - free (serial_number); - } - - if (product_name) { - free (product_name); - } + free (serial_number); + free (product_name); servicelog_close (slog); } static int sensor_threshold_event_handler(ipmi_sensor_t *sensor, enum ipmi_event_dir_e dir, enum ipmi_thresh_e threshold, enum ipmi_event_value_dir_e high_low, enum ipmi_value_present_e value_present, unsigned int raw_value, double value, void *cb_data, ipmi_event_t *event) { ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); int id, instance; char name[IPMI_ENTITY_NAME_LEN]; struct sl_data_bmc bmc_data; uint32_t sel_id; uint32_t sel_type; uint16_t generator; uint8_t version; uint8_t sensor_type; int sensor_lun; int sensor_number; uint8_t event_class; uint8_t event_type; int direction; id = ipmi_entity_get_entity_id(ent); instance = ipmi_entity_get_entity_instance(ent); ipmi_sensor_get_id(sensor, name, sizeof (name)); ipmi_sensor_get_num (sensor, &sensor_lun, &sensor_number); sel_id = ipmi_entity_get_entity_id (ent); sel_type = ipmi_entity_get_type (ent); generator = ipmi_entity_get_slave_address (ent) | (sensor_lun << 5); /* LUN (2 bits) | SLAVE ADDRESS (5 bits) */ version = 0x04; sensor_type = ipmi_sensor_get_sensor_type (sensor); event_class = 0; /* @TBD - where does this come from? 
*/ event_type = ipmi_event_get_type (event); direction = dir; memset (&bmc_data, 0, sizeof (bmc_data)); bmc_data.sel_id = sel_id; bmc_data.sel_type = sel_type; bmc_data.generator = generator; bmc_data.version = version; bmc_data.sensor_type = sensor_type; bmc_data.sensor_number = sensor_number; bmc_data.event_class = event_class; bmc_data.event_type = event_type; bmc_data.direction = direction; crm_debug ("Writing bmc_data (%08x, %08x, %04x, %02x, %02x, %02x, %02x, %02x, %d)\n", bmc_data.sel_id, bmc_data.sel_type, bmc_data.generator, bmc_data.version, bmc_data.sensor_type, bmc_data.sensor_number, bmc_data.event_class, bmc_data.event_type, bmc_data.direction); ipmi2servicelog (&bmc_data); /* This passes the event on to the main event handler, which does not exist in this program. */ return IPMI_EVENT_NOT_HANDLED; } static int sensor_discrete_event_handler(ipmi_sensor_t *sensor, enum ipmi_event_dir_e dir, int offset, int severity, int prev_severity, void *cb_data, ipmi_event_t *event) { ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); int id, instance; char name[IPMI_ENTITY_NAME_LEN]; struct sl_data_bmc bmc_data; uint32_t sel_id; uint32_t sel_type; uint16_t generator; uint8_t version; uint8_t sensor_type; int sensor_lun; int sensor_number; uint8_t event_class; uint8_t event_type; int direction; id = ipmi_entity_get_entity_id(ent); instance = ipmi_entity_get_entity_instance(ent); ipmi_sensor_get_id(sensor, name, sizeof (name)); sel_id = ipmi_entity_get_entity_id (ent); sel_type = ipmi_entity_get_type (ent); generator = ipmi_entity_get_slave_address (ent) | (sensor_lun << 5); /* LUN (2 bits) | SLAVE ADDRESS (5 bits) */ version = 0x04; sensor_type = ipmi_sensor_get_sensor_type (sensor); ipmi_sensor_get_num (sensor, &sensor_lun, &sensor_number); event_class = 0; /* @TBD - where does this come from? 
*/ event_type = ipmi_event_get_type (event); direction = dir; memset (&bmc_data, 0, sizeof (bmc_data)); bmc_data.sel_id = sel_id; bmc_data.sel_type = sel_type; bmc_data.generator = generator; bmc_data.version = version; bmc_data.sensor_type = sensor_type; bmc_data.sensor_number = sensor_number; bmc_data.event_class = event_class; bmc_data.event_type = event_type; bmc_data.direction = direction; crm_debug ("Writing bmc_data (%08x, %08x, %04x, %02x, %02x, %02x, %02x, %02x, %d)\n", bmc_data.sel_id, bmc_data.sel_type, bmc_data.generator, bmc_data.version, bmc_data.sensor_type, bmc_data.sensor_number, bmc_data.event_class, bmc_data.event_type, bmc_data.direction); ipmi2servicelog (&bmc_data); /* This passes the event on to the main event handler, which does not exist in this program. */ return IPMI_EVENT_NOT_HANDLED; } /* Whenever the status of a sensor changes, the function is called We display the information of the sensor if we find a new sensor */ static void sensor_change(enum ipmi_update_e op, ipmi_entity_t *ent, ipmi_sensor_t *sensor, void *cb_data) { int rv; if (op == IPMI_ADDED) { if (ipmi_sensor_get_event_reading_type(sensor) == IPMI_EVENT_READING_TYPE_THRESHOLD) rv = ipmi_sensor_add_threshold_event_handler (sensor, sensor_threshold_event_handler, NULL); else rv = ipmi_sensor_add_discrete_event_handler (sensor, sensor_discrete_event_handler, NULL); if (rv) crm_err ("Unable to add the sensor event handler: %x", rv); } } /* Whenever the status of an entity changes, the function is called When a new entity is created, we search all sensors that belong to the entity */ static void entity_change(enum ipmi_update_e op, ipmi_domain_t *domain, ipmi_entity_t *entity, void *cb_data) { int rv; int id, instance; id = ipmi_entity_get_entity_id(entity); instance = ipmi_entity_get_entity_instance(entity); if (op == IPMI_ADDED) { /* Register callback so that when the status of a sensor changes, sensor_change is called */ rv = ipmi_entity_add_sensor_update_handler(entity, 
sensor_change, entity); if (rv) { crm_err ("ipmi_entity_set_sensor_update_handler: 0x%x", rv); exit(1); } } } /* After we have established connection to domain, this function get called At this time, we can do whatever things we want to do. Herr we want to search all entities in the system */ void setup_done(ipmi_domain_t *domain, int err, unsigned int conn_num, unsigned int port_num, int still_connected, void *user_data) { int rv; /* Register a callback functin entity_change. When a new entities is created, entity_change is called */ rv = ipmi_domain_add_entity_update_handler(domain, entity_change, domain); if (rv) { crm_err ("ipmi_domain_add_entity_update_handler return error: %d", rv); return; } } int main(int argc, char *argv[]) { int rv; int curr_arg = 1; ipmi_args_t *args; ipmi_con_t *con; /* OS handler allocated first. */ os_hnd = ipmi_posix_setup_os_handler(); if (!os_hnd) { crm_err ("ipmi_smi_setup_con: Unable to allocate os handler"); exit(1); } /* Initialize the OpenIPMI library. */ ipmi_init(os_hnd); #ifdef COMPLEX rv = ipmi_parse_args2(&curr_arg, argc, argv, &args); if (rv) { crm_err ("Error parsing command arguments, argument %d: %s", curr_arg, strerror(rv)); usage(argv[0]); exit(1); } #endif crm_make_daemon ("ipmiservicelogd", TRUE, "/var/run/ipmiservicelogd.pid0"); crm_log_init ("ipmiservicelogd", LOG_INFO, FALSE, TRUE, argc, argv); #ifdef COMPLEX rv = ipmi_args_setup_con(args, os_hnd, NULL, &con); if (rv) { crm_err ("ipmi_ip_setup_con: %s", strerror(rv)); crm_err ("Error: Is IPMI configured correctly?"); exit(1); } #else /* If all you need is an SMI connection, this is all the code you need. */ /* Establish connections to domain through system interface. This function connect domain, selector and OS handler together. When there is response message from domain, the status of file descriptor in selector is changed and predefined callback is called. After the connection is established, setup_done will be called. 
*/ rv = ipmi_smi_setup_con(0, os_hnd, NULL, &con); if (rv) { crm_err ("ipmi_smi_setup_con: %s", strerror(rv)); crm_err ("Error: Is IPMI configured correctly?"); exit(1); } #endif rv = ipmi_open_domain("", &con, 1, setup_done, NULL, NULL, NULL, NULL, 0, NULL); if (rv) { crm_err ("ipmi_init_domain: %s", strerror(rv)); exit(1); } /* This is the main loop of the event-driven program. Try to exit the program */ /* Let the selector code run the select loop. */ os_hnd->operation_loop(os_hnd); /* Technically, we can't get here, but this is an example. */ os_hnd->free_os_handler(os_hnd); } diff --git a/xml/constraints.rng.in b/xml/constraints.rng.in index 721cbe9810..36319db5c2 100644 --- a/xml/constraints.rng.in +++ b/xml/constraints.rng.in @@ -1,162 +1,180 @@ - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + start promote demote stop Stopped Started Master Slave + + + Optional + Mandatory + Serialize + + + diff --git a/xml/pacemaker.rng.in b/xml/pacemaker.rng.in index 894c684097..12b56aaf02 100644 --- a/xml/pacemaker.rng.in +++ b/xml/pacemaker.rng.in @@ -1,132 +1,137 @@ none pacemaker-0.6 transitional-0.6 pacemaker-0.7 pacemaker-1.0 normal member ping - - - + + + + + + + + diff --git a/xml/resources.rng.in b/xml/resources.rng.in index 86c6b5cdbe..dbc2a0dedb 100644 --- a/xml/resources.rng.in +++ b/xml/resources.rng.in @@ -1,176 +1,181 @@ ocf lsb heartbeat stonith + + + + + Stopped Started Slave Master nothing quorum fencing ignore block stop restart standby fence