diff --git a/configure.ac b/configure.ac index b7b7742a4a..961f94b8ca 100644 --- a/configure.ac +++ b/configure.ac @@ -1,1880 +1,1891 @@ dnl dnl autoconf for Pacemaker dnl dnl License: GNU General Public License (GPL) dnl =============================================== dnl Bootstrap dnl =============================================== AC_PREREQ(2.59) dnl Suggested structure: dnl information on the package dnl checks for programs dnl checks for libraries dnl checks for header files dnl checks for types dnl checks for structures dnl checks for compiler characteristics dnl checks for library functions dnl checks for system services AC_INIT(pacemaker, 1.1.10, pacemaker@oss.clusterlabs.org,,http://clusterlabs.org) CRM_DTD_VERSION="1.2" PCMK_FEATURES="" HB_PKG=heartbeat AC_CONFIG_AUX_DIR(.) AC_CANONICAL_HOST dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below) dnl dnl Internal header: include/config.h dnl - Contains ALL defines dnl - include/config.h.in is generated automatically by autoheader dnl - NOT to be included in any header files except lha_internal.h dnl (which is also not to be included in any other header files) dnl dnl External header: include/crm_config.h dnl - Contains a subset of defines checked here dnl - Manually edit include/crm_config.h.in to have configure include dnl new defines dnl - Should not include HAVE_* defines dnl - Safe to include anywhere AM_CONFIG_HEADER(include/config.h include/crm_config.h) ALL_LINGUAS="en fr" AC_ARG_WITH(version, [ --with-version=version Override package version (if you're a packager needing to pretend) ], [ PACKAGE_VERSION="$withval" ]) AC_ARG_WITH(pkg-name, [ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ], [ PACKAGE_NAME="$withval" ]) AM_INIT_AUTOMAKE($PACKAGE_NAME, $PACKAGE_VERSION) AC_DEFINE_UNQUOTED(PACEMAKER_VERSION, "$PACKAGE_VERSION", Current pacemaker version) PACKAGE_SERIES=`echo $PACKAGE_VERSION | awk -F. 
'{ print $1"."$2 }'`
AC_SUBST(PACKAGE_SERIES)
AC_SUBST(PACKAGE_VERSION)

dnl automake >= 1.11 offers --enable-silent-rules for suppressing the output from
dnl normal compilation.  When a failure occurs, it will then display the full
dnl command line
dnl Wrap in m4_ifdef to avoid breaking on older platforms
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])

dnl Example 2.4. Silent Custom Rule to Generate a File
dnl %-bar.pc: %.pc
dnl	$(AM_V_GEN)$(LN_S) $(notdir $^) $@

CC_IN_CONFIGURE=yes
export CC_IN_CONFIGURE

LDD=ldd

dnl ========================================================================
dnl Compiler characteristics
dnl ========================================================================

AC_PROG_CC dnl Can force other with environment variable "CC".
AM_PROG_CC_C_O
AC_PROG_CC_STDC
gl_EARLY
gl_INIT

AC_LIBTOOL_DLOPEN dnl Enable dlopen support...
AC_LIBLTDL_CONVENIENCE dnl make libltdl a convenience lib
AC_PROG_LIBTOOL
AC_PROG_YACC
AM_PROG_LEX

AC_C_STRINGIZE
AC_TYPE_SIZE_T
AC_CHECK_SIZEOF(char)
AC_CHECK_SIZEOF(short)
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(long long)
AC_STRUCT_TIMEZONE

dnl ===============================================
dnl Helpers
dnl ===============================================

dnl cc_supports_flag FLAG...
dnl   Compiles an empty program with CFLAGS temporarily replaced by the given
dnl   flag(s).  Returns 0 when the compile succeeds and 1 otherwise.
cc_supports_flag() {
         dnl Join all arguments into one word so the assignment stays valid
         dnl in shells whose "local" treats every word as a separate variable.
         local CFLAGS="$*"
         AC_MSG_CHECKING(whether $CC supports "$@")
         AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[ ]])],
           [RC=0; AC_MSG_RESULT(yes)],
           [RC=1; AC_MSG_RESULT(no)])
         return $RC
}

dnl try_extract_header_define HEADER SYMBOL DEFAULT
dnl   Builds and runs a small program that prints SYMBOL when HEADER defines
dnl   it as a macro.  The printed value, or DEFAULT when nothing was printed
dnl   (header missing, symbol undefined, compile failure), is written to
dnl   stdout so the caller can capture it with backticks.
try_extract_header_define() {
	  AC_MSG_CHECKING(if $2 in $1 exists)
	  Cfile=$srcdir/extract_define.$2.${$}
	  dnl The generated program calls printf, so it needs stdio.h
	  printf "#include <stdio.h>\n" > ${Cfile}.c
	  printf "#include <%s>\n" $1 >> ${Cfile}.c
	  printf "int main(int argc, char **argv) {\n" >> ${Cfile}.c
	  printf "#ifdef %s\n" $2 >> ${Cfile}.c
	  printf "printf(\"%%s\", %s);\n" $2 >> ${Cfile}.c
	  printf "#endif \n return 0; }\n" >> ${Cfile}.c
	  $CC $CFLAGS ${Cfile}.c -o ${Cfile} 2>/dev/null
	  value=
	  if test -x ${Cfile}; then
	      value=`${Cfile} 2>/dev/null`
	  fi
	  dnl POSIX test only defines "=" for string equality
	  if test x"${value}" = x""; then
	      value=$3
	      AC_MSG_RESULT(default: $value)
	  else
	      AC_MSG_RESULT($value)
	  fi
	  dnl Never use the extracted value as the printf format string
	  printf '%s' "$value"
	  rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
}

dnl extract_header_define HEADER SYMBOL
dnl   Like try_extract_header_define but without a fallback: SYMBOL is printed
dnl   unconditionally and compiler diagnostics are left visible.
extract_header_define() {
	  AC_MSG_CHECKING(for $2 in $1)
	  Cfile=$srcdir/extract_define.$2.${$}
	  dnl The generated program calls printf, so it needs stdio.h
	  printf "#include <stdio.h>\n" > ${Cfile}.c
	  printf "#include <%s>\n" $1 >> ${Cfile}.c
	  printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c
	  $CC $CFLAGS ${Cfile}.c -o ${Cfile}
	  value=`${Cfile}`
	  AC_MSG_RESULT($value)
	  dnl Never use the extracted value as the printf format string
	  printf '%s' "$value"
	  rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
}

dnl ===============================================
dnl Configure Options
dnl ===============================================

dnl Some systems, like Solaris require a custom package name
AC_ARG_WITH(pkgname,
    [ --with-pkgname=name name for pkg (typically for Solaris) ],
    [ PKGNAME="$withval" ],
    [ PKGNAME="LXHAhb" ],
  )
AC_SUBST(PKGNAME)

AC_ARG_ENABLE([ansi],
[ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers. [default=no]])

AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings very pedantic and fatal warnings for gcc [default=yes]])

AC_ARG_ENABLE([quiet],
[ --enable-quiet Supress make output unless there is an error [default=no]])

AC_ARG_ENABLE([thread-safe],
[ --enable-thread-safe Enable some client libraries to be thread safe.
[default=no]]) AC_ARG_ENABLE([bundled-ltdl], [ --enable-bundled-ltdl Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]]) LTDL_LIBS="" AC_ARG_ENABLE([no-stack], [ --enable-no-stack Only build the Policy Engine and pieces needed to support it [default=no]]) AC_ARG_ENABLE([upstart], [ --enable-upstart Do not build support for the Upstart init system [default=yes]]) AC_ARG_ENABLE([systemd], [ --enable-systemd Do not build support for the Systemd init system [default=yes]]) AC_ARG_WITH(ais, [ --with-ais Support the Corosync messaging and membership layer ], [ SUPPORT_CS=$withval ], [ SUPPORT_CS=try ], ) AC_ARG_WITH(corosync, [ --with-corosync Support the Corosync messaging and membership layer ], [ SUPPORT_CS=$withval ] dnl initialized in AC_ARG_WITH(ais...) already, dnl don't reset to try if it was given as --without-ais ) AC_ARG_WITH(heartbeat, [ --with-heartbeat Support the Heartbeat messaging and membership layer ], [ SUPPORT_HEARTBEAT=$withval ], [ SUPPORT_HEARTBEAT=try ], ) AC_ARG_WITH(cman, [ --with-cman Support the consumption of membership and quorum from cman ], [ SUPPORT_CMAN=$withval ], [ SUPPORT_CMAN=try ], ) AC_ARG_WITH(cpg, [ --with-cs-quorum Support the consumption of membership and quorum from corosync ], [ SUPPORT_CS_QUORUM=$withval ], [ SUPPORT_CS_QUORUM=try ], ) AC_ARG_WITH(nagios, [ --with-nagios Support nagios remote monitoring ], [ SUPPORT_NAGIOS=$withval ], [ SUPPORT_NAGIOS=try ], ) AC_ARG_WITH(nagios-plugin-dir, [ --with-nagios-plugin-dir=DIR Directory for nagios plugins [${NAGIOS_PLUGIN_DIR}]], [ NAGIOS_PLUGIN_DIR="$withval" ] ) AC_ARG_WITH(nagios-metadata-dir, [ --with-nagios-metadata-dir=DIR Directory for nagios plugins metadata [${NAGIOS_METADATA_DIR}]], [ NAGIOS_METADATA_DIR="$withval" ] ) AC_ARG_WITH(snmp, [ --with-snmp Support the SNMP protocol ], [ SUPPORT_SNMP=$withval ], [ SUPPORT_SNMP=try ], ) AC_ARG_WITH(esmtp, [ --with-esmtp Support the sending mail notifications with the esmtp library ], 
[ SUPPORT_ESMTP=$withval ], [ SUPPORT_ESMTP=try ], ) AC_ARG_WITH(acl, [ --with-acl Support CIB ACL ], [ SUPPORT_ACL=$withval ], [ SUPPORT_ACL=no ], ) AC_ARG_WITH(cibsecrets, [ --with-cibsecrets Support CIB secrets ], [ SUPPORT_CIBSECRETS=$withval ], [ SUPPORT_CIBSECRETS=no ], ) CSPREFIX="" AC_ARG_WITH(ais-prefix, [ --with-ais-prefix=DIR Prefix used when Corosync was installed [$prefix]], [ CSPREFIX=$withval ], [ CSPREFIX=$prefix ]) LCRSODIR="" AC_ARG_WITH(lcrso-dir, [ --with-lcrso-dir=DIR Corosync lcrso files. ], [ LCRSODIR="$withval" ]) INITDIR="" AC_ARG_WITH(initdir, [ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]], [ INITDIR="$withval" ]) SUPPORT_PROFILING=0 AC_ARG_WITH(profiling, [ --with-profiling Disable optimizations for effective profiling ], [ SUPPORT_PROFILING=$withval ]) AC_ARG_WITH(coverage, [ --with-coverage Disable optimizations for effective profiling ], [ SUPPORT_COVERAGE=$withval ]) PUBLICAN_BRAND="common" AC_ARG_WITH(brand, [ --with-brand=brand Brand to use for generated documentation [$PUBLICAN_BRAND]], [ PUBLICAN_BRAND="$withval" ]) AC_SUBST(PUBLICAN_BRAND) ASCIIDOC_CLI_TYPE="pcs" AC_ARG_WITH(doc-cli, [ --with-doc-cli=cli_type CLI type to use for generated documentation. 
[$ASCIIDOC_CLI_TYPE]], [ ASCIIDOC_CLI_TYPE="$withval" ]) AC_SUBST(ASCIIDOC_CLI_TYPE) dnl =============================================== dnl General Processing dnl =============================================== AC_SUBST(HB_PKG) INIT_EXT="" echo Our Host OS: $host_os/$host AC_MSG_NOTICE(Sanitizing prefix: ${prefix}) case $prefix in NONE) prefix=/usr dnl Fix default variables - "prefix" variable if not specified if test "$localstatedir" = "\${prefix}/var"; then localstatedir="/var" fi if test "$sysconfdir" = "\${prefix}/etc"; then sysconfdir="/etc" fi ;; esac AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix}) case $exec_prefix in dnl For consistency with Heartbeat, map NONE->$prefix NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac AC_MSG_NOTICE(Sanitizing ais_prefix: ${CSPREFIX}) case $CSPREFIX in dnl For consistency with Heartbeat, map NONE->$prefix NONE) CSPREFIX=$prefix;; prefix) CSPREFIX=$prefix;; esac AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR}) case $INITDIR in prefix) INITDIR=$prefix;; "") AC_MSG_CHECKING(which init (rc) directory to use) for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \ /usr/local/etc/rc.d /etc/rc.d do if test -d $initdir then INITDIR=$initdir break fi done AC_MSG_RESULT($INITDIR);; esac AC_SUBST(INITDIR) AC_MSG_NOTICE(Sanitizing libdir: ${libdir}) case $libdir in dnl For consistency with Heartbeat, map NONE->$prefix *prefix*|NONE) AC_MSG_CHECKING(which lib directory to use) for aDir in lib64 lib do trydir="${exec_prefix}/${aDir}" if test -d ${trydir} then libdir=${trydir} break fi done AC_MSG_RESULT($libdir); ;; esac dnl Expand autoconf variables so that we dont end up with '${prefix}' dnl in #defines and python scripts dnl NOTE: Autoconf deliberately leaves them unexpanded to allow dnl make exec_prefix=/foo install dnl No longer being able to do this seems like no great loss to me... 
dnl Resolve nested ${...} references in the standard installation directories.
dnl Every variable gets the same two evaluation passes, in the listed order.
for pcmk_dirvar in prefix exec_prefix bindir sbindir libexecdir datadir \
    sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir \
    infodir mandir
do
    eval "pcmk_raw=\${$pcmk_dirvar}"
    eval "$pcmk_dirvar=`eval echo $pcmk_raw`"
done

dnl Home-grown variables
eval INITDIR="${INITDIR}"
eval docdir="`eval echo ${docdir}`"

dnl Default to the versioned documentation directory when none was given
dnl (an alternative layout would be ${datadir}/doc/packages/${PACKAGE})
test x"${docdir}" != x"" || docdir=${datadir}/doc/${PACKAGE}-${VERSION}
AC_SUBST(docdir)

dnl Warn about every configured directory that is absent on the build host
for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \
    sharedstatedir localstatedir libdir includedir oldincludedir infodir \
    mandir INITDIR docdir
do
    dirname=`eval echo '${'${j}'}'`
    test -d "$dirname" || AC_MSG_WARN([$j directory ($dirname) does not exist!])
done

dnl This OS-based decision-making is poor autotools practice;
dnl feature-based mechanisms are strongly preferred.
dnl
dnl So keep this section to a bare minimum; regard as a "necessary evil".
dnl Per-platform adjustments, selected by the canonical host OS name.
case "$host_os" in
-*bsd*) LIBS="-L/usr/local/lib"
+*bsd*)
+	AC_DEFINE_UNQUOTED(ON_BSD, 1, Compiling for BSD platform)
+	dnl Append, as the darwin branch does, so a caller-supplied LIBS survives
+	LIBS="$LIBS -L/usr/local/lib"
	CPPFLAGS="$CPPFLAGS -I/usr/local/include"
	INIT_EXT=".sh"
	;;
*solaris*)
	;;
*linux*)
	AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
	CFLAGS="$CFLAGS -I${prefix}/include"
	;;
darwin*)
	AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
	LIBS="$LIBS -L${prefix}/lib"
	CFLAGS="$CFLAGS -I${prefix}/include"
	;;
esac

dnl Eventually remove this
CFLAGS="$CFLAGS -I${prefix}/include/heartbeat"

AC_SUBST(INIT_EXT)
AC_MSG_NOTICE(Host CPU: $host_cpu)

dnl Ask GCC for 64-bit code on 64-bit PowerPC unless CFLAGS already mentions it
case "$host_cpu" in
  ppc64|powerpc64)
    case $CFLAGS in
     *powerpc64*)
	;;
     *)
	if test "$GCC" = yes; then
	  CFLAGS="$CFLAGS -m64"
	fi
	;;
    esac
esac

dnl Pick the printf conversion that prints a uint64_t without a warning.
dnl The probe is compiled with -Wall -Werror: if %lu is accepted for a
dnl uint64_t argument it is used, otherwise %llu is selected.  The probe
dnl needs its three headers and a format string that consumes both
dnl arguments, or it fails for unrelated reasons and always yields %llu.
AC_MSG_CHECKING(which format is needed to print uint64_t)

ac_save_CFLAGS=$CFLAGS
CFLAGS="-Wall -Werror"

AC_COMPILE_IFELSE(
  [AC_LANG_PROGRAM(
    [
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
    ],
    [
int max = 512;
uint64_t bignum = 42;
char *buffer = malloc(max);
const char *random = "random";
snprintf(buffer, max-1, "<quorum id=%lu quorate=%s/>", bignum, random);
fprintf(stderr, "Result: %s\n", buffer);
    ]
  )],
  [U64T="%lu"],
  [U64T="%llu"]
)
CFLAGS=$ac_save_CFLAGS
AC_MSG_RESULT($U64T)
AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)

dnl ===============================================
dnl Program Paths
dnl ===============================================

PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
export PATH

dnl Replacing AC_PROG_LIBTOOL with AC_CHECK_PROG because LIBTOOL
dnl was NOT being expanded all the time thus causing things to fail.
AC_CHECK_PROGS(LIBTOOL, glibtool libtool libtool15 libtool13) AM_PATH_PYTHON AC_CHECK_PROGS(MAKE, gmake make) AC_PATH_PROGS(HTML2TXT, lynx w3m) AC_PATH_PROGS(HELP2MAN, help2man) AC_PATH_PROGS(POD2MAN, pod2man, pod2man) AC_PATH_PROGS(ASCIIDOC, asciidoc) AC_PATH_PROGS(PUBLICAN, publican) AC_PATH_PROGS(INKSCAPE, inkscape) AC_PATH_PROGS(XSLTPROC, xsltproc) AC_PATH_PROGS(FOP, fop) AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh) AC_PATH_PROGS(SCP, scp, /usr/bin/scp) AC_PATH_PROGS(TAR, tar) AC_PATH_PROGS(MD5, md5) AC_PATH_PROGS(TEST, test) AC_PATH_PROGS(PKGCONFIG, pkg-config) AC_PATH_PROGS(XML2CONFIG, xml2-config) AC_PATH_PROGS(VALGRIND_BIN, valgrind, /usr/bin/valgrind) AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command) dnl Disable these until we decide if the stonith config file should be supported dnl AC_PATH_PROGS(BISON, bison) dnl AC_PATH_PROGS(FLEX, flex) dnl AC_PATH_PROGS(HAVE_YACC, $YACC) if test x"${LIBTOOL}" = x""; then AC_MSG_ERROR(You need (g)libtool installed in order to build ${PACKAGE}) fi if test x"${MAKE}" = x""; then AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE}) fi AM_CONDITIONAL(BUILD_HELP, test x"${HELP2MAN}" != x"") if test x"${HELP2MAN}" != x""; then PCMK_FEATURES="$PCMK_FEATURES generated-manpages" fi MANPAGE_XSLT="" if test x"${XSLTPROC}" != x""; then AC_MSG_CHECKING(docbook to manpage transform) XSLT=`find ${datadir} -name docbook.xsl` for xsl in $XSLT; do dname=`dirname $xsl` bname=`basename $dname` if test "$bname" = "manpages"; then MANPAGE_XSLT="$xsl" break fi done fi AC_MSG_RESULT($MANPAGE_XSLT) AC_SUBST(MANPAGE_XSLT) AM_CONDITIONAL(BUILD_XML_HELP, test x"${MANPAGE_XSLT}" != x"") if test x"${MANPAGE_XSLT}" != x""; then PCMK_FEATURES="$PCMK_FEATURES agent-manpages" fi AM_CONDITIONAL(BUILD_ASCIIDOC, test x"${ASCIIDOC}" != x"") if test x"${ASCIIDOC}" != x""; then PCMK_FEATURES="$PCMK_FEATURES ascii-docs" fi SUPPORT_STONITH_CONFIG=0 if test x"${HAVE_YACC}" != x"" -a x"${FLEX}" != x"" -a x"${BISON}" != x""; then 
SUPPORT_STONITH_CONFIG=1 PCMK_FEATURES="$PCMK_FEATURES st-conf" fi AM_CONDITIONAL(BUILD_STONITH_CONFIG, test $SUPPORT_STONITH_CONFIG = 1) AC_DEFINE_UNQUOTED(SUPPORT_STONITH_CONFIG, $SUPPORT_STONITH_CONFIG, Support a stand-alone stonith config file in addition to the CIB) AM_CONDITIONAL(BUILD_DOCBOOK, test x"${PUBLICAN}" != x"" -a x"${INKSCAPE}" != x"") if test x"${PUBLICAN}" != x"" -a x"${INKSCAPE}" != x""; then AC_MSG_NOTICE(Enabling publican) PCMK_FEATURES="$PCMK_FEATURES publican-docs" fi dnl ======================================================================== dnl checks for library functions to replace them dnl dnl NoSuchFunctionName: dnl is a dummy function which no system supplies. It is here to make dnl the system compile semi-correctly on OpenBSD which doesn't know dnl how to create an empty archive dnl dnl scandir: Only on BSD. dnl System-V systems may have it, but hidden and/or deprecated. dnl A replacement function is supplied for it. dnl dnl setenv: is some bsdish function that should also be avoided (use dnl putenv instead) dnl On the other hand, putenv doesn't provide the right API for the dnl code and has memory leaks designed in (sigh...) Fortunately this dnl A replacement function is supplied for it. dnl dnl strerror: returns a string that corresponds to an errno. dnl A replacement function is supplied for it. dnl dnl strnlen: is a gnu function similar to strlen, but safer. dnl We wrote a tolearably-fast replacement function for it. dnl dnl strndup: is a gnu function similar to strdup, but safer. dnl We wrote a tolearably-fast replacement function for it. AC_REPLACE_FUNCS(alphasort NoSuchFunctionName scandir setenv strerror strchrnul unsetenv strnlen strndup) dnl =============================================== dnl Libraries dnl =============================================== AC_CHECK_LIB(socket, socket) dnl -lsocket AC_CHECK_LIB(c, dlopen) dnl if dlopen is in libc... 
AC_CHECK_LIB(dl, dlopen)                dnl -ldl (for Linux)
AC_CHECK_LIB(rt, sched_getscheduler)    dnl -lrt (for Tru64)
AC_CHECK_LIB(gnugetopt, getopt_long)    dnl -lgnugetopt ( if available )
AC_CHECK_LIB(pam, pam_start)            dnl -lpam (if available)

AC_CHECK_FUNCS([sched_getparam sched_setparam sched_get_priority_min])

AC_CHECK_LIB(uuid, uuid_parse)          dnl load the library if necessary
AC_CHECK_FUNCS(uuid_unparse)            dnl OSX ships uuid_* as standard functions

AC_CHECK_HEADERS(uuid/uuid.h)

dnl Hard requirements: uuid_unparse() and a pkg-config binary
test "x$ac_cv_func_uuid_unparse" = xyes ||
    AC_MSG_ERROR(You do not have the libuuid development package installed)

test x"${PKGCONFIG}" != x"" ||
    AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE})

dnl The thread-safe build asks pkg-config for gthread instead of plain glib
case "${enable_thread_safe}" in
    yes) GPKGNAME="gthread-2.0" ;;
    *)   GPKGNAME="glib-2.0" ;;
esac

if $PKGCONFIG --exists $GPKGNAME; then
    GLIBCONFIG="$PKGCONFIG $GPKGNAME"
else
    dnl Trace the failing pkg-config invocations before giving up
    set -x
    echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH
    $PKGCONFIG --exists $GPKGNAME; echo $?
    $PKGCONFIG --cflags $GPKGNAME; echo $?
    $PKGCONFIG $GPKGNAME; echo $?
    set +x

    AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE})
fi
AC_MSG_RESULT(using $GLIBCONFIG)

dnl Where is dlopen?  Nothing extra is linked when libc provides it, -ldl is
dnl used when libdl provides it, otherwise the libtool-detected value is taken.
case "$ac_cv_lib_c_dlopen:$ac_cv_lib_dl_dlopen" in
    yes:*) LIBADD_DL="" ;;
    *:yes) LIBADD_DL=-ldl ;;
    *)     LIBADD_DL=${lt_cv_dlopen_libs} ;;
esac

dnl
dnl Check for location of gettext
dnl
dnl On at least Solaris 2.x, where it is in libc, specifying lintl causes
dnl grief. Ensure minimal result, not the sum of all possibilities.
dnl And do libc first.
dnl Known examples:
dnl    c:    Linux, Solaris 2.6+
dnl    intl: BSD, AIX

dnl Probe libc first, fall back to libintl, and abort when neither has gettext
AC_CHECK_LIB(c, gettext, ,
    [AC_CHECK_LIB(intl, gettext, ,
        [AC_MSG_ERROR(You need gettext installed in order to build ${PACKAGE})])])

dnl Fold the glib compile and link flags reported by pkg-config into the build
if test "X$GLIBCONFIG" != X; then
    AC_MSG_CHECKING(for special glib includes: )
    GLIBHEAD=`$GLIBCONFIG --cflags`
    AC_MSG_RESULT($GLIBHEAD)
    CPPFLAGS="$CPPFLAGS $GLIBHEAD"

    AC_MSG_CHECKING(for glib library flags)
    GLIBLIB=`$GLIBCONFIG --libs`
    AC_MSG_RESULT($GLIBLIB)
    LIBS="$LIBS $GLIBLIB"
fi

+dnl FreeBSD needs -lcompat for ftime() used by lrmd.c
+AC_CHECK_LIB([compat], [ftime], [COMPAT_LIBS='-lcompat'])
+AC_SUBST(COMPAT_LIBS)
+
dnl ========================================================================
dnl Headers
dnl ========================================================================

AC_HEADER_STDC
AC_CHECK_HEADERS([arpa/inet.h asm/types.h assert.h auth-client.h ctype.h \
    dirent.h errno.h fcntl.h getopt.h glib.h grp.h limits.h \
    linux/errqueue.h malloc.h netdb.h netinet/in.h netinet/ip.h \
    pam/pam_appl.h pthread.h pwd.h security/pam_appl.h sgtty.h signal.h \
    stdarg.h stddef.h stdio.h stdlib.h string.h strings.h sys/dir.h \
    sys/ioctl.h sys/param.h sys/poll.h sys/reboot.h sys/resource.h \
    sys/select.h sys/socket.h])
AC_CHECK_HEADERS(sys/signalfd.h)
AC_CHECK_HEADERS(sys/sockio.h)
AC_CHECK_HEADERS(sys/stat.h)
AC_CHECK_HEADERS(sys/time.h)
AC_CHECK_HEADERS(sys/timeb.h)
AC_CHECK_HEADERS(sys/types.h)
AC_CHECK_HEADERS(sys/uio.h)
AC_CHECK_HEADERS(sys/un.h)
AC_CHECK_HEADERS(sys/utsname.h)
AC_CHECK_HEADERS(sys/wait.h)
AC_CHECK_HEADERS(time.h)
AC_CHECK_HEADERS(unistd.h)
AC_CHECK_HEADERS(winsock.h)

dnl These headers need prerequisites before the tests will pass
dnl AC_CHECK_HEADERS(net/if.h)
dnl AC_CHECK_HEADERS(netinet/icmp6.h)
dnl AC_CHECK_HEADERS(netinet/ip6.h)
dnl AC_CHECK_HEADERS(netinet/ip_icmp.h)

dnl libxml2 and libxslt are mandatory; xml2-config supplies the include flags
AC_MSG_CHECKING(for special libxml2 includes)
if test "x$XML2CONFIG" = "x"; then
   AC_MSG_ERROR(libxml2 config not found)
else
   XML2HEAD="`$XML2CONFIG --cflags`"
   AC_MSG_RESULT($XML2HEAD)
   AC_CHECK_LIB(xml2, xmlReadMemory)
   AC_CHECK_LIB(xslt, xsltApplyStylesheet)
fi

CPPFLAGS="$CPPFLAGS $XML2HEAD"

AC_CHECK_HEADERS(libxml/xpath.h)
AC_CHECK_HEADERS(libxslt/xslt.h)
if test "$ac_cv_header_libxml_xpath_h" != "yes"; then
   AC_MSG_ERROR(The libxml developement headers were not found)
fi
if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
   AC_MSG_ERROR(The libxslt developement headers were not found)
fi

dnl ========================================================================
dnl Structures
dnl ========================================================================

dnl struct tm is declared by time.h
AC_CHECK_MEMBERS([struct tm.tm_gmtoff],,,[[#include <time.h>]])
dnl NOTE(review): the header name was lost from this check; lrm/lrm_api.h is
dnl presumed to be the cluster-glue header declaring lrm_op_t - confirm.
AC_CHECK_MEMBERS([lrm_op_t.rsc_deleted],,,[[#include <lrm/lrm_api.h>]])

dnl ========================================================================
dnl Functions
dnl ========================================================================

AC_CHECK_FUNCS(g_log_set_default_handler)
AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function]))
AC_CHECK_FUNCS(nanosleep, AC_DEFINE(HAVE_DECL_NANOSLEEP, 1, [Have nanosleep function]))

dnl ========================================================================
dnl ltdl
dnl ========================================================================

dnl Use the installed libltdl when present, unless --enable-bundled-ltdl
dnl forces the bundled copy, which is unpacked from libltdl.tar in $srcdir.
AC_CHECK_LIB(ltdl, lt_dlopen, [LTDL_foo=1])
if test "x${enable_bundled_ltdl}" = "xyes"; then
   if test "x$ac_cv_lib_ltdl_lt_dlopen" = xyes; then
      AC_MSG_NOTICE([Disabling usage of installed ltdl])
   fi
   ac_cv_lib_ltdl_lt_dlopen=no
fi

LIBLTDL_DIR=""
if test "x$ac_cv_lib_ltdl_lt_dlopen" != xyes ; then
   AC_MSG_NOTICE([Installing local ltdl])
   LIBLTDL_DIR=libltdl
   dnl Chain with && so a failed cd is reported instead of running tar
   dnl in whatever directory configure happens to be in
   ( cd "$srcdir" && $TAR -xvf libltdl.tar )
   if test "$?" -ne 0; then
     AC_MSG_ERROR([$TAR of libltdl.tar in $srcdir failed])
   fi
   AC_CONFIG_SUBDIRS(libltdl)
else
   LIBS="$LIBS -lltdl"
   AC_MSG_NOTICE([Using installed ltdl])
   INCLTDL=""
   LIBLTDL=""
fi

AC_SUBST(INCLTDL)
AC_SUBST(LIBLTDL)
AC_SUBST(LIBLTDL_DIR)

dnl ========================================================================
dnl bzip2
dnl ========================================================================
AC_CHECK_HEADERS(bzlib.h)
AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress)

if test x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress != xyes ; then
   AC_MSG_ERROR(BZ2 libraries not found)
fi

if test x$ac_cv_header_bzlib_h != xyes; then
   AC_MSG_ERROR(BZ2 Development headers not found)
fi

dnl ========================================================================
dnl sighandler_t is missing from Illumos, Solaris11 systems
dnl ========================================================================

dnl The probe must include signal.h, where sighandler_t is declared when
dnl the platform provides it.
AC_MSG_CHECKING([for sighandler_t])
AC_COMPILE_IFELSE(
  [AC_LANG_PROGRAM([[#include <signal.h>]], [[sighandler_t *f;]])],
  [has_sighandler_t=yes],
  [has_sighandler_t=no])
AC_MSG_RESULT($has_sighandler_t)
if test "$has_sighandler_t" = "yes" ; then
    AC_DEFINE( HAVE_SIGHANDLER_T, 1, [Define if sighandler_t available] )
fi

dnl ========================================================================
dnl ncurses
dnl ========================================================================
dnl
dnl A few OSes (e.g. Linux) deliver a default "ncurses" alongside "curses".
dnl Many non-Linux deliver "curses"; sites may add "ncurses".
dnl dnl However, the source-code recommendation for both is to #include "curses.h" dnl (i.e. "ncurses" still wants the include to be simple, no-'n', "curses.h"). dnl dnl ncurse takes precedence. dnl AC_CHECK_HEADERS(curses.h) AC_CHECK_HEADERS(curses/curses.h) AC_CHECK_HEADERS(ncurses.h) AC_CHECK_HEADERS(ncurses/ncurses.h) dnl Although n-library is preferred, only look for it if the n-header was found. CURSESLIBS='' if test "$ac_cv_header_ncurses_h" = "yes"; then AC_CHECK_LIB(ncurses, printw, [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)] ) fi if test "$ac_cv_header_ncurses_ncurses_h" = "yes"; then AC_CHECK_LIB(ncurses, printw, [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)] ) fi dnl Only look for non-n-library if there was no n-library. if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_h" = "yes"; then AC_CHECK_LIB(curses, printw, [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)] ) fi dnl Only look for non-n-library if there was no n-library. 
if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_curses_h" = "yes"; then AC_CHECK_LIB(curses, printw, [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)] ) fi if test "x$CURSESLIBS" != "x"; then PCMK_FEATURES="$PCMK_FEATURES ncurses" fi dnl Check for printw() prototype compatibility if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then AC_MSG_CHECKING(whether printw() requires argument of "const char *") ac_save_LIBS=$LIBS LIBS="$CURSESLIBS $LIBS" ac_save_CFLAGS=$CFLAGS CFLAGS="-Wcast-qual -Werror" AC_LINK_IFELSE( [AC_LANG_PROGRAM( [ #if defined(HAVE_NCURSES_H) # include #elif defined(HAVE_NCURSES_NCURSES_H) # include #elif defined(HAVE_CURSES_H) # include #endif ], [printw((const char *)"Test");] )], [ac_cv_compatible_printw=yes], [ac_cv_compatible_printw=no] ) LIBS=$ac_save_LIBS CFLAGS=$ac_save_CFLAGS AC_MSG_RESULT([$ac_cv_compatible_printw]) if test "$ac_cv_compatible_printw" = no; then AC_MSG_WARN([The printw() function of your ncurses or curses library is old, we will disable usage of the library. If you want to use this library anyway, please update to newer version of the library, ncurses 5.4 or later is recommended. 
You can get the library from http://www.gnu.org/software/ncurses/.]) AC_MSG_NOTICE([Disabling curses]) AC_DEFINE(HAVE_INCOMPATIBLE_PRINTW, 1, [Do we have incompatible printw() in curses library?]) fi fi AC_SUBST(CURSESLIBS) dnl ======================================================================== dnl Profiling and GProf dnl ======================================================================== AC_MSG_NOTICE(Old CFLAGS: $CFLAGS) case $SUPPORT_COVERAGE in 1|yes|true) SUPPORT_PROFILING=1 PCMK_FEATURES="$PCMK_FEATURES coverage" CFLAGS="$CFLAGS -fprofile-arcs -ftest-coverage" dnl During linking, make sure to specify -lgcov or -coverage dnl Enable gprof #LIBS="$LIBS -pg" #CFLAGS="$CFLAGS -pg" ;; esac case $SUPPORT_PROFILING in 1|yes|true) SUPPORT_PROFILING=1 dnl Disable various compiler optimizations CFLAGS="$CFLAGS -fno-omit-frame-pointer -fno-inline" dnl CFLAGS="$CFLAGS -fno-inline-functions -fno-default-inline -fno-inline-functions-called-once -fno-optimize-sibling-calls" dnl Turn off optimization so tools can get accurate line numbers CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g' -e 's/-D_FORTIFY_SOURCE=.\ //g'` CFLAGS="$CFLAGS -O0" dnl Update features PCMK_FEATURES="$PCMK_FEATURES profile" ;; *) SUPPORT_PROFILING=0;; esac AC_MSG_NOTICE(New CFLAGS: $CFLAGS) AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for profiling) dnl ======================================================================== dnl Cluster infrastructure - Heartbeat / LibQB dnl ======================================================================== dnl Compatability checks AC_CHECK_MEMBERS([struct lrm_ops.fail_rsc],,,[[#include ]]) if test x${enable_no_stack} = xyes; then SUPPORT_HEARTBEAT=no SUPPORT_CS=no fi PKG_CHECK_MODULES(libqb, libqb, HAVE_libqb=1, HAVE_libqb=0) AC_CHECK_HEADERS(qb/qbipc_common.h) AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set) LIBQB_LOG=1 PCMK_FEATURES="$PCMK_FEATURES libqb-logging libqb-ipc" if ! 
pkg-config --atleast-version 0.13 libqb then AC_MSG_FAILURE(Version of libqb is too old: v0.13 or greater requried) fi LIBS="$LIBS $libqb_LIBS" AC_CHECK_HEADERS(heartbeat/hb_config.h) AC_CHECK_HEADERS(heartbeat/glue_config.h) AC_CHECK_HEADERS(stonith/stonith.h) AC_CHECK_HEADERS(agent_config.h) GLUE_HEADER=none HAVE_GLUE=0 if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then GLUE_HEADER=glue_config.h HAVE_GLUE=1 elif test "$ac_cv_header_heartbeat_hb_config_h" = "yes"; then GLUE_HEADER=hb_config.h HAVE_GLUE=1 else AC_MSG_WARN(cluster-glue development headers were not found) fi if test "$ac_cv_header_stonith_stonith_h" = "yes"; then PCMK_FEATURES="$PCMK_FEATURES lha-fencing" fi if test $HAVE_GLUE = 1; then dnl On Debian, AC_CHECK_LIBS fail if a library has any unresolved symbols dnl So check for all the depenancies (so they're added to LIBS) before checking for -lplumb AC_CHECK_LIB(pils, PILLoadPlugin) AC_CHECK_LIB(plumb, G_main_add_IPC_Channel) fi dnl =============================================== dnl Variables needed for substitution dnl =============================================== CRM_DTD_DIRECTORY="${datadir}/pacemaker" AC_DEFINE_UNQUOTED(CRM_DTD_DIRECTORY,"$CRM_DTD_DIRECTORY", Location for the Pacemaker Relax-NG Schema) AC_SUBST(CRM_DTD_DIRECTORY) AC_DEFINE_UNQUOTED(CRM_DTD_VERSION,"$CRM_DTD_VERSION", Current version of the Pacemaker Relax-NG Schema) AC_SUBST(CRM_DTD_VERSION) CRM_CORE_DIR=`try_extract_header_define $GLUE_HEADER HA_COREDIR ${localstatedir}/lib/pacemaker/cores` AC_DEFINE_UNQUOTED(CRM_CORE_DIR,"$CRM_CORE_DIR", Location to store core files produced by Pacemaker daemons) AC_SUBST(CRM_CORE_DIR) CRM_DAEMON_USER=`try_extract_header_define $GLUE_HEADER HA_CCMUSER hacluster` AC_DEFINE_UNQUOTED(CRM_DAEMON_USER,"$CRM_DAEMON_USER", User to run Pacemaker daemons as) AC_SUBST(CRM_DAEMON_USER) CRM_DAEMON_GROUP=`try_extract_header_define $GLUE_HEADER HA_APIGROUP haclient` AC_DEFINE_UNQUOTED(CRM_DAEMON_GROUP,"$CRM_DAEMON_GROUP", Group to run Pacemaker 
daemons as) AC_SUBST(CRM_DAEMON_GROUP) CRM_STATE_DIR=${localstatedir}/run/crm AC_DEFINE_UNQUOTED(CRM_STATE_DIR,"$CRM_STATE_DIR", Where to keep state files and sockets) AC_SUBST(CRM_STATE_DIR) CRM_BLACKBOX_DIR=${localstatedir}/lib/pacemaker/blackbox AC_DEFINE_UNQUOTED(CRM_BLACKBOX_DIR,"$CRM_BLACKBOX_DIR", Where to keep blackbox dumps) AC_SUBST(CRM_BLACKBOX_DIR) PE_STATE_DIR="${localstatedir}/lib/pacemaker/pengine" AC_DEFINE_UNQUOTED(PE_STATE_DIR,"$PE_STATE_DIR", Where to keep PEngine outputs) AC_SUBST(PE_STATE_DIR) CRM_CONFIG_DIR="${localstatedir}/lib/pacemaker/cib" AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep configuration files) AC_SUBST(CRM_CONFIG_DIR) +CRM_CONFIG_CTS="${localstatedir}/lib/pacemaker/cts" +AC_DEFINE_UNQUOTED(CRM_CONFIG_CTS,"$CRM_CONFIG_CTS", Where to keep cts stateful data) +AC_SUBST(CRM_CONFIG_CTS) + CRM_LEGACY_CONFIG_DIR="${localstatedir}/lib/heartbeat/crm" AC_DEFINE_UNQUOTED(CRM_LEGACY_CONFIG_DIR,"$CRM_LEGACY_CONFIG_DIR", Where Pacemaker used to keep configuration files) AC_SUBST(CRM_LEGACY_CONFIG_DIR) CRM_DAEMON_DIR="${libexecdir}/pacemaker" AC_DEFINE_UNQUOTED(CRM_DAEMON_DIR,"$CRM_DAEMON_DIR", Location for Pacemaker daemons) AC_SUBST(CRM_DAEMON_DIR) HB_DAEMON_DIR=`try_extract_header_define $GLUE_HEADER HA_LIBHBDIR $libdir/heartbeat` AC_DEFINE_UNQUOTED(HB_DAEMON_DIR,"$HB_DAEMON_DIR", Location Heartbeat expects Pacemaker daemons to be in) AC_SUBST(HB_DAEMON_DIR) dnl Needed so that the Corosync plugin can clear out the directory as Heartbeat does HA_STATE_DIR=`try_extract_header_define $GLUE_HEADER HA_VARRUNDIR ${localstatedir}/run` AC_DEFINE_UNQUOTED(HA_STATE_DIR,"$HA_STATE_DIR", Where Heartbeat keeps state files and sockets) AC_SUBST(HA_STATE_DIR) CRM_RSCTMP_DIR=`try_extract_header_define agent_config.h HA_RSCTMPDIR $HA_STATE_DIR/resource-agents` AC_MSG_CHECKING(Scratch dir for resource agents) AC_MSG_RESULT($CRM_RSCTMP_DIR) AC_DEFINE_UNQUOTED(CRM_RSCTMP_DIR,"$CRM_RSCTMP_DIR", Where resource agents should keep state files) 
AC_SUBST(CRM_RSCTMP_DIR)

dnl Needed for the location of hostcache in CTS.py
HA_VARLIBHBDIR=`try_extract_header_define $GLUE_HEADER HA_VARLIBHBDIR ${localstatedir}/lib/heartbeat`
AC_SUBST(HA_VARLIBHBDIR)

AC_DEFINE_UNQUOTED(UUID_FILE,"$localstatedir/lib/heartbeat/hb_uuid", Location of Heartbeat's UUID file)

dnl OCF locations: use the value from the glue header when it defines one,
dnl otherwise the last argument of try_extract_header_define is the default.
OCF_ROOT_DIR=`try_extract_header_define $GLUE_HEADER OCF_ROOT_DIR /usr/lib/ocf`
if test "X$OCF_ROOT_DIR" = X; then
    AC_MSG_ERROR(Could not locate OCF directory)
fi
AC_SUBST(OCF_ROOT_DIR)

OCF_RA_DIR=`try_extract_header_define $GLUE_HEADER OCF_RA_DIR $OCF_ROOT_DIR/resource.d`
AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs)
AC_SUBST(OCF_RA_DIR)

RH_STONITH_DIR="$sbindir"
AC_DEFINE_UNQUOTED(RH_STONITH_DIR,"$RH_STONITH_DIR", Location for Red Hat Stonith agents)

RH_STONITH_PREFIX="fence_"
AC_DEFINE_UNQUOTED(RH_STONITH_PREFIX,"$RH_STONITH_PREFIX", Prefix for Red Hat Stonith agents)

dnl Work out a build identifier, in order of preference:
dnl   1. a hash already substituted into the line below (reported as "archive hash")
dnl   2. the abbreviated hash of the current git checkout
dnl   3. whatever follows "pacemaker-" in the name of the build directory
dnl When nothing was substituted the shell expands the (normally unset) $Format
dnl and BUILD_VERSION ends up as the literal ":%h$", which is what the first
dnl test looks for.
dnl NOTE(review): presumably the substitution is done by git archive - confirm.
AC_PATH_PROGS(GIT, git false)
AC_MSG_CHECKING(build version)

BUILD_VERSION=$Format:%h$
if test $BUILD_VERSION != ":%h$"; then
    AC_MSG_RESULT(archive hash: $BUILD_VERSION)

elif test -x $GIT -a -d .git; then
    BUILD_VERSION=`$GIT log --pretty="format:%h" -n 1`
    AC_MSG_RESULT(git hash: $BUILD_VERSION)

else
    # The current directory name make a reasonable default
    # Most generated archives will include the hash or tag
    BASE=`basename $PWD`
    BUILD_VERSION=`echo $BASE | sed s:.*[[Pp]]acemaker-::`
    AC_MSG_RESULT(directory based hash: $BUILD_VERSION)
fi

AC_DEFINE_UNQUOTED(BUILD_VERSION, "$BUILD_VERSION", Build version)
AC_SUBST(BUILD_VERSION)

dnl Both upstart and systemd support depend on a usable gio.
HAVE_gio=1
HAVE_upstart=0
HAVE_systemd=0
PKG_CHECK_MODULES(GIO, gio-2.0, ,HAVE_gio=0)

dnl GDBusProxy is declared by <gio/gio.h>.  The header has to be named in the
dnl includes argument: with an empty #include the test program cannot compile
dnl and gio would always be reported as unusable.
AC_CHECK_TYPE([GDBusProxy],,,[[#include <gio/gio.h>]])
if test x$ac_cv_type_GDBusProxy != xyes; then
    HAVE_gio=0
    AC_MSG_WARN(Unable to support systemd/upstart. You need to use glib >= 2.26)
fi

if test $HAVE_gio = 1 -a "x${enable_upstart}" != xno; then
    HAVE_upstart=1
    PCMK_FEATURES="$PCMK_FEATURES upstart"
fi
AC_DEFINE_UNQUOTED(SUPPORT_UPSTART, $HAVE_upstart, Support upstart based system services)
AM_CONDITIONAL(BUILD_UPSTART, test $HAVE_upstart = 1)

dnl systemd is only considered when pkg-config knows about it; the unit
dnl directory it reports is substituted for use by the Makefiles.
if $PKGCONFIG --exists systemd
then
    systemdunitdir=`$PKGCONFIG --variable=systemdsystemunitdir systemd`
    AC_SUBST(systemdunitdir)
else
    enable_systemd=no
fi

if test $HAVE_gio = 1 -a "x${enable_systemd}" != xno; then
    if test -n "$systemdunitdir" -a "x$systemdunitdir" != xno; then
        HAVE_systemd=1
        PCMK_FEATURES="$PCMK_FEATURES systemd"
    fi
fi

AC_DEFINE_UNQUOTED(SUPPORT_SYSTEMD, $HAVE_systemd, Support systemd based system services)
AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1)

dnl Normalize SUPPORT_NAGIOS to 0/1 so it can be used in #if directives
case $SUPPORT_NAGIOS in
    1|yes|true|try)
        SUPPORT_NAGIOS=1;;
    *)
        SUPPORT_NAGIOS=0;;
esac

if test $SUPPORT_NAGIOS = 1; then
    PCMK_FEATURES="$PCMK_FEATURES nagios"
fi
AC_DEFINE_UNQUOTED(SUPPORT_NAGIOS, $SUPPORT_NAGIOS, Support nagios plugins)
AM_CONDITIONAL(BUILD_NAGIOS, test $SUPPORT_NAGIOS = 1)

dnl The nagios directories may be preset by the caller; otherwise use defaults
if test x"$NAGIOS_PLUGIN_DIR" = x""; then
    NAGIOS_PLUGIN_DIR="${libexecdir}/nagios/plugins"
fi

AC_DEFINE_UNQUOTED(NAGIOS_PLUGIN_DIR, "$NAGIOS_PLUGIN_DIR", Directory for nagios plugins)
AC_SUBST(NAGIOS_PLUGIN_DIR)

if test x"$NAGIOS_METADATA_DIR" = x""; then
    NAGIOS_METADATA_DIR="${datadir}/nagios/plugins-metadata"
fi

AC_DEFINE_UNQUOTED(NAGIOS_METADATA_DIR, "$NAGIOS_METADATA_DIR", Directory for nagios plugins metadata)
AC_SUBST(NAGIOS_METADATA_DIR)

dnl STACKS collects the names of the cluster stacks that end up enabled,
dnl CLUSTERLIBS the libraries needed to link against them.
STACKS=""
CLUSTERLIBS=""

dnl ========================================================================
dnl Cluster stack - Heartbeat
dnl ========================================================================

dnl A missing client library is only fatal when Heartbeat support was
dnl requested with something other than "try".
case $SUPPORT_HEARTBEAT in
    1|yes|true|try)
        AC_MSG_CHECKING(for heartbeat support)
        AC_CHECK_LIB(hbclient, ll_cluster_new,
                     [SUPPORT_HEARTBEAT=1],
                     [if test $SUPPORT_HEARTBEAT != try; then
                          AC_MSG_FAILURE(Unable to support Heartbeat: client libraries not found)
                      fi])

        if test $SUPPORT_HEARTBEAT = 1 ; then
            STACKS="$STACKS heartbeat"
            dnl objdump -x ${libdir}/libccmclient.so | grep SONAME | awk '{print $2}'
            AC_DEFINE_UNQUOTED(CCM_LIBRARY, "libccmclient.so.1", Library to load for ccm support)
            AC_DEFINE_UNQUOTED(HEARTBEAT_LIBRARY, "libhbclient.so.1", Library to load for heartbeat support)
        else
            SUPPORT_HEARTBEAT=0
        fi
        ;;
    *)
        SUPPORT_HEARTBEAT=0;;
esac

AM_CONDITIONAL(BUILD_HEARTBEAT_SUPPORT, test $SUPPORT_HEARTBEAT = 1)
AC_DEFINE_UNQUOTED(SUPPORT_HEARTBEAT, $SUPPORT_HEARTBEAT, Support the Heartbeat messaging and membership layer)
AC_SUBST(SUPPORT_HEARTBEAT)

dnl ========================================================================
dnl Cluster stack - Corosync
dnl ========================================================================

dnl Normalize the values
case $SUPPORT_CS in
    1|yes|true)
        SUPPORT_CS=yes
        missingisfatal=1;;
    try)
        missingisfatal=0;;
    *)
        SUPPORT_CS=no;;
esac

AC_MSG_CHECKING(for native corosync)
COROSYNC_LIBS=""
CS_USES_LIBQB=0

PCMK_SERVICE_ID=9
LCRSODIR="$libdir"

if test $SUPPORT_CS = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_CS=0

else
    AC_MSG_RESULT($SUPPORT_CS, with '$CSPREFIX')
    PKG_CHECK_MODULES(cpg, libcpg) dnl Fatal
    PKG_CHECK_MODULES(cfg, libcfg) dnl Fatal
    PKG_CHECK_MODULES(cmap, libcmap, HAVE_cmap=1, HAVE_cmap=0)
    PKG_CHECK_MODULES(cman, libcman, HAVE_cman=1, HAVE_cman=0)
    PKG_CHECK_MODULES(confdb, libconfdb, HAVE_confdb=1, HAVE_confdb=0)
    PKG_CHECK_MODULES(fenced, libfenced, HAVE_fenced=1, HAVE_fenced=0)
    PKG_CHECK_MODULES(quorum, libquorum, HAVE_quorum=1, HAVE_quorum=0)
    PKG_CHECK_MODULES(oldipc, libcoroipcc, HAVE_oldipc=1, HAVE_oldipc=0)

    dnl PKG_CHECK_MODULES(prefix, ...) sets prefix_CFLAGS and prefix_LIBS.
    dnl There is no prefix_FLAGS variable, so the compiler flags have to be
    dnl picked up through the _CFLAGS names or they are silently dropped.
    dnl The old IPC library is preferred; libqb is the fallback.
    if test $HAVE_oldipc = 1; then
        SUPPORT_CS=1
        CFLAGS="$CFLAGS $oldipc_CFLAGS $cpg_CFLAGS $cfg_CFLAGS"
        COROSYNC_LIBS="$COROSYNC_LIBS $oldipc_LIBS $cpg_LIBS $cfg_LIBS"

    elif test $HAVE_libqb = 1; then
        SUPPORT_CS=1
        CS_USES_LIBQB=1
        CFLAGS="$CFLAGS $libqb_CFLAGS $cpg_CFLAGS $cfg_CFLAGS"
        COROSYNC_LIBS="$COROSYNC_LIBS $libqb_LIBS $cpg_LIBS $cfg_LIBS"
        AC_CHECK_LIB(corosync_common, cs_strerror)

    else
aisreason="corosync/libqb IPC libraries not found by pkg_config"
    fi

    AC_DEFINE_UNQUOTED(HAVE_CONFDB, $HAVE_confdb, Have the old herarchial Corosync config API)
    AC_DEFINE_UNQUOTED(HAVE_CMAP, $HAVE_cmap, Have the new non-herarchial Corosync config API)
fi

if test $SUPPORT_CS = 1 -a x$HAVE_oldipc = x0 ; then
    dnl Support for plugins was removed about the time the IPC was
    dnl moved to libqb.
    dnl The only option now is the built-in quorum API
    CFLAGS="$CFLAGS $cmap_CFLAGS $quorum_CFLAGS"
    COROSYNC_LIBS="$COROSYNC_LIBS $cmap_LIBS $quorum_LIBS"

    STACKS="$STACKS corosync-native"
    AC_DEFINE_UNQUOTED(SUPPORT_CS_QUORUM, 1, Support the consumption of membership and quorum from corosync)
fi

SUPPORT_PLUGIN=0
if test $SUPPORT_CS = 1 -a x$HAVE_confdb = x1; then
    dnl Need confdb to support cman and the plugins
    SUPPORT_PLUGIN=1
    LCRSODIR=`$PKGCONFIG corosync --variable=lcrsodir`
    STACKS="$STACKS corosync-plugin"
    COROSYNC_LIBS="$COROSYNC_LIBS $confdb_LIBS"

    if test $SUPPORT_CMAN != no; then
        if test $HAVE_cman = 1 -a $HAVE_fenced = 1; then
            SUPPORT_CMAN=1
            STACKS="$STACKS cman"
            dnl PKG_CHECK_MODULES sets cman_CFLAGS and fenced_CFLAGS; the
            dnl _FLAGS spelling names variables that are never set.
            CFLAGS="$CFLAGS $cman_CFLAGS $fenced_CFLAGS"
            COROSYNC_LIBS="$COROSYNC_LIBS $cman_LIBS $fenced_LIBS"
        fi
    fi
fi

dnl Normalize SUPPORT_CS and SUPPORT_CMAN for use with #if directives
if test $SUPPORT_CMAN != 1; then
    SUPPORT_CMAN=0
fi

dnl Any value other than 0 or 1 at this point means corosync was asked for
dnl but could not be enabled; that is a warning for "try" and fatal otherwise.
if test $SUPPORT_CS = 1; then
    CLUSTERLIBS="$CLUSTERLIBS $COROSYNC_LIBS"
elif test $SUPPORT_CS != 0; then
    SUPPORT_CS=0
    if test $missingisfatal = 0; then
        AC_MSG_WARN(Unable to support Corosync: $aisreason)
    else
        AC_MSG_FAILURE(Unable to support Corosync: $aisreason)
    fi
fi

AC_DEFINE_UNQUOTED(SUPPORT_COROSYNC, $SUPPORT_CS, Support the Corosync messaging and membership layer)
AC_DEFINE_UNQUOTED(SUPPORT_CMAN, $SUPPORT_CMAN, Support the consumption of membership and quorum from cman)
AC_DEFINE_UNQUOTED(CS_USES_LIBQB, $CS_USES_LIBQB, Does corosync use libqb for its ipc)
AC_DEFINE_UNQUOTED(PCMK_SERVICE_ID, $PCMK_SERVICE_ID, Corosync service number)
AC_DEFINE_UNQUOTED(SUPPORT_PLUGIN, $SUPPORT_PLUGIN, Support the Pacemaker plugin for Corosync)

AM_CONDITIONAL(BUILD_CS_SUPPORT, test $SUPPORT_CS = 1)
AM_CONDITIONAL(BUILD_CS_PLUGIN, test $SUPPORT_PLUGIN = 1)
AM_CONDITIONAL(BUILD_CMAN, test $SUPPORT_CMAN = 1)

AC_SUBST(SUPPORT_CMAN)
AC_SUBST(SUPPORT_CS)

dnl
dnl Cluster stack - Sanity
dnl

dnl Unless the no-stack build was requested, at least one stack must have
dnl been added to STACKS by the sections above.
if test x${enable_no_stack} = xyes; then
    AC_MSG_NOTICE(No cluster stack supported. Just building the Policy Engine)
    PCMK_FEATURES="$PCMK_FEATURES no-cluster-stack"

else
    AC_MSG_CHECKING(for supported stacks)
    if test x"$STACKS" = x; then
        AC_MSG_FAILURE(You must support at least one cluster stack (heartbeat or corosync) )
    fi
    AC_MSG_RESULT($STACKS)
    PCMK_FEATURES="$PCMK_FEATURES $STACKS"
fi

AC_SUBST(CLUSTERLIBS)
AC_SUBST(LCRSODIR)

dnl ========================================================================
dnl SNMP
dnl ========================================================================

dnl "try" makes a failed probe a warning; an explicit request makes it fatal.
case $SUPPORT_SNMP in
    1|yes|true)
        missingisfatal=1;;
    try)
        missingisfatal=0;;
    *)
        SUPPORT_SNMP=no;;
esac

SNMPLIBS=""

AC_MSG_CHECKING(for snmp support)
if test $SUPPORT_SNMP = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_SNMP=0
else
    SNMPCONFIG=""
    AC_MSG_RESULT($SUPPORT_SNMP)
    AC_CHECK_HEADERS(net-snmp/net-snmp-config.h)

    if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
        SUPPORT_SNMP="no"
    fi

    if test $SUPPORT_SNMP != no; then
        AC_PATH_PROGS(SNMPCONFIG, net-snmp-config)
        if test "X${SNMPCONFIG}" = "X"; then
            AC_MSG_RESULT(You need the net_snmp development package to continue.)
            SUPPORT_SNMP=no
        fi
    fi

    if test $SUPPORT_SNMP != no; then
        AC_MSG_CHECKING(for special snmp libraries)
        SNMPLIBS=`$SNMPCONFIG --agent-libs`
        AC_MSG_RESULT($SNMPLIBS)
    fi

    if test $SUPPORT_SNMP != no; then
        dnl LIBS is extended only for the function probes and restored afterwards
        savedLibs=$LIBS
        LIBS="$LIBS $SNMPLIBS"

        dnl On many systems libcrypto is needed when linking against libsnmp.
        dnl Check to see if it exists, and if so use it.
dnl AC_CHECK_LIB(crypto, CRYPTO_free, CRYPTOLIB="-lcrypto",)
        dnl AC_SUBST(CRYPTOLIB)

        dnl Prefer netsnmp_transport_open_client; fall back to
        dnl netsnmp_tdomain_transport (flagged through NETSNMPV53) and give up
        dnl on SNMP when neither function is available.
        AC_CHECK_FUNCS(netsnmp_transport_open_client)
        if test $ac_cv_func_netsnmp_transport_open_client != yes; then
            AC_CHECK_FUNCS(netsnmp_tdomain_transport)
            if test $ac_cv_func_netsnmp_tdomain_transport != yes; then
                SUPPORT_SNMP=no
            else
                AC_DEFINE_UNQUOTED(NETSNMPV53, 1, [Use the older 5.3 version of the net-snmp API])
            fi
        fi
        LIBS=$savedLibs
    fi

    dnl Collapse the probe result to 0/1; a failure is fatal unless SNMP was
    dnl only requested with "try".
    if test $SUPPORT_SNMP = no; then
        SNMPLIBS=""
        SUPPORT_SNMP=0
        if test $missingisfatal = 0; then
            AC_MSG_WARN(Unable to support SNMP)
        else
            AC_MSG_FAILURE(Unable to support SNMP)
        fi
    else
        SUPPORT_SNMP=1
    fi
fi

if test $SUPPORT_SNMP = 1; then
    PCMK_FEATURES="$PCMK_FEATURES snmp"
fi

AC_SUBST(SNMPLIBS)
AM_CONDITIONAL(ENABLE_SNMP, test "$SUPPORT_SNMP" = "1")
AC_DEFINE_UNQUOTED(ENABLE_SNMP, $SUPPORT_SNMP, Build in support for sending SNMP traps)

dnl ========================================================================
dnl ESMTP
dnl ========================================================================

dnl "try" makes a failed probe a warning; an explicit request makes it fatal.
case $SUPPORT_ESMTP in
    1|yes|true)
        missingisfatal=1;;
    try)
        missingisfatal=0;;
    *)
        SUPPORT_ESMTP=no;;
esac

ESMTPLIB=""

AC_MSG_CHECKING(for esmtp support)
if test $SUPPORT_ESMTP = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_ESMTP=0
else
    ESMTPCONFIG=""
    AC_MSG_RESULT($SUPPORT_ESMTP)
    AC_CHECK_HEADERS(libesmtp.h)

    dnl The decision variable for this section is SUPPORT_ESMTP.  A missing
    dnl header has to clear it, exactly as the SNMP section does with
    dnl SUPPORT_SNMP; assigning ENABLE_ESMTP here had no effect on the checks
    dnl that follow.
    if test "x${ac_cv_header_libesmtp_h}" != "xyes"; then
        SUPPORT_ESMTP="no"
    fi

    if test $SUPPORT_ESMTP != no; then
        AC_PATH_PROGS(ESMTPCONFIG, libesmtp-config)
        if test "X${ESMTPCONFIG}" = "X"; then
            AC_MSG_RESULT(You need the libesmtp development package to continue.)
SUPPORT_ESMTP=no
        fi
    fi

    if test $SUPPORT_ESMTP != no; then
        AC_MSG_CHECKING(for special esmtp libraries)
        dnl libesmtp-config output is flattened onto one line
        ESMTPLIBS=`$ESMTPCONFIG --libs | tr '\n' ' '`
        AC_MSG_RESULT($ESMTPLIBS)
    fi

    dnl Collapse the probe result to 0/1; a failure is fatal unless ESMTP was
    dnl only requested with "try".
    if test $SUPPORT_ESMTP = no; then
        SUPPORT_ESMTP=0
        if test $missingisfatal = 0; then
            AC_MSG_WARN(Unable to support ESMTP)
        else
            AC_MSG_FAILURE(Unable to support ESMTP)
        fi
    else
        SUPPORT_ESMTP=1
        PCMK_FEATURES="$PCMK_FEATURES libesmtp"
    fi
fi

AC_SUBST(ESMTPLIBS)
AM_CONDITIONAL(ENABLE_ESMTP, test "$SUPPORT_ESMTP" = "1")
AC_DEFINE_UNQUOTED(ENABLE_ESMTP, $SUPPORT_ESMTP, Build in support for sending mail notifications with ESMTP)

dnl ========================================================================
dnl ACL
dnl ========================================================================

dnl "try" makes a failed probe a warning; an explicit request makes it fatal.
case $SUPPORT_ACL in
    1|yes|true)
        missingisfatal=1;;
    try)
        missingisfatal=0;;
    *)
        SUPPORT_ACL=no;;
esac

AC_MSG_CHECKING(for acl support)
if test $SUPPORT_ACL = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_ACL=0
else
    AC_MSG_RESULT($SUPPORT_ACL)
    dnl ACL support is assumed until the libqb probe says otherwise: it needs
    dnl qb_ipcs_connection_auth_set from libqb.
    SUPPORT_ACL=1
    AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
    if test $ac_cv_lib_qb_qb_ipcs_connection_auth_set != yes; then
        SUPPORT_ACL=0
    fi

    if test $SUPPORT_ACL = 0; then
        if test $missingisfatal = 0; then
            AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0)
        else
            AC_MSG_FAILURE(Unable to support ACL. You need to use libqb > 0.13.0)
        fi
    fi
fi

if test $SUPPORT_ACL = 1; then
    PCMK_FEATURES="$PCMK_FEATURES acls"
fi

AM_CONDITIONAL(ENABLE_ACL, test "$SUPPORT_ACL" = "1")
AC_DEFINE_UNQUOTED(ENABLE_ACL, $SUPPORT_ACL, Build in support for CIB ACL)

dnl ========================================================================
dnl CIB secrets
dnl ========================================================================

dnl Normalize SUPPORT_CIBSECRETS to 0/1; no probing is involved here.
case $SUPPORT_CIBSECRETS in
    1|yes|true|try)
        SUPPORT_CIBSECRETS=1;;
    *)
        SUPPORT_CIBSECRETS=0;;
esac

AC_DEFINE_UNQUOTED(SUPPORT_CIBSECRETS, $SUPPORT_CIBSECRETS, Support CIB secrets)
AM_CONDITIONAL(BUILD_CIBSECRETS, test $SUPPORT_CIBSECRETS = 1)

dnl The secrets directories are only defined and substituted when the
dnl feature is enabled.
if test $SUPPORT_CIBSECRETS = 1; then
    PCMK_FEATURES="$PCMK_FEATURES cibsecrets"

    LRM_CIBSECRETS_DIR="${localstatedir}/lib/pacemaker/lrm/secrets"
    AC_DEFINE_UNQUOTED(LRM_CIBSECRETS_DIR,"$LRM_CIBSECRETS_DIR", Location for CIB secrets)
    AC_SUBST(LRM_CIBSECRETS_DIR)

    LRM_LEGACY_CIBSECRETS_DIR="${localstatedir}/lib/heartbeat/lrm/secrets"
    AC_DEFINE_UNQUOTED(LRM_LEGACY_CIBSECRETS_DIR,"$LRM_LEGACY_CIBSECRETS_DIR", Legacy location for CIB secrets)
    AC_SUBST(LRM_LEGACY_CIBSECRETS_DIR)
fi

dnl ========================================================================
dnl GnuTLS
dnl ========================================================================

AC_CHECK_HEADERS(gnutls/gnutls.h)
AC_CHECK_HEADERS(security/pam_appl.h pam/pam_appl.h)

dnl GNUTLS library: Attempt to determine by 'libgnutls-config' program.
dnl If no 'libgnutls-config', try traditional autoconf means.
AC_PATH_PROGS(LIBGNUTLS_CONFIG, libgnutls-config)

dnl When the helper program exists, take header and library flags from it.
if test -n "$LIBGNUTLS_CONFIG"; then
    AC_MSG_CHECKING(for gnutls header flags)
    GNUTLSHEAD="`$LIBGNUTLS_CONFIG --cflags`";
    AC_MSG_RESULT($GNUTLSHEAD)
    AC_MSG_CHECKING(for gnutls library flags)
    GNUTLSLIBS="`$LIBGNUTLS_CONFIG --libs`";
    AC_MSG_RESULT($GNUTLSLIBS)
fi

dnl The library and function probes run whether or not the helper was found.
AC_CHECK_LIB(gnutls, gnutls_init)
AC_CHECK_FUNCS(gnutls_priority_set_direct)

AC_SUBST(GNUTLSHEAD)
AC_SUBST(GNUTLSLIBS)

dnl ========================================================================
dnl System Health
dnl ========================================================================

dnl Check if servicelog development package is installed
SERVICELOG=servicelog-1
SERVICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG packages)
if $PKGCONFIG --exists $SERVICELOG
then
    PKG_CHECK_MODULES([SERVICELOG], [servicelog-1])
    SERVICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($SERVICELOG_EXISTS)
AM_CONDITIONAL(BUILD_SERVICELOG, test "$SERVICELOG_EXISTS" = "yes")

dnl Check if OpenIPMI packages and servicelog are installed
dnl Existence is tested for the OpenIPMI modules and servicelog together,
dnl while PKG_CHECK_MODULES below queries flags for the OpenIPMI modules only.
OPENIPMI="OpenIPMI OpenIPMIposix"
OPENIPMI_SERVICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG $OPENIPMI packages)
if $PKGCONFIG --exists $OPENIPMI $SERVICELOG
then
    PKG_CHECK_MODULES([OPENIPMI_SERVICELOG],[OpenIPMI OpenIPMIposix])
    OPENIPMI_SERVICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($OPENIPMI_SERVICELOG_EXISTS)
AM_CONDITIONAL(BUILD_OPENIPMI_SERVICELOG, test "$OPENIPMI_SERVICELOG_EXISTS" = "yes")

dnl ========================================================================
dnl Compiler flags
dnl ========================================================================

dnl Make sure that CFLAGS is not exported. If the user did
dnl not have CFLAGS in their environment then this should have
dnl no effect. However if CFLAGS was exported from the user's
dnl environment, then the new CFLAGS will also be exported
dnl to sub processes.
CC_ERRORS=""
CC_EXTRAS=""

dnl Re-create CFLAGS as a plain shell variable when it arrived exported
if export | fgrep " CFLAGS=" > /dev/null; then
    SAVED_CFLAGS="$CFLAGS"
    unset CFLAGS
    CFLAGS="$SAVED_CFLAGS"
    unset SAVED_CFLAGS
fi

if test "$GCC" != yes; then
    dnl Not gcc: plain debug info only, and never treat warnings as errors
    CFLAGS="$CFLAGS -g"
    enable_fatal_warnings=no
else
    CFLAGS="$CFLAGS -ggdb"

    # We had to eliminate -Wnested-externs because of libtool changes
    # NOTE(review): -Wnested-externs is still present in the list below,
    # so the comment above and the list disagree - confirm which is intended.
    EXTRA_FLAGS="-fgnu89-inline
        -fstack-protector-all
        -Wall
        -Waggregate-return
        -Wbad-function-cast
        -Wcast-align
        -Wdeclaration-after-statement
        -Wendif-labels
        -Wfloat-equal
        -Wformat=2
        -Wformat-security
        -Wformat-nonliteral
        -Wmissing-prototypes
        -Wmissing-declarations
        -Wnested-externs
        -Wno-long-long
        -Wno-strict-aliasing
        -Wunused-but-set-variable
        -Wpointer-arith
        -Wstrict-prototypes
        -Wunsigned-char
        -Wwrite-strings"

    # Additional warnings it might be nice to enable one day
    # -Wshadow
    # -Wunreachable-code

    dnl Keep only the flags the compiler actually accepts
    for j in $EXTRA_FLAGS
    do
        if cc_supports_flag $j
        then
            CC_EXTRAS="$CC_EXTRAS $j"
        fi
    done

    dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x
    dnl The bracket expression in the sed script is written with doubled
    dnl brackets: m4 strips one level of square brackets from top-level text,
    dnl so a single pair would reach the shell as "s/..*//", which deletes the
    dnl whole version string and leaves GCC_MAJOR empty.
    GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[[.]].*//'`
    AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4)

    dnl System specific options
    case "$host_os" in
        *linux*|*bsd*)
            if test "${enable_fatal_warnings}" = "unknown"; then
                enable_fatal_warnings=yes
            fi
            ;;
    esac

    dnl Fatal warnings need both the request and a compiler that takes -Werror
    if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then
        enable_fatal_warnings=yes
    else
        enable_fatal_warnings=no
    fi

    if test "x${enable_ansi}" = xyes && cc_supports_flag -std=iso9899:199409 ; then
        AC_MSG_NOTICE(Enabling ANSI Compatibility)
        CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
    fi

    AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
fi

CFLAGS="$CFLAGS $CC_EXTRAS"

dnl Snapshot of the flags taken before -Werror is appended further down
NON_FATAL_CFLAGS="$CFLAGS"
AC_SUBST(NON_FATAL_CFLAGS)

dnl
dnl We reset CFLAGS to include our warnings *after* all function
dnl checking goes on, so that our warning flags don't keep the
dnl AC_*FUNCS() calls above from working. In particular, -Werror will
dnl *always* cause us troubles if we set it before here.
dnl dnl if test "x${enable_fatal_warnings}" = xyes ; then AC_MSG_NOTICE(Enabling Fatal Warnings) CFLAGS="$CFLAGS -Werror" fi AC_SUBST(CFLAGS) dnl This is useful for use in Makefiles that need to remove one specific flag CFLAGS_COPY="$CFLAGS" AC_SUBST(CFLAGS_COPY) AC_SUBST(LIBADD_DL) dnl extra flags for dynamic linking libraries AC_SUBST(LIBADD_INTL) dnl extra flags for GNU gettext stuff... AC_SUBST(LOCALE) dnl Options for cleaning up the compiler output QUIET_LIBTOOL_OPTS="" QUIET_MAKE_OPTS="" if test "x${enable_quiet}" = "xyes"; then QUIET_LIBTOOL_OPTS="--quiet" QUIET_MAKE_OPTS="--quiet" fi AC_MSG_RESULT(Supress make details: ${enable_quiet}) dnl Put the above variables to use LIBTOOL="${LIBTOOL} --tag=CC \$(QUIET_LIBTOOL_OPTS)" MAKE="${MAKE} \$(QUIET_MAKE_OPTS)" AC_SUBST(CC) AC_SUBST(MAKE) AC_SUBST(LIBTOOL) AC_SUBST(QUIET_MAKE_OPTS) AC_SUBST(QUIET_LIBTOOL_OPTS) AC_DEFINE_UNQUOTED(CRM_FEATURES, "$PCMK_FEATURES", Set of enabled features) AC_SUBST(PCMK_FEATURES) dnl The Makefiles and shell scripts we output AC_CONFIG_FILES(Makefile \ Doxyfile \ coverage.sh \ cts/Makefile \ cts/CTSvars.py \ cts/LSBDummy \ cts/benchmark/Makefile \ - cts/benchmark/clubench \ + cts/benchmark/clubench \ + cts/lxc_autogen.sh \ cib/Makefile \ crmd/Makefile \ pengine/Makefile \ pengine/regression.core.sh \ doc/Makefile \ doc/Pacemaker_Explained/publican.cfg \ doc/Clusters_from_Scratch/publican.cfg \ doc/Pacemaker_Remote/publican.cfg \ include/Makefile \ include/crm/Makefile \ include/crm/cib/Makefile \ include/crm/common/Makefile \ include/crm/cluster/Makefile \ include/crm/fencing/Makefile \ include/crm/pengine/Makefile \ replace/Makefile \ lib/Makefile \ lib/pacemaker.pc \ lib/pacemaker-cib.pc \ lib/pacemaker-lrmd.pc \ lib/pacemaker-service.pc \ lib/pacemaker-pengine.pc \ lib/pacemaker-fencing.pc \ lib/pacemaker-cluster.pc \ lib/ais/Makefile \ lib/common/Makefile \ lib/cluster/Makefile \ lib/cib/Makefile \ lib/pengine/Makefile \ lib/transition/Makefile \ lib/fencing/Makefile \ 
lib/lrmd/Makefile \ lib/services/Makefile \ mcp/Makefile \ mcp/pacemaker \ mcp/pacemaker.service \ mcp/pacemaker.upstart \ mcp/pacemaker.combined.upstart \ fencing/Makefile \ fencing/regression.py \ lrmd/Makefile \ lrmd/regression.py \ lrmd/pacemaker_remote.service \ lrmd/pacemaker_remote \ extra/Makefile \ extra/resources/Makefile \ extra/rgmanager/Makefile \ tools/Makefile \ tools/crm_report \ tools/report.common \ tools/cibsecret \ xml/Makefile \ lib/gnu/Makefile \ ) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() dnl ***************** dnl Configure summary dnl ***************** AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = ${VERSION} (Build: $BUILD_VERSION)]) AC_MSG_RESULT([ Features =${PCMK_FEATURES}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Prefix = ${prefix}]) AC_MSG_RESULT([ Executables = ${sbindir}]) AC_MSG_RESULT([ Man pages = ${mandir}]) AC_MSG_RESULT([ Libraries = ${libdir}]) AC_MSG_RESULT([ Header files = ${includedir}]) AC_MSG_RESULT([ Arch-independent files = ${datadir}]) AC_MSG_RESULT([ State information = ${localstatedir}]) AC_MSG_RESULT([ System configuration = ${sysconfdir}]) AC_MSG_RESULT([ Corosync Plugins = ${LCRSODIR}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Use system LTDL = ${ac_cv_lib_ltdl_lt_dlopen}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ HA group name = ${CRM_DAEMON_GROUP}]) AC_MSG_RESULT([ HA user name = ${CRM_DAEMON_USER}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) AC_MSG_RESULT([ Libraries = ${LIBS}]) AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) diff --git a/cts/CTSlab.py b/cts/CTSlab.py index 58506c30c2..e0470fbf06 100755 --- a/cts/CTSlab.py +++ b/cts/CTSlab.py @@ -1,616 +1,621 @@ #!/usr/bin/python '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' Copyright (C) 2001,2005 Alan Robertson Licensed under the GNU GPL. 
'''

#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

from UserDict import UserDict
import sys, types, string, string, signal, os, socket

pdir=os.path.dirname(sys.path[0])
sys.path.insert(0, pdir) # So that things work from the source directory

# Import the CTS libraries.  On failure a hint is written to stderr and the
# same imports are attempted once more outside the try block, so that the
# resulting traceback shows which module could not be loaded.
try:
    from cts.CTSvars import *
    from cts.CM_ais import *
    from cts.CM_lha import crm_lha
    from cts.CTSaudits import AuditList
    from cts.CTStests import TestList
    from cts.CTSscenarios import *

except ImportError:
    sys.stderr.write("abort: couldn't find cts libraries in [%s]\n" % ' '.join(sys.path))
    sys.stderr.write("(check your install and PYTHONPATH)\n")

    # Now do it again to get more details
    from cts.CTSvars import *
    from cts.CM_ais import *
    from cts.CM_lha import crm_lha
    from cts.CTSaudits import AuditList
    from cts.CTStests import TestList
    from cts.CTSscenarios import *
    sys.exit(-1)

# Module-level state.  sig_handler() reads cm and scenario; both are None at
# import time, in which case the handler skips the corresponding calls.
cm = None
Tests = []
Chosen = []
scenario = None

# Not really used, the handler in
# NOTE(review): the comment above is cut short in the original source.
def sig_handler(signum, frame) :
    """Signal handler: report the interruption and shut down on signal 15.

    signum -- number of the signal that was delivered
    frame  -- current stack frame (unused)

    Logs the signal through cm and prints the scenario summary when those
    objects exist.  For signal 15 the scenario is also torn down and the
    process exits with status 1; for any other signal the function returns.
    """
    if cm: cm.log("Interrupted by signal %d"%signum)
    if scenario: scenario.summarize()
    if signum == 15 :
        if scenario: scenario.TearDown()
        sys.exit(1)

class LabEnvironment(CtsLab):

    def __init__(self):
        CtsLab.__init__(self)

        # Get a random seed for the random number generator.
self["DoStandby"] = 1 self["DoFencing"] = 1 self["XmitLoss"] = "0.0" self["RecvLoss"] = "0.0" self["ClobberCIB"] = 0 self["CIBfilename"] = None self["CIBResource"] = 0 self["DoBSC"] = 0 self["use_logd"] = 0 self["oprofile"] = [] self["warn-inactive"] = 0 self["ListTests"] = 0 self["benchmark"] = 0 self["Schema"] = "pacemaker-1.0" self["Stack"] = "openais" self["stonith-type"] = "external/ssh" self["stonith-params"] = "hostlist=all,livedangerously=yes" self["logger"] = ([StdErrLog(self)]) self["loop-minutes"] = 60 self["valgrind-prefix"] = None self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng" self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" #self["valgrind-opts"] = """--trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" self["experimental-tests"] = 0 + self["remote-tests"] = 0 self["valgrind-tests"] = 0 self["unsafe-tests"] = 1 self["loop-tests"] = 1 self["scenario"] = "random" self["stats"] = 0 master = socket.gethostname() # Use the IP where possible to avoid name lookup failures for ip in socket.gethostbyname_ex(master)[2]: if ip != "127.0.0.1": master = ip break; self["cts-master"] = master def usage(arg, status=1): print "Illegal argument " + arg print "usage: " + sys.argv[0] +" [options] number-of-iterations" print "\nCommon options: " print "\t [--nodes 'node list'] list of cluster nodes separated by whitespace" print "\t [--group | -g 'name'] use the nodes listed in the named DSH group (~/.dsh/groups/$name)" print "\t [--limit-nodes max] only use the first 'max' cluster nodes supplied with --nodes" print "\t [--stack (v0|v1|cman|corosync|heartbeat|openais)] which cluster stack is installed" print "\t [--list-tests] list the valid tests" print "\t [--benchmark] add the timing information" print "\t " print "Options that CTS will usually auto-detect correctly: " 
print "\t [--logfile path] where should the test software look for logs from cluster nodes" print "\t [--syslog-facility name] which syslog facility should the test software log to" print "\t [--at-boot (1|0)] does the cluster software start at boot time" print "\t [--test-ip-base ip] offset for generated IP address resources" print "\t " print "Options for release testing: " print "\t [--populate-resources | -r] generate a sample configuration" print "\t [--choose name] run only the named test" print "\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]" print "\t [--once] run all valid tests once" print "\t " print "Additional (less common) options: " print "\t [--clobber-cib | -c ] erase any existing configuration" print "\t [--outputfile path] optional location for the test software to write logs to" print "\t [--trunc] truncate logfile before starting" print "\t [--xmit-loss lost-rate(0.0-1.0)]" print "\t [--recv-loss lost-rate(0.0-1.0)]" print "\t [--standby (1 | 0 | yes | no)]" print "\t [--fencing (1 | 0 | yes | no | rhcs | lha | openstack )]" print "\t [--stonith-type type]" print "\t [--stonith-args name=value]" print "\t [--bsc]" print "\t [--no-loop-tests] dont run looping/time-based tests" print "\t [--no-unsafe-tests] dont run tests that are unsafe for use with ocfs2/drbd" print "\t [--valgrind-tests] include tests using valgrind" print "\t [--experimental-tests] include experimental tests" + print "\t [--remote-tests] include pacemaker_remote tests" print "\t [--oprofile 'node list'] list of cluster nodes to run oprofile on]" print "\t [--qarsh] use the QARSH backdoor to access nodes instead of SSH" print "\t [--seed random_seed]" print "\t [--set option=value]" print "\t " print "\t Example: " print "\t python ./CTSlab.py -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500" sys.exit(status) # # A little test code... 
# if __name__ == '__main__': Environment = LabEnvironment() rsh = RemoteExec(None, silent=True) NumIter = 0 Version = 1 LimitNodes = 0 TruncateLog = 0 ListTests = 0 HaveSeed = 0 node_list = '' # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Process arguments... skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r" or args[i] == "--populate-resources": Environment["CIBResource"] = 1 Environment["ClobberCIB"] = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 Environment["LogFileName"] = args[i+1] elif args[i] == "--outputfile": skipthis=1 Environment["OutputFile"] = args[i+1] elif args[i] == "--ip" or args[i] == "--test-ip-base": skipthis=1 Environment["IPBase"] = args[i+1] Environment["CIBResource"] = 1 Environment["ClobberCIB"] = 1 elif args[i] == "--oprofile": skipthis=1 Environment["oprofile"] = args[i+1].split(' ') elif args[i] == "--trunc": Environment["TruncateLog"]=1 elif args[i] == "--list-tests" or args[i] == "--list" : Environment["ListTests"]=1 elif args[i] == "--benchmark": Environment["benchmark"]=1 elif args[i] == "--bsc": Environment["DoBSC"] = 1 Environment["scenario"] = "basic-sanity" elif args[i] == "--qarsh": Environment.rsh.enable_qarsh() rsh.enable_qarsh() elif args[i] == "--stonith" or args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoFencing"]=1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoFencing"]=0 elif args[i+1] == "rhcs" or args[i+1] == "xvm" or args[i+1] == "virt": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_xvm" Environment["stonith-params"] = "pcmk_arg_map=domain:uname,delay=0" elif args[i+1] == "ssh" or args[i+1] == "lha": Environment["DoStonith"]=1 Environment["stonith-type"] = "external/ssh" Environment["stonith-params"] = 
"hostlist=all,livedangerously=yes" elif args[i+1] == "north": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = "ipaddr=north-apc,login=apc,passwd=apc,pcmk_host_map=north-01:2;north-02:3;north-03:4;north-04:5;north-05:6;north-06:7;north-07:9;north-08:10;north-09:11;north-10:12;north-11:13;north-12:14;north-13:15;north-14:18;north-15:17;north-16:19;" elif args[i+1] == "south": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = "ipaddr=south-apc,login=apc,passwd=apc,pcmk_host_map=south-01:2;south-02:3;south-03:4;south-04:5;south-05:6;south-06:7;south-07:9;south-08:10;south-09:11;south-10:12;south-11:13;south-12:14;south-13:15;south-14:18;south-15:17;south-16:19;" elif args[i+1] == "east": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;" elif args[i+1] == "west": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = "ipaddr=west-apc,login=apc,passwd=apc,pcmk_host_map=west-01:2;west-02:3;west-03:4;west-04:5;west-05:6;west-06:7;west-07:9;west-08:10;west-09:11;west-10:12;west-11:13;west-12:14;west-13:15;west-14:18;west-15:17;west-16:19;" elif args[i+1] == "openstack": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_openstack" print "Obtaining OpenStack credentials from the current environment" Environment["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % ( os.environ['OS_REGION_NAME'], os.environ['OS_TENANT_NAME'], os.environ['OS_AUTH_URL'], os.environ['OS_USERNAME'], os.environ['OS_PASSWORD'] ) elif args[i+1] == "rhevm": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_rhevm" print "Obtaining RHEV-M 
credentials from the current environment" Environment["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % ( os.environ['RHEVM_USERNAME'], os.environ['RHEVM_PASSWORD'], os.environ['RHEVM_SERVER'], os.environ['RHEVM_PORT'], ) else: usage(args[i+1]) elif args[i] == "--stonith-type": Environment["stonith-type"] = args[i+1] skipthis=1 elif args[i] == "--stonith-args": Environment["stonith-params"] = args[i+1] skipthis=1 elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoStandby"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoStandby"] = 0 else: usage(args[i+1]) elif args[i] == "--clobber-cib" or args[i] == "-c": Environment["ClobberCIB"] = 1 elif args[i] == "--cib-filename": skipthis=1 Environment["CIBfilename"] = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1]) skipthis=1 Environment["XmitLoss"] = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 Environment["RecvLoss"] = args[i+1] elif args[i] == "--choose": skipthis=1 Chosen.append(args[i+1]) Environment["scenario"] = "sequence" elif args[i] == "--nodes": skipthis=1 node_list = args[i+1].split(' ') elif args[i] == "-g" or args[i] == "--group" or args[i] == "--dsh-group": skipthis=1 Environment["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args[i+1]) dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args[i+1]) if os.path.isfile(dsh_file): node_list = [] f = open(dsh_file, 'r') for line in f: l = line.strip().rstrip() if not l.startswith('#'): node_list.append(l) f.close() else: print("Unknown DSH group: %s" % args[i+1]) elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 Environment["SyslogFacility"] = args[i+1] elif args[i] == "--seed": skipthis=1 Environment.SeedRandom(args[i+1]) 
elif args[i] == "--warn-inactive": Environment["warn-inactive"] = 1 elif args[i] == "--schema": skipthis=1 Environment["Schema"] = args[i+1] elif args[i] == "--ais": Environment["Stack"] = "openais" elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["at-boot"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["at-boot"] = 0 else: usage(args[i+1]) elif args[i] == "--heartbeat" or args[i] == "--lha": Environment["Stack"] = "heartbeat" elif args[i] == "--hae": Environment["Stack"] = "openais" Environment["Schema"] = "hae" elif args[i] == "--stack": if args[i+1] == "fedora" or args[i+1] == "fedora-17" or args[i+1] == "fedora-18": Environment["Stack"] = "corosync" elif args[i+1] == "rhel-6": Environment["Stack"] = "cman" elif args[i+1] == "rhel-7": Environment["Stack"] = "corosync" else: Environment["Stack"] = args[i+1] skipthis=1 elif args[i] == "--once": Environment["scenario"] = "all-once" elif args[i] == "--boot": Environment["scenario"] = "boot" elif args[i] == "--valgrind-tests": Environment["valgrind-tests"] = 1 elif args[i] == "--no-loop-tests": Environment["loop-tests"] = 0 elif args[i] == "--loop-minutes": skipthis=1 try: Environment["loop-minutes"]=int(args[i+1]) except ValueError: usage(args[i]) elif args[i] == "--no-unsafe-tests": Environment["unsafe-tests"] = 0 elif args[i] == "--experimental-tests": Environment["experimental-tests"] = 1 + elif args[i] == "--remote-tests": + Environment["remote-tests"] = 1 + elif args[i] == "--set": skipthis=1 (name, value) = args[i+1].split('=') Environment[name] = value print "Setting %s = %s" % (name, value) elif args[i] == "--": break else: try: NumIter=int(args[i]) except ValueError: usage(args[i]) if Environment["DoBSC"]: NumIter = 2 LimitNodes = 1 Chosen.append("AddResource") Environment["ClobberCIB"] = 1 Environment["CIBResource"] = 0 Environment["logger"].append(FileLog(Environment, Environment["LogFileName"])) elif 
Environment["OutputFile"]: Environment["logger"].append(FileLog(Environment, Environment["OutputFile"])) elif Environment["SyslogFacility"]: Environment["logger"].append(SysLog(Environment)) if Environment["Stack"] == "heartbeat" or Environment["Stack"] == "lha": Environment["Stack"] = "heartbeat" Environment['CMclass'] = crm_lha elif Environment["Stack"] == "openais" or Environment["Stack"] == "ais" or Environment["Stack"] == "whitetank": Environment["Stack"] = "openais (whitetank)" Environment['CMclass'] = crm_whitetank Environment["use_logd"] = 0 elif Environment["Stack"] == "corosync" or Environment["Stack"] == "cs" or Environment["Stack"] == "mcp": Environment["Stack"] = "corosync 2.x" Environment['CMclass'] = crm_mcp Environment["use_logd"] = 0 elif Environment["Stack"] == "cman": Environment["Stack"] = "corosync (cman)" Environment['CMclass'] = crm_cman Environment["use_logd"] = 0 elif Environment["Stack"] == "v1": Environment["Stack"] = "corosync (plugin v1)" Environment['CMclass'] = crm_cs_v1 Environment["use_logd"] = 0 elif Environment["Stack"] == "v0": Environment["Stack"] = "corosync (plugin v0)" Environment['CMclass'] = crm_cs_v0 Environment["use_logd"] = 0 else: print "Unknown stack: "+Environment["Stack"] sys.exit(1) if len(node_list) < 1: print "No nodes specified!" 
sys.exit(1) if LimitNodes > 0: if len(node_list) > LimitNodes: print("Limiting the number of nodes configured=%d (max=%d)" %(len(node_list), LimitNodes)) while len(node_list) > LimitNodes: node_list.pop(len(node_list)-1) Environment["nodes"] = [] for n in node_list: if len(n.strip()): Environment["nodes"].append(n.strip()) discover = random.Random().choice(Environment["nodes"]) Environment["have_systemd"] = not rsh(discover, "systemctl list-units") # Detect syslog variant if not Environment.has_key("syslogd") or not Environment["syslogd"]: if Environment["have_systemd"]: # Systemd Environment["syslogd"] = rsh(discover, "systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", stdout=1) else: # SYS-V Environment["syslogd"] = rsh(discover, "chkconfig | grep syslog.*on | awk '{print $1}' | head -n 1", stdout=1) if not Environment.has_key("syslogd") or not Environment["syslogd"]: # default Environment["syslogd"] = "rsyslog" # Detect if the cluster starts at boot if not Environment.has_key("at-boot"): atboot = 0 if Environment["have_systemd"]: # Systemd atboot = atboot or not rsh(discover, "systemctl is-enabled heartbeat.service") atboot = atboot or not rsh(discover, "systemctl is-enabled corosync.service") atboot = atboot or not rsh(discover, "systemctl is-enabled pacemaker.service") else: # SYS-V atboot = atboot or not rsh(discover, "chkconfig | grep -e corosync.*on -e heartbeat.*on -e pacemaker.*on") Environment["at-boot"] = atboot # Try to determinw an offset for IPaddr resources if Environment["CIBResource"] and not Environment.has_key("IPBase"): network=rsh(discover, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip() Environment["IPBase"] = rsh(discover, "nmap -sn -n %s | grep 'scan report' | tail -n 1 | awk '{print $NF}' | sed 's:(::' | sed 's:)::'" % network, stdout=1).strip() if not Environment["IPBase"]: Environment["IPBase"] = "127.0.0.10" Environment.log("Could not determine 
an offset for IPaddr resources. Perhaps nmap is not installed on the nodes.") Environment.log("Defaulting to '%s', use --test-ip-base to override" % Environment["IPBase"]) # Create the Cluster Manager object cm = Environment['CMclass'](Environment) if TruncateLog: Environment.log("Truncating %s" % LogFile) lf = open(LogFile, "w"); if lf != None: lf.truncate(0) lf.close() Audits = AuditList(cm) if Environment["ListTests"] == 1 : Tests = TestList(cm, Audits) Environment.log("Total %d tests"%len(Tests)) for test in Tests : Environment.log(str(test.name)); sys.exit(0) if len(Chosen) == 0: Tests = TestList(cm, Audits) else: for TestCase in Chosen: match = None for test in TestList(cm, Audits): if test.name == TestCase: match = test if not match: usage("--choose: No applicable/valid tests chosen") else: Tests.append(match) # Scenario selection if Environment["scenario"] == "basic-sanity": scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests) elif Environment["scenario"] == "all-once": NumIter = len(Tests) scenario = AllOnce( cm, [ BootCluster(Environment), PacketLoss(Environment) ], Audits, Tests) elif Environment["scenario"] == "sequence": scenario = Sequence( cm, [ BootCluster(Environment), PacketLoss(Environment) ], Audits, Tests) elif Environment["scenario"] == "boot": scenario = Boot(cm, [ LeaveBooted(Environment)], Audits, []) else: scenario = RandomTests( cm, [ BootCluster(Environment), PacketLoss(Environment) ], Audits, Tests) Environment.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ") Environment.log("Stack: %s" % Environment["Stack"]) Environment.log("Schema: %s" % Environment["Schema"]) Environment.log("Scenario: %s" % scenario.__doc__) Environment.log("CTS Master: %s" % Environment["cts-master"]) Environment.log("CTS Logfile: %s" % Environment["OutputFile"]) Environment.log("Random Seed: %s" % Environment["RandSeed"]) Environment.log("Syslog variant: %s" % Environment["syslogd"].strip()) Environment.log("System log files: 
%s" % Environment["LogFileName"]) # Environment.log(" ") if Environment.has_key("IPBase"): Environment.log("Base IP for resources: %s" % Environment["IPBase"]) Environment.log("Cluster starts at boot: %d" % Environment["at-boot"]) Environment.dump() rc = Environment.run(scenario, NumIter) sys.exit(rc) diff --git a/cts/CTStests.py b/cts/CTStests.py index b5dd69ac5d..781f63cfa5 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1,2476 +1,2585 @@ '''CTS: Cluster Testing System: Tests module There are a few things we want to do here: ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. Add RecourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import time, os, re, types, string, tempfile, sys from stat import * from cts import CTS from cts.CTSaudits import * AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. 
Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Audits = [] self.timeout=120 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_experimental = 0 + self.is_remote = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark self.timer = {} # timers def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def log_mark(self, msg): self.CM.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def get_timer(self,key = "test"): try: return self.timer[key] except: return 0 def set_timer(self,key = "test"): self.timer[key] = time.time() return self.timer[key] def log_timer(self,key = "test"): elapsed = 0 if key in self.timer: elapsed = time.time() - self.timer[key] s = key == "test" and self.name or "%s:%s" %(self.name,key) self.CM.debug("%s runtime: %.2f" % (s, elapsed)) del self.timer[key] return elapsed def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.CM.log(("Test %s" % self.name).ljust(35) +" FAILED: %s" % reason) return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") 
self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.CM.log("Internal %s Audit %s FAILED." % (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def create_watch(self, patterns, timeout, name=None): if not name: name = self.name return CTS.LogWatcher(self.CM.Env, self.CM["LogFileName"], patterns, name, timeout) def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match=watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.CM.log(prefix + " " + match) errcount=errcount+1 else: break else: self.CM.log("Too many errors!") return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.CM.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.CM.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.CM.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.CM.Env["experimental-tests"]: return 0 + elif self.is_remote and not self.CM.Env["remote-tests"]: + return 0 elif self.CM.Env["benchmark"] and self.benchmark == 0: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.CM.debug("Found o2cb: 
%s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.CM.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] ################################################################### class StopTest(CTSTest): ################################################################### '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stop" def __call__(self, node): '''Perform the 'stop' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # patterns.append(self.CM["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) #self.debug("Checking %s will notice %s left"%(other, node)) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason=None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.CM.rsh(node, "/bin/ps axf", None) for line in output: self.CM.debug(line) (rc, output) = self.CM.rsh(node, "/usr/sbin/dlm_tool dump", None) for line in output: self.CM.debug(line) for regex in watch.unmatched: 
self.CM.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason="Missing shutdown pattern" self.CM.cluster_stable(self.CM["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... # ################################################################### class StartTest(CTSTest): ################################################################### '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name="start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" %(self.CM["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # ################################################################### class FlipTest(CTSTest): ################################################################### '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type="up->down" # Give the cluster time to recognize it's gone... 
time.sleep(self.CM["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type="down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) ################################################################### class RestartTest(CTSTest): ################################################################### '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. ''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_timer() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) ################################################################### class StonithdTest(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.CM.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") is_dc = self.CM.is_node_dc(node) watchpats = [] watchpats.append("log_operation: Operation .* for host '%s' with device .* returned: 0" % node) watchpats.append("tengine_stonith_notify: Peer %s was terminated .*: OK" % node) if self.CM.Env["at-boot"] == 0: self.CM.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node]="down" else: 
self.CM.debug("Expecting %s to come up again %d" % (node, self.CM.Env["at-boot"])) watchpats.append("%s .*do_state_transition: .* S_STARTING -> S_PENDING" % node) watchpats.append("%s .*do_state_transition: .* S_PENDING -> S_NOT_DC" % node) watch = self.create_watch(watchpats, 30 + self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() origin = self.CM.Env.RandomGen.choice(self.CM.Env["nodes"]) rc = self.CM.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node) if rc == 194: # 194 - 256 = -62 = Timer expired # # Look for the patterns, usually this means the required # device was running on the node to be fenced - or that # the required devices were in the process of being loaded # and/or moved # # Effectively the node committed suicide so there will be # no confirmation, but pacemaker should be watching and # fence the node again self.CM.log("Fencing command on %s to fence %s timed out" % (origin, node)) elif origin != node and rc != 0: self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc)) elif origin == node and rc != 255: # 255 == broken pipe, ie. 
the node was fenced as epxected self.CM.log("Logcally originated fencing returned %d" % rc) self.set_timer("fence") matched = watch.lookforall() self.log_timer("fence") self.set_timer("reform") if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") self.log_timer("reform") return self.success() def errorstoignore(self): return [ self.CM["Pat:Fencing_start"] % ".*", self.CM["Pat:Fencing_ok"] % ".*", "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery", "error: remote_op_done: Operation reboot of .*by .* for stonith_admin.*: Timer expired", ] def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env.has_key("DoFencing"): return self.CM.Env["DoFencing"] return 1 AllTestClasses.append(StonithdTest) ################################################################### class StartOnebyOne(CTSTest): ################################################################### '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns=CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... 
ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed=[] self.set_timer() for node in self.CM.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) ################################################################### class SimulStart(CTSTest): ################################################################### '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) ################################################################### class SimulStop(CTSTest): ################################################################### '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... 
ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) ################################################################### class StopOnebyOne(CTSTest): ################################################################### '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed=[] self.set_timer() for node in self.CM.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) ################################################################### class RestartOnebyOne(CTSTest): ################################################################### '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... 
ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail=[] self.set_timer() self.restart = RestartTest(self.CM) for node in self.CM.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" %(len(did_fail), repr(did_fail))) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) ################################################################### class PartialStart(CTSTest): ################################################################### '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.stop = StopTest(cm) #self.is_unsafe = 1 def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! 
This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("crmd.*Connecting to cluster infrastructure") watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stop(node) if not ret: return self.failure("%s did not stop in time" % node) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # We might do some fencing in the 2-node case if we make it up far enough return [ """Executing reboot fencing operation""" ] # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) ####################################################################### class StandbyTest(CTSTest): ####################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Standby" self.benchmark = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) 
def __call__(self, node): self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") self.CM.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) watchpats = [] watchpats.append("do_state_transition:.*-> S_POLICY_ENGINE") watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.CM.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.set_timer("on") ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.SetStandbyMode(node, "off") return self.failure("cluster didn't react to standby change on %s" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_timer("on") self.CM.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.CM.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.CM.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.set_timer("off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) 
self.log_timer("off") return self.success() AllTestClasses.append(StandbyTest) ####################################################################### class ValgrindTest(CTSTest): ####################################################################### '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # Enable valgrind self.logPat = "/tmp/%s-*.valgrind" % self.name self.CM.Env["valgrind-prefix"] = self.name self.CM.rsh(node, "rm -f %s" % self.logPat, None) ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") for node in self.CM.Env["nodes"]: (rc, output) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) for line in output: self.CM.debug(line) return self.success() def teardown(self, node): # Disable valgrind self.CM.Env["valgrind-prefix"] = None # Return all nodes to normal ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks leaked = [] self.stop = StopTest(self.CM) for node in self.CM.Env["nodes"]: (rc, ps_out) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.CM.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) for line in ps_out: self.CM.log(line) (rc, output) = self.CM.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logPat, None) for line in output: self.CM.log(line) (rc, output) = self.CM.rsh(node, "cat %s" % self.logPat, None) for line in output: self.CM.debug(line) self.CM.rsh(node, "rm -f %s" 
% self.logPat, None) return leaked def __call__(self, node): leaked = self.find_leaks() if len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ] ####################################################################### class StandbyLoopTest(ValgrindTest): ####################################################################### '''Check for memory leaks by putting a node in and out of standby for an hour''' def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name="StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() AllTestClasses.append(StandbyLoopTest) ############################################################################## class BandwidthTest(CTSTest): ############################################################################## # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. 
'''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount()<1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.CM.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.CM.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.CM.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.CM.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.CM.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" 
% rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count=count+1 linesplit = string.split(line," ") for j in range(len(linesplit)-1): if linesplit[j]=="udp": break if linesplit[j]=="length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = string.split(T1,":") time2split = string.split(timesplit[2],".") time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = string.split(line," ") for j in range(len(linessplit)-1): if linessplit[j] =="udp": break if linesplit[j]=="length:": break try: sum=int(linessplit[j+1])+sum except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = string.split(T2,":") time2split = string.split(timesplit[2],".") time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return (sum*8)/time def is_applicable(self): '''BandwidthTest never applicable''' return 0 AllTestClasses.append(BandwidthTest) ################################################################### class MaintenanceMode(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="MaintenanceMode" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max=30 #self.is_unsafe = 1 self.benchmark = 1 self.action = "asyncmon" self.interval = 0 self.rid="maintenanceDummy" def toggleMaintenanceMode(self, node, action): pats = [] pats.append(self.CM["Pat:DC_IDLE"]) # fail the resource right after turning Maintenance mode on 
# verify it is not recovered until maintenance mode is turned off if action == "On": pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) else: pats.append("process_lrm_event: LRM operation %s_stop_0.*confirmed.*ok" % self.rid) pats.append("process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % self.rid) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.debug("Turning maintenance mode %s" % action) self.CM.rsh(node, self.CM["MaintenanceMode%s" % (action)]) if (action == "On"): self.CM.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover%s" % (action)) watch.lookforall() self.log_timer("recover%s" % (action)) if watch.unmatched: self.CM.debug("Failed to find patterns when turning maintenance mode %s" % action) return repr(watch.unmatched) return "" def insertMaintenanceDummy(self, node): pats = [] pats.append(".*%s.*process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % (node, self.rid)) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.AddDummyRsc(node, self.rid) self.set_timer("addDummy") watch.lookforall() self.log_timer("addDummy") if watch.unmatched: self.CM.debug("Failed to find patterns when adding maintenance dummy resource") return repr(watch.unmatched) return "" def removeMaintenanceDummy(self, node): pats = [] pats.append("process_lrm_event: LRM operation %s_stop_0.*confirmed.*ok" % self.rid) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.RemoveDummyRsc(node, self.rid) self.set_timer("removeDummy") watch.lookforall() self.log_timer("removeDummy") if watch.unmatched: self.CM.debug("Failed to find patterns when removing maintenance dummy resource") return repr(watch.unmatched) return "" def managedRscList(self, node): rscList = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.managed(): rscList.append(tmp.id) return rscList def 
verifyResources(self, node, rscList, managed): managedList = list(rscList) managed_str = "managed" if not managed: managed_str = "unmanaged" (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if managed and not tmp.managed(): continue elif not managed and tmp.managed(): continue elif managedList.count(tmp.id): managedList.remove(tmp.id) if len(managedList) == 0: self.CM.debug("Found all %s resources on %s" % (managed_str, node)) return True self.CM.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList)) return False def __call__(self, node): '''Perform the 'MaintenanceMode' test. ''' self.incr("calls") verify_managed = False verify_unmanaged = False failPat = "" ret = self.startall(None) if not ret: return self.failure("Setup failed") # get a list of all the managed resources. We use this list # after enabling maintenance mode to verify all managed resources # become un-managed. After maintenance mode is turned off, we use # this list to verify all the resources become managed again. managedResources = self.managedRscList(node) if len(managedResources) == 0: self.CM.log("No managed resources on %s" % node) return self.skipped() # insert a fake resource we can fail during maintenance mode # so we can verify recovery does not take place until after maintenance # mode is disabled. failPat = failPat + self.insertMaintenanceDummy(node) # toggle maintenance mode ON, then fail dummy resource. failPat = failPat + self.toggleMaintenanceMode(node, "On") # verify all the resources are now unmanaged if self.verifyResources(node, managedResources, False): verify_unmanaged = True # Toggle maintenance mode OFF, verify dummy is recovered. failPat = failPat + self.toggleMaintenanceMode(node, "Off") # verify all the resources are now managed again if self.verifyResources(node, managedResources, True): verify_managed = True # Remove our maintenance dummy resource. 
failPat = failPat + self.removeMaintenanceDummy(node) self.CM.cluster_stable() if failPat != "": return self.failure("Unmatched patterns: %s" % (failPat)) elif verify_unmanaged is False: return self.failure("Failed to verify resources became unmanaged during maintenance mode") elif verify_managed is False: return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """LogActions: Recover %s""" % self.rid, """Unknown operation: fail""", """(ERROR|error): sending stonithRA op to stonithd failed.""", """(ERROR|error): process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), """(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(MaintenanceMode) ################################################################### class ResourceRecover(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ResourceRecover" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max=30 self.rid=None self.rid_alt=None #self.is_unsafe = 1 self.benchmark = 1 # these are the values used for the new LRM API call self.action = "asyncmon" self.interval = 0 def __call__(self, node): '''Perform the 'ResourceRecover' test. 
''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") resourcelist = self.CM.active_resources(node) # if there are no resourcelist, return directly if len(resourcelist)==0: self.CM.log("No active resources on %s" % node) return self.skipped() self.rid = self.CM.Env.RandomGen.choice(resourcelist) self.rid_alt = self.rid rsc = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.id == self.rid: rsc = tmp # Handle anonymous clones that get renamed self.rid = rsc.clone_id break if not rsc: return self.failure("Could not find %s in the resource list" % self.rid) self.CM.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id)) pats = [] pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) if rsc.managed(): pats.append("process_lrm_event: LRM operation %s_stop_0.*confirmed.*ok" % self.rid) if rsc.unique(): pats.append("process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % self.rid) else: # Anonymous clones may get restarted with a different clone number pats.append("process_lrm_event: LRM operation .*_start_0.*confirmed.*ok") watch = self.create_watch(pats, 60) watch.setwatch() self.CM.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover") watch.lookforall() self.log_timer("recover") self.CM.cluster_stable() recovered=self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.CM.debug("%s is running on: %s" %(self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors which should be 
ignored''' return [ """Updating failcount for %s""" % self.rid, """LogActions: Recover %s""" % self.rid, """LogActions: Recover %s""" % self.rid_alt, """Unknown operation: fail""", """(ERROR|error): sending stonithRA op to stonithd failed.""", """(ERROR|error): process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), """(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) ################################################################### class ComponentFail(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ComponentFail" self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.CM["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.CM.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.CM.Env.RandomGen.choice(self.complist) self.CM.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "aisexec" and chosen.name != "corosync": if self.CM["Name"] != "crm-lha" or chosen.name != "pengine": self.patterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # In an ideal world, this next stuff should be in the "chosen" object as a member 
function if self.CM["Name"] == "crm-lha" and chosen.triggersreboot: # Make sure the node goes down and then comes back up if it should reboot... for other in self.CM.Env["nodes"]: if other != node: self.patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) self.patterns.append(self.CM["Pat:Slave_started"] % node) self.patterns.append(self.CM["Pat:Local_started"] % node) if chosen.dc_only: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s .*Process %s:.* exited" %(node, chosen.name)) self.okerrpatterns.append("%s .*I_ERROR.*crmdManagedChildDied" %node) self.okerrpatterns.append("%s .*The %s subsystem terminated unexpectedly" %(node, chosen.name)) self.okerrpatterns.append("(ERROR|error): Client .* exited with return code") else: # Sometimes this won't be in the log... self.okerrpatterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildExit"]) # supply a copy so self.patterns doesnt end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] stonithPats.append(self.CM["Pat:Fencing_ok"] % node) stonith = self.create_watch(stonithPats, 0) stonith.setwatch() # set the watch for stable watch = self.create_watch( tmpPats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting for any STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") self.CM.cluster_stable(self.CM["StartTime"]) self.CM.debug("Checking if %s was shot" % node) shot = stonith.look(60) 
if shot: self.CM.debug("Found: "+ repr(shot)) self.okerrpatterns.append(self.CM["Pat:Fencing_start"] % node) if self.CM.Env["at-boot"] == 0: self.CM.ShouldBeStatus[node]="down" # If fencing occurred, chances are many (if not all) the expected logs # will not be sent - or will be lost when the node reboots return self.success() # check for logs indicating a graceful recovery matched = watch.lookforall(allow_multiple_matches=1) if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) #################################################################### class SplitBrainTest(CTSTest): #################################################################### '''It is used to test split-brain. 
when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "SplitBrain" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.is_experimental = 1 def isolate_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]) + " from " +repr(partition)) if len(other_nodes) == 0: return 1 self.CM.debug("Creating partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: if not self.CM.isolate_node(node, other_nodes): self.CM.log("Could not isolate %s" % node) return 0 return 1 def heal_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"])) if len(other_nodes) == 0: return 1 self.CM.debug("Healing partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: self.CM.unisolate_node(node, other_nodes) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") self.passed = 1 partitions = {} ret = self.startall(None) if not ret: return self.failure("Setup failed") while 1: # Retry until we get multiple partitions partitions = {} p_max = len(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: p = self.CM.Env.RandomGen.randint(1, p_max) if not partitions.has_key(p): partitions[p]= [] partitions[p].append(node) p_max = len(partitions.keys()) if p_max > 1: break # else, try again self.CM.debug("Created %d partitions" % p_max) for key in partitions.keys(): self.CM.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) # Disabling STONITH to reduce test complexity for now self.CM.rsh(node, "crm_attribute -V -n stonith-enabled -v 
false") for key in partitions.keys(): self.isolate_partition(partitions[key]) count = 30 while count > 0: if len(self.CM.find_partitions()) != p_max: time.sleep(10) else: break else: self.failure("Expected partitions were not created") # Target number of partitions formed - wait for stability if not self.CM.cluster_stable(): self.failure("Partitioned cluster not stable") # Now audit the cluster state self.CM.partitions_expected = p_max if not self.audit(): self.failure("Audits failed") self.CM.partitions_expected = 1 # And heal them again for key in partitions.keys(): self.heal_partition(partitions[key]) # Wait for a single partition to form count = 30 while count > 0: if len(self.CM.find_partitions()) != 1: time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not reform") # Wait for it to have the right number of members count = 30 while count > 0: members = [] partitions = self.CM.find_partitions() if len(partitions) > 0: members = partitions[0].split() if len(members) != len(self.CM.Env["nodes"]): time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not completely reform") # Wait up to 20 minutes - the delay is more preferable than # trying to continue with in a messed up state if not self.CM.cluster_stable(1200): self.failure("Reformed cluster not stable") answer = raw_input('Continue? 
[nY]') if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.CM.Env["DoFencing"]: self.CM.rsh(node, "crm_attribute -V -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "Another DC detected:", "(ERROR|error): attrd_cib_callback: .*Application of an update diff failed", "crmd_ha_msg_callback:.*not in our membership list", "CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.CM.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) #################################################################### class Reattach(CTSTest): #################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def setup(self, node): attempt=0 if not self.startall(None): return None # Make sure we are really _really_ stable and that all # resources, including those that depend on transient node # attributes, are started while not self.CM.cluster_stable(double_check=True): if attempt < 5: attempt += 1 self.CM.debug("Not stable yet, re-testing") else: self.CM.log("Cluster is not stable") return None return 1 def teardown(self, node): # Make sure 'node' is up start = StartTest(self.CM) start(node) is_managed = self.CM.rsh(node, "crm_attribute -Q -G -t crm_config -n is-managed-default -d true", 1) is_managed = is_managed[:-1] # Strip off the newline if is_managed != "true": self.CM.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed)) managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.rsh(node, "crm_attribute 
-V -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) self.CM.log("Could not re-enable resource management") return 0 return 1 def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.CM.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Disable resource management") self.CM.rsh(node, "crm_attribute -V -n is-managed-default -v false") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] pats.append("process_lrm_event: .*_stop") pats.append("process_lrm_event: .*_start") pats.append("process_lrm_event: .*_promote") pats.append("process_lrm_event: .*_demote") pats.append("process_lrm_event: .*_migrate") watch = self.create_watch(pats, 60, "ShutdownActivity") watch.setwatch() self.CM.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Couldn't shut down the cluster") self.CM.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() managed = 
self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not enabled") self.CM.cluster_stable() # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.CM.debug("Ignoring start actions for %s" % r.id) ignore.append("process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % r.id) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ "resources were active at shutdown", "pingd: .*(ERROR|error): send_ipc_message:", "pingd: .*(ERROR|error): send_update:", "lrmd: .*(ERROR|error): notify_client:", ] def is_applicable(self): if self.CM["Name"] == "crm-lha": return None return 1 AllTestClasses.append(Reattach) #################################################################### class SpecialTest1(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... 
ret = self.stopall(None) if not ret: return self.failure("Could not stop all nodes") # Start the selected node ret = self.restart1(node) if not ret: return self.failure("Could not start "+node) # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Could not start the remaining nodes") return self.success() AllTestClasses.append(SpecialTest1) #################################################################### class HAETest(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active=0 (rc, lines) = self.CM.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.CM.debug("Resource %s is still inactive" % resource) elif rc == 234: self.CM.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.CM.log("Cluster is inactive") return 0 elif rc != 0: self.CM.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.CM.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, 
line) if r.rtype == "controld" and r.parent != "NA": self.CM.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env["Schema"] == "hae": return 1 return None #################################################################### class HAERoleTest(HAETest): #################################################################### def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. ''' HAETest.__init__(self,cm) self.name="HAERoleTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_resource -V -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) #################################################################### class HAEStandbyTest(HAETest): #################################################################### 
'''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name="HAEStandbyTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_standby -V -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) ################################################################### class NearQuorumPointTest(CTSTest): ################################################################### ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. 
''' self.incr("calls") startset = [] stopset = [] stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint") #decide what to do with each node for node in self.CM.Env["nodes"]: action = self.CM.Env.RandomGen.choice(["start","stop"]) #action = self.CM.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.CM.debug("start nodes:" + repr(startset)) self.CM.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.CM["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.CM["Pat:Slave_started"] % node) watchpats.append(self.CM["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.CM["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping))) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.CM["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() self.CM.fencing_cleanup("NearQuorumPoint", stonith) return self.success() self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) self.CM.fencing_cleanup,("NearQuorumPoint", stonith) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residule for node in stopset: self.CM.rsh(node, self.CM["StopCmd"]) return 
self.success() if len(upnodes) > 0: self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.CM.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() def is_applicable(self): if self.CM["Name"] == "crm-cman": return None return 1 AllTestClasses.append(NearQuorumPointTest) ################################################################### class RollingUpgradeTest(CTSTest): ################################################################### '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Start all remaining nodes ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version) self.CM.log("Installing %s on %s with %s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = self.CM.rsh(node, "mkdir -p %s" % target_dir) rc = self.CM.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.CM.rsh(node, "ls -1 %s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.CM.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.CM.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if 
start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.CM.Env["current-version"], start) def downgrade(self, node, start=1): return self.install(node, self.CM.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.CM.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not self.CM.Env.has_key("rpm-dir"): return None if not self.CM.Env.has_key("current-version"): return None if not self.CM.Env.has_key("previous-version"): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) ################################################################### class BSC_AddResource(CTSTest): ################################################################### '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="AddResource" self.resource_offset = 0 self.cib_cmd="""cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"] = ip if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: 
return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.CM["DeadTime"]): return self.failure("Unstable cluster") return self.success() def make_ip_resource(self, node, id, rclass, type, ip): self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint=""" """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.CM.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.CM.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.CM.Env["DoBSC"]: return 1 return None AllTestClasses.append(BSC_AddResource) class SimulStopLite(CTSTest): ################################################################### '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.incr("WasStarted") watchpats.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # watchpats.append(self.CM["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.success() # Stop all the nodes - at about the same time... 
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.set_timer() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() # Make sure they're completely down with no residule for node in self.CM.Env["nodes"]: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() did_fail=0 up_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail=1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.CM.log("Warn: All nodes stopped but CTS didnt detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 ################################################################### class SimulStartLite(CTSTest): ################################################################### '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartList' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... node_list = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.incr("WasStopped") node_list.append(node) self.set_timer() while len(node_list) > 0: watchpats = [ ] uppat = self.CM["Pat:Slave_started"] if self.CM.upcount() == 0: uppat = self.CM["Pat:Local_started"] watchpats.append(self.CM["Pat:DC_IDLE"]) for node in node_list: watchpats.append(uppat % node) watchpats.append(self.CM["Pat:InfraUp"] % node) watchpats.append(self.CM["Pat:PacemakerUp"] % node) # Start all the nodes - at about the same time... 
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() stonith = self.CM.prepare_fencing_watcher(self.name) for node in node_list: self.CM.StartaCMnoBlock(node) watch.lookforall() node_list = self.CM.fencing_cleanup(self.name, stonith) # Remove node_list messages from watch.unmatched for node in node_list: if watch.unmatched: watch.unmatched.remove(uppat % node) if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Startup pattern not found: %s" %(regex)) if not self.CM.cluster_stable(): return self.failure("Cluster did not stabilize") did_fail=0 unstable = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.CM.Env["nodes"]: if not self.CM.node_stable(node): did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) return self.success() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result +################################################################### +class RemoteSimple(CTSTest): +################################################################### + def __init__(self, cm): + CTSTest.__init__(self,cm) + self.name="RemoteSimple" + self.start = StartTest(cm) + self.startall = SimulStartLite(cm) + self.num_containers = 2 + self.is_remote = 1 + self.failed = 0 + self.fail_string = "" + + def start_lxc_simple(self, node): + # restore any artifacts laying around from a previous test. 
+ self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p -r &>/dev/null") + for num in range(self.num_containers): + self.CM.rsh(node, "virsh -c lxc:/// destroy lxc%d" % (num+1)) + self.CM.rsh(node, "virsh -c lxc:/// undefine lxc%d" % (num+1)) + + self.CM.rsh(node, "rm -rf /var/lib/pacemaker/cts/lxc") + + # generate the containers, put them in the config, add some resources to them + pats = [ ] + watch = self.create_watch(pats, 90) + watch.setwatch() + pats.append("process_lrm_event: LRM operation lxc1_start_0.*confirmed.*ok") + pats.append("process_lrm_event: LRM operation lxc2_start_0.*confirmed.*ok") + pats.append("process_lrm_event: LRM operation lxc-clone_start_0.*confirmed.*ok") + pats.append("process_lrm_event: LRM operation lxc-clone_start_0.*confirmed.*ok") + + self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -c %d &>/dev/null" % self.num_containers) + self.set_timer("remoteSimpleInit") + watch.lookforall() + self.log_timer("remoteSimpleInit") + if watch.unmatched: + self.fail_string = "Unmated patterns: %s" % (repr(watch.unmatched)) + self.failed = 1 + + def cleanup_lxc_simple(self, node): + + pats = [ ] + # if the test failed, attempt to clean up the cib and libvirt environment + # as best as possible + if self.failed == 1: + # restore libvirt and cib + self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p -r &>/dev/null") + self.CM.rsh(node, "rm -rf /var/lib/pacemaker/cts/lxc") + self.CM.rsh(node, "crm_resource -C -r container1 &>/dev/null") + self.CM.rsh(node, "crm_resource -C -r container2 &>/dev/null") + self.CM.rsh(node, "crm_resource -C -r lxc1 &>/dev/null") + self.CM.rsh(node, "crm_resource -C -r lxc2 &>/dev/null") + self.CM.rsh(node, "crm_resource -C -r lxc-clone &>/dev/null") + time.sleep(20) + + return + + watch = self.create_watch(pats, 60) + watch.setwatch() + + pats.append("process_lrm_event: LRM operation container1_stop_0.*confirmed.*ok") + pats.append("process_lrm_event: LRM operation 
container2_stop_0.*confirmed.*ok") + + self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null") + self.set_timer("remoteSimpleCleanup") + watch.lookforall() + self.log_timer("remoteSimpleCleanup") + + if watch.unmatched: + self.fail_string = "Unmated patterns: %s" % (repr(watch.unmatched)) + self.failed = 1 + + # cleanup libvirt + self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -r &>/dev/null") + self.CM.rsh(node, "rm -rf /var/lib/pacemaker/cts/lxc") + + def __call__(self, node): + '''Perform the 'RemoteSimple' test. ''' + self.incr("calls") + + ret = self.startall(None) + if not ret: + return self.failure("Setup failed, start all nodes failed.") + + # TODO add an option to lxc_autogen to verify environment is capable of lxc + self.start_lxc_simple(node) + self.cleanup_lxc_simple(node) + + if self.failed == 1: + return self.failure(self.fail_string) + + return self.success() + + def errorstoignore(self): + '''Return list of errors which should be ignored''' + return [ """Updating failcount for ping""", + """LogActions: Recover ping""", + """LogActions: Recover lxc-clone""", + """LogActions: Recover container""", + """Unknown operation: fail""", + """notice: operation_finished: ping-""", + """(ERROR|error): sending stonithRA op to stonithd failed.""", + ] + +AllTestClasses.append(RemoteSimple) + # vim:ts=4:sw=4:et: diff --git a/cts/Makefile.am b/cts/Makefile.am index e01ac103f0..cb86db87ae 100644 --- a/cts/Makefile.am +++ b/cts/Makefile.am @@ -1,46 +1,47 @@ # # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in CLEANFILES = LSBDummy EXTRA_DIST = $(cts_SCRIPTS) $(cts_DATA) ctsdir = $(datadir)/$(PACKAGE)/tests/cts ctslibdir = $(pyexecdir)/cts ctslib_PYTHON = __init__.py \ CTSvars.py \ CM_lha.py \ CM_ais.py \ CTS.py \ CTSaudits.py \ CTStests.py \ CTSscenarios.py \ CIB.py cts_DATA = README cts.supp cts_SCRIPTS = cluster_test \ CTSlab.py \ + lxc_autogen.sh \ LSBDummy \ $(top_srcdir)/fencing/fence_dummy SUBDIRS = benchmark diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in new file mode 100755 index 0000000000..50e0cd5fdd --- /dev/null +++ b/cts/lxc_autogen.sh.in @@ -0,0 +1,288 @@ +#!/bin/bash + +containers="2" +download=0 +# different than default libvirt network in case this is run nested in a KVM instance +addr="192.168.123.1" +restore=0 +restore_pcmk=0 +generate=0 +cib=0 +add_clone=0 +working_dir="@CRM_CONFIG_CTS@/lxc" +curdir=$(pwd) + +function helptext() { + echo "lxc_autogen.sh - A tool for generating libvirt lxc containers for testing purposes." + echo "" + echo "Usage: lxc-autogen [options]" + echo "" + echo "Options:" + echo "-g, --generate Generate libvirt lxc environment in the directory this script is run from." + echo "-r, --restore-libvirt Restore the default network, and libvirt config to before this script ran." + echo "-p, --restore-cib Remove cib entries this script generated." 
+ echo "" + echo "-a, --add-cib Add remote-node entries for each lxc instance into the cib" + echo "-m, --add-clone Add clone resource shared between remote-nodes" + echo "-d, --download-agent Download and install the latest VirtualDomain agent." + echo "-c, --containers Specify the number of containers to generate, defaults to $containers. Used with -g" + echo "-n, --network What network to override default libvirt network to. Example: -n 192.168.123.1. Used with -g" + echo "" + exit $1 +} + +while true ; do + case "$1" in + --help|-h|-\?) helptext 0;; + -c|--containers) containers="$2"; shift; shift;; + -d|--download-agent) download=1; shift;; + -n|--network) addr="$2"; shift; shift;; + -r|--restore-libvirt) restore=1; shift;; + -p|--restore-cib) restore_pcmk=1; shift;; + -g|--generate) generate=1; shift;; + -a|--add-cib) cib=1; shift;; + -m|--add-clone) add_clone=1; shift;; + "") break;; + *) helptext 1;; + esac +done + +#strip last digits off addr +tmp="s/\.$(echo "$addr" | tr '.' ' ' | awk '{print $4}')$//g" +addr=$(echo $addr | sed -e ${tmp}) + +set_network() +{ + rm -f cur_network.xml + cat << END >> cur_network.xml + + default + 41ebdb84-7134-1111-a136-91f0f1119225 + + + + + + + + + +END + + ls restore_default.xml > /dev/null 2>&1 + if [ $? -ne 0 ]; then + virsh net-dumpxml default > restore_default.xml + fi + virsh net-destroy default + virsh net-undefine default + virsh net-define cur_network.xml + virsh net-start default +} + +generate() +{ + set_network + + #generate pacemaker remote key + ls /etc/pacemaker/authkey > /dev/null 2>&1 + if [ $? 
!= 0 ]; then + mkdir -p /etc/pacemaker + dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 + fi + + # Generate libvirt domains in xml + for (( c=1; c <= $containers; c++ )) + do + rm -rf lxc$c-filesystem + mkdir -p lxc$c-filesystem/var/run/ + mkdir -p lxc$c-filesystem/usr/var/run + rm -f lxc$c.xml + cat << END >> lxc$c.xml + + lxc$c + 102400 + + exe + $working_dir/lxc$c-filesystem/launch-helper + + + + + + + + + + + + + + + + + +END + rm -f container$c.cib + cat << END >> container$c.cib + + + + + + + + + + + + + + + +END + + rm -f lxc-ms$c.cib + cat << END >> lxc-ms.cib + + + + + + + + + + + +END + + rm -f lxc$c-filesystem/launch-helper + cat << END >> lxc$c-filesystem/launch-helper +#!/bin/bash +ifconfig eth0 $addr.10$c +route add 0.0.0.0 gw $addr.1 eth0 +/usr/sbin/pacemaker_remoted +END + chmod 711 lxc$c-filesystem/launch-helper + + cat << END >> /etc/hosts +$addr.10$c lxc$c +END + done +} + +apply_cib_clone() +{ + cibadmin -Q > cur.cib + export CIB_file=cur.cib + + cibadmin -o resources -C -x lxc-ms.cib + for tmp in $(ls lxc*.xml); do + tmp=$(echo $tmp | sed -e 's/\.xml//g') + echo "" > tmp_constraint + cibadmin -o constraints -C -x tmp_constraint + echo "" > tmp_constraint + cibadmin -o constraints -C -x tmp_constraint + rm -f tmp_constraint + done + unset CIB_file + + cibadmin --replace --xml-file cur.cib + rm -f cur.cib +} + +apply_cib_entries() +{ + node=$(crm_node -n) + + cibadmin -Q > cur.cib + export CIB_file=cur.cib + for tmp in $(ls container*.cib); do + cibadmin -o resources -C -x $tmp + + tmp=$(echo $tmp | sed -e 's/\.cib//g') + crm_resource -M -r $tmp -H $node + done + unset CIB_file + + cibadmin --replace --xml-file cur.cib + rm -f cur.cib +} + +restore_cib() +{ + node=$(crm_node -n) + cibadmin -Q > cur.cib + export CIB_file=cur.cib + + for tmp in $(ls lxc*.xml); do + tmp=$(echo $tmp | sed -e 's/\.xml//g') + echo "" > tmp_constraint + cibadmin -o constraints -D -x tmp_constraint + echo "" > tmp_constraint + cibadmin -o constraints -D -x 
tmp_constraint + rm -f tmp_constraint + done + cibadmin -o resources -D -x lxc-ms.cib + + for tmp in $(ls container*.cib); do + tmp=$(echo $tmp | sed -e 's/\.cib//g') + crm_resource -U -r $tmp -H $node + crm_resource -D -r $tmp -t primitive + done + unset CIB_file + + cibadmin --replace --xml-file cur.cib + rm -f cur.cib +} + +restore_libvirt() +{ + for tmp in $(ls lxc*.xml); do + tmp=$(echo $tmp | sed -e 's/\.xml//g') + virsh -c lxc:/// destroy $tmp > /dev/null 2>&1 + virsh -c lxc:/// undefine $tmp > /dev/null 2>&1 + + sed -i.bak "/...\....\....\..* ${tmp}/d" /etc/hosts + echo "$tmp destroyed" + done + + ls restore_default.xml > /dev/null 2>&1 + if [ $? -eq 0 ]; then + virsh net-destroy default > /dev/null 2>&1 + virsh net-undefine default > /dev/null 2>&1 + virsh net-define restore_default.xml + virsh net-start default + if [ $? -eq 0 ]; then + echo "default network restored" + fi + fi + rm -f restore_default.xml > /dev/null 2>&1 +} + +mkdir -p $working_dir +cd $working_dir + +if [ $download -eq 1 ]; then + wget https://raw.github.com/ClusterLabs/resource-agents/master/heartbeat/VirtualDomain + chmod 755 VirtualDomain + mv -f VirtualDomain /usr/lib/ocf/resource.d/heartbeat/VirtualDomain +fi +if [ $restore_pcmk -eq 1 ]; then + restore_cib +fi +if [ $restore -eq 1 ]; then + restore_libvirt +fi +if [ $generate -eq 1 ]; then + generate +fi +if [ $cib -eq 1 ]; then + apply_cib_entries +fi +if [ $add_clone -eq 1 ]; then + apply_cib_clone +fi + +cd $curdir + diff --git a/doc/pcs-crmsh-quick-ref.md b/doc/pcs-crmsh-quick-ref.md index a28960fce1..cfecd114c3 100644 --- a/doc/pcs-crmsh-quick-ref.md +++ b/doc/pcs-crmsh-quick-ref.md @@ -1,159 +1,159 @@ ## Display the configuration crmsh # crm configure show pcs # pcs cluster cib ## Display the current status crmsh # crm_mon -1 pcs # pcs status ## Node standby crmsh # crm node standby pcs # pcs cluster standby pcmk-1 crmsh # crm node online pcs # pcs cluster unstandby pcmk-1 ## Setting configuration options crmsh # crm 
configure property stonith-enabled=false pcs # pcs property set stonith-enabled=false ## Listing available resources crmsh # crm ra classes pcs # pcs resource standards crmsh # crm ra list ocf pacemaker pcs # pcs resource agents ocf:pacemaker ## Creating a resource crmsh # crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \ params ip=192.168.122.120 cidr_netmask=32 \ op monitor interval=30s pcs # pcs resource create ClusterIP IPaddr2 ip=192.168.0.120 cidr_netmask=32 The standard and provider (`ocf:heartbeat`) are determined automatically since `IPaddr2` is unique. The monitor operation is automatically created based on the agent's metadata. ## Start a resource crmsh # crm resource start ClusterIP pcs # pcs resource start ClusterIP ## Stop a resource crmsh # crm resource stop ClusterIP pcs # pcs resource stop ClusterIP ## Remove a resource crmsh # crm configure delete ClusterIP pcs # ## Update a resource crmsh # crm configure edit ClusterIP pcs # pcs resource update ClusterIP clusterip_hash=sourceip ## Display a resource crmsh # pcs # pcs resource show WebFS ## Resource defaults crmsh # crm configure rsc_defaults resource-stickiness=100 - pcs # pcs rsc defaults resource-stickiness=100 + pcs # pcs resource rsc defaults resource-stickiness=100 Listing the current defaults: - pcs # pcs rsc defaults + pcs # pcs resource rsc defaults ## Operation defaults crmsh # crm configure op_defaults timeout=240s pcs # pcs resource op defaults timeout=240s Listing the current defaults: pcs # pcs resource op defaults ## Colocation crmsh # crm configure colocation website-with-ip INFINITY: WebSite ClusterIP pcs # pcs constraint colocation add WebSite ClusterIP INFINITY With roles crmsh # pcs # ## Start/stop ordering crmsh # crm configure order apache-after-ip mandatory: ClusterIP WebSite pcs # pcs constraint order ClusterIP then WebSite With roles: crmsh # pcs # ## Preferred locations crmsh # crm configure location prefer-pcmk-1 WebSite 50: pcmk-1 pcs # pcs constraint location 
WebSite prefers pcmk-1=50 With roles: crmsh # pcs # ## Moving resources crmsh # crm resource move WebSite pcmk-1 - pcs # pcs constraint location WebSite prefers pcmk-1=INFINITY + pcs # pcs resource move WebSite pcmk-1 crmsh # crm resource unmove WebSite - pcs # pcs constraint rm location-WebSite-pcmk-1-INFINITY + pcs # pcs resource unmove WebSite ## Creating a clone crmsh # configure clone WebIP ClusterIP meta globally-unique="true" clone-max="2" clone-node-max="2" pcs # pcs resource clone ClusterIP globally-unique=true clone-max=2 clone-node-max=2 ## Creating a master/slave clone crmsh # crm configure ms WebDataClone WebData \ meta master-max=1 master-node-max=1 \ clone-max=2 clone-node-max=1 notify=true pcs # resource master WebDataClone WebData \ master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 \ notify=true ## ... crmsh # pcs # crmsh # pcs # ## Batch changes crmsh # crm crmsh # cib new drbd_cfg crmsh # configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \ op monitor interval=60s crmsh # configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \ clone-max=2 clone-node-max=1 notify=true crmsh # cib commit drbd_cfg crmsh # quit pcs # pcs cluster cib drbd_cfg pcs # pcs -f drbd_cfg resource create WebData ocf:linbit:drbd drbd_resource=wwwdata \ op monitor interval=60s pcs # pcs -f drbd_cfg resource master WebDataClone WebData master-max=1 master-node-max=1 \ clone-max=2 clone-node-max=1 notify=true pcs # pcs cluster push cib drbd_cfg diff --git a/include/portability.h b/include/portability.h index b0f9f1c7c2..27ab46a903 100644 --- a/include/portability.h +++ b/include/portability.h @@ -1,216 +1,232 @@ #ifndef PORTABILITY_H # define PORTABILITY_H /* * Copyright (C) 2001 Alan Robertson * This software licensed under the GNU LGPL. 
* * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ # define EOS '\0' # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) /* Needs to be defined before any other includes, otherwise some system * headers do not behave as expected! Major black magic... */ # undef _GNU_SOURCE /* in case it was defined on the command line */ # define _GNU_SOURCE /* Please leave this as the first #include - Solaris needs it there */ # ifdef HAVE_CONFIG_H # include # endif /* Prototypes for libreplace functions */ # ifndef HAVE_DAEMON /* We supply a replacement function, but need a prototype */ int daemon(int nochdir, int noclose); # endif /* HAVE_DAEMON */ # ifndef HAVE_SETENV /* We supply a replacement function, but need a prototype */ int setenv(const char *name, const char *value, int why); # endif /* HAVE_SETENV */ # ifndef HAVE_STRERROR /* We supply a replacement function, but need a prototype */ char *strerror(int errnum); # endif /* HAVE_STRERROR */ # ifndef HAVE_STRCHRNUL /* We supply a replacement function, but need a prototype */ char *strchrnul(const char *s, int c_in); # endif /* HAVE_STRCHRNUL */ # ifndef HAVE_ALPHASORT # include int alphasort(const void *dirent1, const void *dirent2); # endif /* HAVE_ALPHASORT */ # ifndef HAVE_STRNLEN size_t strnlen(const char *s, size_t maxlen); # else # define USE_GNU # endif # 
ifndef HAVE_STRNDUP char *strndup(const char *str, size_t len); # else # define USE_GNU # endif # include # if !GLIB_CHECK_VERSION(2,14,0) typedef struct fake_ghi { GHashTable *hash; int nth; /* current index over the iteration */ int lpc; /* internal loop counter inside g_hash_table_find */ gpointer key; gpointer value; } GHashTableIter; static inline void g_hash_prepend_value(gpointer key, gpointer value, gpointer user_data) { GList **values = (GList **) user_data; *values = g_list_prepend(*values, value); } /* Since: 2.14 */ static inline GList * g_hash_table_get_values(GHashTable * hash_table) { GList *values = NULL; g_hash_table_foreach(hash_table, g_hash_prepend_value, &values); return values; } # endif # if !GLIB_CHECK_VERSION(2,16,0) static inline gboolean g_hash_table_nth_data(gpointer key, gpointer value, gpointer user_data) { GHashTableIter *iter = (GHashTableIter *) user_data; if (iter->lpc++ == iter->nth) { iter->key = key; iter->value = value; return TRUE; } return FALSE; } static inline void g_hash_table_iter_init(GHashTableIter * iter, GHashTable * hash_table) { iter->hash = hash_table; iter->nth = 0; iter->lpc = 0; iter->key = NULL; iter->value = NULL; } static inline gboolean g_hash_table_iter_next(GHashTableIter * iter, gpointer * key, gpointer * value) { gboolean found = FALSE; iter->lpc = 0; iter->key = NULL; iter->value = NULL; if (iter->nth < g_hash_table_size(iter->hash)) { found = ! !g_hash_table_find(iter->hash, g_hash_table_nth_data, iter); iter->nth++; } if (key) *key = iter->key; if (value) *value = iter->value; return found; } static inline void g_hash_table_iter_remove(GHashTableIter * iter) { g_hash_table_remove(iter->hash, iter->key); iter->nth--; /* Or zero to be safe? 
/*
 * NULL-tolerant string comparison, supplied here for GLib releases
 * that predate 2.16.
 *
 * A NULL pointer orders before every non-NULL string and two NULL
 * pointers compare equal; otherwise the result is that of strcmp().
 */
static inline int
g_strcmp0(const char *str1, const char *str2)
{
    if (str1 == NULL) {
        return (str2 == NULL) ? 0 : -1;
    }

    if (str2 == NULL) {
        return 1;
    }

    return strcmp(str1, str2);
}
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include gnutls_anon_client_credentials_t anon_cred_c; # define DEFAULT_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ const int kx_prio[] = { GNUTLS_KX_ANON_DH, 0 }; static gboolean remote_gnutls_credentials_init = FALSE; #else typedef void gnutls_session_t; #endif #include -#include +#ifndef ON_BSD +# include +#endif #define DH_BITS 1024 typedef struct cib_remote_opaque_s { int flags; int socket; int port; char *server; char *user; char *passwd; gboolean encrypted; crm_remote_t command; crm_remote_t callback; } cib_remote_opaque_t; void cib_remote_connection_destroy(gpointer user_data); int cib_remote_callback_dispatch(gpointer user_data); int cib_remote_command_dispatch(gpointer user_data); int cib_remote_signon(cib_t * cib, const char *name, enum cib_conn_type type); int cib_remote_signoff(cib_t * cib); int cib_remote_free(cib_t * cib); int cib_remote_perform_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *name); static int cib_remote_inputfd(cib_t * cib) { cib_remote_opaque_t *private = cib->variant_opaque; return private->callback.tcp_socket; } static int cib_remote_set_connection_dnotify(cib_t * cib, void (*dnotify) (gpointer user_data)) { return -EPROTONOSUPPORT; } static int cib_remote_register_notification(cib_t * cib, const char *callback, int enabled) { xmlNode *notify_msg = create_xml_node(NULL, "cib_command"); cib_remote_opaque_t *private = cib->variant_opaque; crm_xml_add(notify_msg, F_CIB_OPERATION, T_CIB_NOTIFY); crm_xml_add(notify_msg, F_CIB_NOTIFY_TYPE, callback); 
crm_xml_add_int(notify_msg, F_CIB_NOTIFY_ACTIVATE, enabled); crm_remote_send(&private->callback, notify_msg); free_xml(notify_msg); return pcmk_ok; } cib_t * cib_remote_new(const char *server, const char *user, const char *passwd, int port, gboolean encrypted) { cib_remote_opaque_t *private = NULL; cib_t *cib = cib_new_variant(); private = calloc(1, sizeof(cib_remote_opaque_t)); cib->variant = cib_remote; cib->variant_opaque = private; if (server) { private->server = strdup(server); } if (user) { private->user = strdup(user); } if (passwd) { private->passwd = strdup(passwd); } private->port = port; private->encrypted = encrypted; /* assign variant specific ops */ cib->delegate_fn = cib_remote_perform_op; cib->cmds->signon = cib_remote_signon; cib->cmds->signoff = cib_remote_signoff; cib->cmds->free = cib_remote_free; cib->cmds->inputfd = cib_remote_inputfd; cib->cmds->register_notification = cib_remote_register_notification; cib->cmds->set_connection_dnotify = cib_remote_set_connection_dnotify; return cib; } static int cib_tls_close(cib_t * cib) { cib_remote_opaque_t *private = cib->variant_opaque; #ifdef HAVE_GNUTLS_GNUTLS_H if (private->encrypted) { if (private->command.tls_session) { gnutls_bye(*(private->command.tls_session), GNUTLS_SHUT_RDWR); gnutls_deinit(*(private->command.tls_session)); gnutls_free(private->command.tls_session); } if (private->callback.tls_session) { gnutls_bye(*(private->callback.tls_session), GNUTLS_SHUT_RDWR); gnutls_deinit(*(private->callback.tls_session)); gnutls_free(private->callback.tls_session); } private->command.tls_session = NULL; private->callback.tls_session = NULL; if (remote_gnutls_credentials_init) { gnutls_anon_free_client_credentials(anon_cred_c); gnutls_global_deinit(); remote_gnutls_credentials_init = FALSE; } } #endif if (private->command.tcp_socket) { shutdown(private->command.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->command.tcp_socket); } if (private->callback.tcp_socket) { 
shutdown(private->callback.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->callback.tcp_socket); } private->command.tcp_socket = 0; private->callback.tcp_socket = 0; free(private->command.buffer); free(private->callback.buffer); private->command.buffer = NULL; private->callback.buffer = NULL; return 0; } static int cib_tls_signon(cib_t * cib, crm_remote_t * connection, gboolean event_channel) { int sock; cib_remote_opaque_t *private = cib->variant_opaque; int rc = 0; int disconnected = 0; xmlNode *answer = NULL; xmlNode *login = NULL; static struct mainloop_fd_callbacks cib_fd_callbacks = { 0, }; cib_fd_callbacks.dispatch = event_channel ? cib_remote_callback_dispatch : cib_remote_command_dispatch; cib_fd_callbacks.destroy = cib_remote_connection_destroy; connection->tcp_socket = 0; #ifdef HAVE_GNUTLS_GNUTLS_H connection->tls_session = NULL; #endif sock = crm_remote_tcp_connect(private->server, private->port); if (sock < 0) { crm_perror(LOG_ERR, "remote tcp connection to %s:%d failed", private->server, private->port); return -ENOTCONN; } connection->tcp_socket = sock; if (private->encrypted) { /* initialize GnuTls lib */ #ifdef HAVE_GNUTLS_GNUTLS_H if (remote_gnutls_credentials_init == FALSE) { gnutls_global_init(); gnutls_anon_allocate_client_credentials(&anon_cred_c); remote_gnutls_credentials_init = TRUE; } /* bind the socket to GnuTls lib */ connection->tls_session = crm_create_anon_tls_session(sock, GNUTLS_CLIENT, anon_cred_c); if (crm_initiate_client_tls_handshake(connection, DEFAULT_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session creation for %s:%d failed", private->server, private->port); gnutls_deinit(*connection->tls_session); gnutls_free(connection->tls_session); connection->tls_session = NULL; cib_tls_close(cib); return -1; } #else return -EPROTONOSUPPORT; #endif } /* login to server */ login = create_xml_node(NULL, "cib_command"); crm_xml_add(login, "op", "authenticate"); crm_xml_add(login, "user", private->user); crm_xml_add(login, 
"password", private->passwd); crm_xml_add(login, "hidden", "password"); crm_remote_send(connection, login); free_xml(login); crm_remote_recv(connection, -1, &disconnected); if (disconnected) { rc = -ENOTCONN; } answer = crm_remote_parse_buffer(connection); crm_log_xml_trace(answer, "Reply"); if (answer == NULL) { rc = -EPROTO; } else { /* grab the token */ const char *msg_type = crm_element_value(answer, F_CIB_OPERATION); const char *tmp_ticket = crm_element_value(answer, F_CIB_CLIENTID); if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); rc = -EPROTO; } else if (tmp_ticket == NULL) { rc = -EPROTO; } else { connection->token = strdup(tmp_ticket); } } free_xml(answer); answer = NULL; if (rc != 0) { cib_tls_close(cib); return rc; } crm_trace("remote client connection established"); connection->source = mainloop_add_fd("cib-remote", G_PRIORITY_HIGH, connection->tcp_socket, cib, &cib_fd_callbacks); return rc; } void cib_remote_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); #ifdef HAVE_GNUTLS_GNUTLS_H cib_tls_close(user_data); #endif return; } int cib_remote_command_dispatch(gpointer user_data) { int disconnected = 0; cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; crm_remote_recv(&private->command, -1, &disconnected); free(private->command.buffer); private->command.buffer = NULL; crm_err("received late reply for remote cib connection, discarding"); if (disconnected) { return -1; } return 0; } int cib_remote_callback_dispatch(gpointer user_data) { cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; xmlNode *msg = NULL; int disconnected = 0; crm_info("Message on callback channel"); crm_remote_recv(&private->callback, -1, &disconnected); msg = crm_remote_parse_buffer(&private->callback); while (msg) { const char *type = crm_element_value(msg, F_TYPE); crm_trace("Activating %s callbacks...", type); if (safe_str_eq(type, T_CIB)) { 
cib_native_callback(cib, msg, 0, 0); } else if (safe_str_eq(type, T_CIB_NOTIFY)) { g_list_foreach(cib->notify_list, cib_native_notify, msg); } else { crm_err("Unknown message type: %s", type); } free_xml(msg); msg = crm_remote_parse_buffer(&private->callback); } if (disconnected) { return -1; } return 0; } int cib_remote_signon(cib_t * cib, const char *name, enum cib_conn_type type) { int rc = pcmk_ok; cib_remote_opaque_t *private = cib->variant_opaque; if (private->passwd == NULL) { struct termios settings; int rc; rc = tcgetattr(0, &settings); settings.c_lflag &= ~ECHO; rc = tcsetattr(0, TCSANOW, &settings); fprintf(stderr, "Password: "); private->passwd = calloc(1, 1024); rc = scanf("%s", private->passwd); fprintf(stdout, "\n"); /* fprintf(stderr, "entered: '%s'\n", buffer); */ if (rc < 1) { private->passwd = NULL; } settings.c_lflag |= ECHO; rc = tcsetattr(0, TCSANOW, &settings); } if (private->server == NULL || private->user == NULL) { rc = -EINVAL; } if (rc == pcmk_ok) { rc = cib_tls_signon(cib, &(private->command), FALSE); } if (rc == pcmk_ok) { rc = cib_tls_signon(cib, &(private->callback), TRUE); } if (rc == pcmk_ok) { xmlNode *hello = cib_create_op(0, private->callback.token, CRM_OP_REGISTER, NULL, NULL, NULL, 0, NULL); crm_xml_add(hello, F_CIB_CLIENTNAME, name); crm_remote_send(&private->command, hello); free_xml(hello); } if (rc == pcmk_ok) { crm_notice("%s: Opened connection to %s:%d\n", name, private->server, private->port); cib->state = cib_connected_command; cib->type = cib_command; } else { fprintf(stderr, "%s: Connection to %s:%d failed: %s\n", name, private->server, private->port, pcmk_strerror(rc)); } return rc; } int cib_remote_signoff(cib_t * cib) { int rc = pcmk_ok; /* cib_remote_opaque_t *private = cib->variant_opaque; */ crm_debug("Signing out of the CIB Service"); #ifdef HAVE_GNUTLS_GNUTLS_H cib_tls_close(cib); #endif cib->state = cib_disconnected; cib->type = cib_none; return rc; } int cib_remote_free(cib_t * cib) { int rc = pcmk_ok; 
crm_warn("Freeing CIB"); if (cib->state != cib_disconnected) { rc = cib_remote_signoff(cib); if (rc == pcmk_ok) { cib_remote_opaque_t *private = cib->variant_opaque; free(private->server); free(private->user); free(private->passwd); free(cib->cmds); free(private); free(cib); } } return rc; } int cib_remote_perform_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *name) { int rc = pcmk_ok; int disconnected = 0; int remaining_time = 0; time_t start_time; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; cib_remote_opaque_t *private = cib->variant_opaque; if (cib->state == cib_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } cib->call_id++; /* prevent call_id from being negative (or zero) and conflicting * with the cib_errors enum * use 2 because we use it as (cib->call_id - 1) below */ if (cib->call_id < 1) { cib->call_id = 1; } op_msg = cib_create_op(cib->call_id, private->callback.token, op, host, section, data, call_options, NULL); if (op_msg == NULL) { return -EPROTO; } crm_trace("Sending %s message to CIB service", op); if (!(call_options & cib_sync_call)) { crm_remote_send(&private->callback, op_msg); } else { crm_remote_send(&private->command, op_msg); } free_xml(op_msg); if ((call_options & cib_discard_reply)) { crm_trace("Discarding reply"); return pcmk_ok; } else if (!(call_options & cib_sync_call)) { return cib->call_id; } crm_trace("Waiting for a syncronous reply"); start_time = time(NULL); remaining_time = cib->call_timeout ? 
cib->call_timeout : 60; while (remaining_time > 0 && !disconnected) { int reply_id = -1; int msg_id = cib->call_id; crm_remote_recv(&private->command, remaining_time * 1000, &disconnected); op_reply = crm_remote_parse_buffer(&private->command); if (!op_reply) { break; } crm_element_value_int(op_reply, F_CIB_CALLID, &reply_id); if (reply_id == msg_id) { break; } else if (reply_id < msg_id) { crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else if ((reply_id - 10000) > msg_id) { /* wrap-around case */ crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else { crm_err("Received a __future__ reply:" " %d (wanted %d)", reply_id, msg_id); } free_xml(op_reply); op_reply = NULL; /* wasn't the right reply, try and read some more */ remaining_time = time(NULL) - start_time; } /* if(IPC_ISRCONN(native->command_channel) == FALSE) { */ /* crm_err("CIB disconnected: %d", */ /* native->command_channel->ch_status); */ /* cib->state = cib_disconnected; */ /* } */ if (disconnected) { crm_err("Disconnected while waiting for reply."); return -ENOTCONN; } else if (op_reply == NULL) { crm_err("No reply message - empty"); return -ENOMSG; } crm_trace("Syncronous reply received"); /* Start processing the reply... 
*/ if (crm_element_value_int(op_reply, F_CIB_RC, &rc) != 0) { rc = -EPROTO; } if (rc == -pcmk_err_diff_resync) { /* This is an internal value that clients do not and should not care about */ rc = pcmk_ok; } if (rc == pcmk_ok || rc == -EPERM) { crm_log_xml_debug(op_reply, "passed"); } else { /* } else if(rc == -ETIME) { */ crm_err("Call failed: %s", pcmk_strerror(rc)); crm_log_xml_warn(op_reply, "failed"); } if (output_data == NULL) { /* do nothing more */ } else if (!(call_options & cib_discard_reply)) { xmlNode *tmp = get_message_xml(op_reply, F_CIB_CALLDATA); if (tmp == NULL) { crm_trace("No output in reply to \"%s\" command %d", op, cib->call_id - 1); } else { *output_data = copy_xml(tmp); } } free_xml(op_reply); return rc; } diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index 87c3f1fb98..950d53584a 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -1,49 +1,49 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/gnu -I$(top_srcdir)/lib/gnu ## libraries lib_LTLIBRARIES = libcrmcommon.la # Can't use -Wcast-qual here because glib insists on pretending things are const # when they're not and thus we need the crm_element_value_const() hack # s390 needs -fPIC # s390-suse-linux/bin/ld: .libs/ipc.o: relocation R_390_PC32DBL against `__stack_chk_fail@@GLIBC_2.4' can not be used when making a shared object; recompile with -fPIC CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC libcrmcommon_la_SOURCES = ipc.c utils.c xml.c iso8601.c remote.c mainloop.c logging.c if BUILD_CIBSECRETS libcrmcommon_la_SOURCES += cib_secrets.c endif libcrmcommon_la_LDFLAGS = -version-info 5:0:2 -libcrmcommon_la_LIBADD = -ldl $(GNUTLSLIBS) +libcrmcommon_la_LIBADD = @LIBADD_DL@ $(GNUTLSLIBS) libcrmcommon_la_SOURCES += $(top_builddir)/lib/gnu/md5.c clean-generic: rm -f *.log *.debug *.xml *~ install-exec-local: uninstall-local: diff --git a/lib/common/remote.c b/lib/common/remote.c index 8b00f1660b..34864018ce 100644 --- a/lib/common/remote.c +++ b/lib/common/remote.c @@ -1,908 +1,909 @@ /* * Copyright (c) 2008 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #ifdef HAVE_GNUTLS_GNUTLS_H const int psk_tls_kx_order[] = { GNUTLS_KX_DHE_PSK, GNUTLS_KX_PSK, }; const int anon_tls_kx_order[] = { GNUTLS_KX_ANON_DH, GNUTLS_KX_DHE_RSA, GNUTLS_KX_DHE_DSS, GNUTLS_KX_RSA, 0 }; int crm_initiate_client_tls_handshake(crm_remote_t * remote, int timeout_ms) { int rc = 0; int pollrc = 0; time_t start = time(NULL); do { rc = gnutls_handshake(*remote->tls_session); if (rc == GNUTLS_E_INTERRUPTED || rc == GNUTLS_E_AGAIN) { pollrc = crm_remote_ready(remote, 1000); if (pollrc < 0) { /* poll returned error, there is no hope */ rc = -1; } } } while (((time(NULL) - start) < (timeout_ms / 1000)) && (rc == GNUTLS_E_INTERRUPTED || rc == GNUTLS_E_AGAIN)); if (rc < 0) { crm_trace("gnutls_handshake() failed with %d", rc); } return rc; } void * crm_create_anon_tls_session(int csock, int type /* GNUTLS_SERVER, GNUTLS_CLIENT */ , void *credentials) { gnutls_session_t *session = gnutls_malloc(sizeof(gnutls_session_t)); gnutls_init(session, type); # ifdef HAVE_GNUTLS_PRIORITY_SET_DIRECT /* http://www.manpagez.com/info/gnutls/gnutls-2.10.4/gnutls_81.php#Echo-Server-with-anonymous-authentication */ gnutls_priority_set_direct(*session, "NORMAL:+ANON-DH", NULL); /* gnutls_priority_set_direct (*session, "NONE:+VERS-TLS-ALL:+CIPHER-ALL:+MAC-ALL:+SIGN-ALL:+COMP-ALL:+ANON-DH", NULL); */ # else gnutls_set_default_priority(*session); gnutls_kx_set_priority(*session, anon_tls_kx_order); # endif gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) GINT_TO_POINTER(csock)); switch (type) { case GNUTLS_SERVER: 
gnutls_credentials_set(*session, GNUTLS_CRD_ANON, (gnutls_anon_server_credentials_t) credentials); break; case GNUTLS_CLIENT: gnutls_credentials_set(*session, GNUTLS_CRD_ANON, (gnutls_anon_client_credentials_t) credentials); break; } return session; } void * create_psk_tls_session(int csock, int type /* GNUTLS_SERVER, GNUTLS_CLIENT */ , void *credentials) { gnutls_session_t *session = gnutls_malloc(sizeof(gnutls_session_t)); gnutls_init(session, type); # ifdef HAVE_GNUTLS_PRIORITY_SET_DIRECT gnutls_priority_set_direct(*session, "NORMAL:+DHE-PSK:+PSK", NULL); # else gnutls_set_default_priority(*session); gnutls_kx_set_priority(*session, psk_tls_kx_order); # endif gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) GINT_TO_POINTER(csock)); switch (type) { case GNUTLS_SERVER: gnutls_credentials_set(*session, GNUTLS_CRD_PSK, (gnutls_psk_server_credentials_t) credentials); break; case GNUTLS_CLIENT: gnutls_credentials_set(*session, GNUTLS_CRD_PSK, (gnutls_psk_client_credentials_t) credentials); break; } return session; } static int crm_send_tls(gnutls_session_t * session, const char *buf, size_t len) { const char *unsent = buf; int rc = 0; int total_send; if (buf == NULL) { return -1; } total_send = len; crm_trace("Message size: %d", len); while (TRUE) { rc = gnutls_record_send(*session, unsent, len); if (rc == GNUTLS_E_INTERRUPTED || rc == GNUTLS_E_AGAIN) { crm_debug("Retry"); } else if (rc < 0) { crm_err("Connection terminated rc = %d", rc); break; } else if (rc < len) { crm_debug("Only sent %d of %d bytes", rc, len); len -= rc; unsent += rc; } else { crm_debug("Sent %d bytes", rc); break; } } return rc < 0 ? rc : total_send; } /*! * \internal * \brief Read bytes off non blocking tls session. * * \param session - tls session to read * \param max_size - max bytes allowed to read for buffer. 0 assumes no limit * * \note only use with NON-Blocking sockets. Should only be used after polling socket. 
* This function will return once max_size is met, the socket read buffer * is empty, or an error is encountered. * * \retval '\0' terminated buffer on success */ static char * crm_recv_tls(gnutls_session_t * session, size_t max_size, size_t * recv_len, int *disconnected) { char *buf = NULL; int rc = 0; size_t len = 0; size_t chunk_size = max_size ? max_size : 1024; size_t buf_size = 0; size_t read_size = 0; if (session == NULL) { if (disconnected) { *disconnected = 1; } goto done; } buf = calloc(1, chunk_size + 1); buf_size = chunk_size; while (TRUE) { read_size = buf_size - len; /* automatically grow the buffer when needed if max_size is not set. */ if (!max_size && (read_size < (chunk_size / 2))) { buf_size += chunk_size; crm_trace("Grow buffer by %d more bytes. buf is now %d bytes", (int)chunk_size, buf_size); buf = realloc(buf, buf_size + 1); CRM_ASSERT(buf != NULL); read_size = buf_size - len; } rc = gnutls_record_recv(*session, buf + len, read_size); if (rc > 0) { crm_trace("Got %d more bytes.", rc); len += rc; /* always null terminate buffer, the +1 to alloc always allows for this. */ buf[len] = '\0'; } if (max_size && (max_size == read_size)) { crm_trace("Buffer max read size %d met", max_size); goto done; } /* process any errors. 
*/ if (rc == GNUTLS_E_INTERRUPTED) { crm_trace("EINTR encoutered, retry tls read"); } else if (rc == GNUTLS_E_AGAIN) { crm_trace("non-blocking, exiting read on rc = %d", rc); goto done; } else if (rc <= 0) { if (rc == 0) { crm_debug("EOF encoutered during TLS read"); } else { crm_debug("Error receiving message: %s (%d)", gnutls_strerror(rc), rc); } if (disconnected) { *disconnected = 1; } goto done; } } done: if (recv_len) { *recv_len = len; } if (!len) { free(buf); buf = NULL; } return buf; } #endif static int crm_send_plaintext(int sock, const char *buf, size_t len) { int rc = 0; const char *unsent = buf; int total_send; if (buf == NULL) { return -1; } total_send = len; crm_trace("Message on socket %d: size=%d", sock, len); retry: rc = write(sock, unsent, len); if (rc < 0) { switch (errno) { case EINTR: case EAGAIN: crm_trace("Retry"); goto retry; default: crm_perror(LOG_ERR, "Could only write %d of the remaining %d bytes", rc, (int)len); break; } } else if (rc < len) { crm_trace("Only sent %d of %d remaining bytes", rc, len); len -= rc; unsent += rc; goto retry; } else { crm_trace("Sent %d bytes: %.100s", rc, buf); } return rc < 0 ? rc : total_send; } /*! * \internal * \brief Read bytes off non blocking socket. * * \param session - tls session to read * \param max_size - max bytes allowed to read for buffer. 0 assumes no limit * * \note only use with NON-Blocking sockets. Should only be used after polling socket. * This function will return once max_size is met, the socket read buffer * is empty, or an error is encountered. * * \retval '\0' terminated buffer on success */ static char * crm_recv_plaintext(int sock, size_t max_size, size_t * recv_len, int *disconnected) { char *buf = NULL; ssize_t rc = 0; ssize_t len = 0; ssize_t chunk_size = max_size ? 
max_size : 1024; size_t buf_size = 0; size_t read_size = 0; if (sock <= 0) { if (disconnected) { *disconnected = 1; } goto done; } buf = calloc(1, chunk_size + 1); buf_size = chunk_size; while (TRUE) { errno = 0; read_size = buf_size - len; /* automatically grow the buffer when needed if max_size is not set. */ if (!max_size && (read_size < (chunk_size / 2))) { buf_size += chunk_size; crm_trace("Grow buffer by %d more bytes. buf is now %d bytes", (int)chunk_size, buf_size); buf = realloc(buf, buf_size + 1); CRM_ASSERT(buf != NULL); read_size = buf_size - len; } rc = read(sock, buf + len, chunk_size); if (rc > 0) { crm_trace("Got %d more bytes. errno=%d", (int)rc, errno); len += rc; /* always null terminate buffer, the +1 to alloc always allows for this. */ buf[len] = '\0'; } if (max_size && (max_size == read_size)) { crm_trace("Buffer max read size %d met", max_size); goto done; } if (rc > 0) { continue; } else if (rc == 0) { if (disconnected) { *disconnected = 1; } crm_trace("EOF encoutered during read"); goto done; } /* process errors */ if (errno == EINTR) { crm_trace("EINTER encoutered, retry socket read."); } else if (errno == EAGAIN) { crm_trace("non-blocking, exiting read on rc = %d", rc); goto done; } else if (errno <= 0) { if (disconnected) { *disconnected = 1; } crm_debug("Error receiving message: %d", (int)rc); goto done; } } done: if (recv_len) { *recv_len = len; } if (!len) { free(buf); buf = NULL; } return buf; } static int crm_remote_send_raw(crm_remote_t * remote, const char *buf, size_t len) { int rc = -ESOCKTNOSUPPORT; if (remote->tcp_socket) { rc = crm_send_plaintext(remote->tcp_socket, buf, len); #ifdef HAVE_GNUTLS_GNUTLS_H } else if (remote->tls_session) { rc = crm_send_tls(remote->tls_session, buf, len); #endif } else { crm_err("Unsupported connection type"); } return rc; } int crm_remote_send(crm_remote_t * remote, xmlNode * msg) { int rc = -1; char *xml_text = NULL; int len = 0; xml_text = dump_xml_unformatted(msg); if (xml_text) { len = 
strlen(xml_text); } else { crm_err("Invalid XML, can not send msg"); return -1; } rc = crm_remote_send_raw(remote, xml_text, len); if (rc >= 0) { rc = crm_remote_send_raw(remote, REMOTE_MSG_TERMINATOR, strlen(REMOTE_MSG_TERMINATOR)); } if (rc < 0) { crm_err("Failed to send remote msg, rc = %d", rc); } free(xml_text); return rc; } /*! * \internal * \brief handles the recv buffer and parsing out msgs. * \note new_data is owned by this function once it is passed in. */ xmlNode * crm_remote_parse_buffer(crm_remote_t * remote) { char *buf = NULL; char *start = NULL; char *end = NULL; xmlNode *xml = NULL; if (remote->buffer == NULL) { return NULL; } /* take ownership of the buffer */ buf = remote->buffer; remote->buffer = NULL; /* MSGS are separated by a '\r\n\r\n'. Split a message off the buffer and return it. */ start = buf; end = strstr(start, REMOTE_MSG_TERMINATOR); while (!xml && end) { /* grab the message */ end[0] = '\0'; end += strlen(REMOTE_MSG_TERMINATOR); xml = string2xml(start); if (xml == NULL) { crm_err("Couldn't parse: '%.120s'", start); } start = end; end = strstr(start, REMOTE_MSG_TERMINATOR); } if (xml && start) { /* we have msgs left over, save it until next time */ remote->buffer = strdup(start); free(buf); } else if (!xml) { /* no msg present */ remote->buffer = buf; } return xml; } /*! * \internal * \brief Determine if a remote session has data to read * * \retval 0, timeout occured. 
* \retval positive, data is ready to be read * \retval negative, session has ended */ int crm_remote_ready(crm_remote_t * remote, int timeout /* ms */ ) { struct pollfd fds = { 0, }; int sock = 0; int rc = 0; time_t start; if (remote->tcp_socket) { sock = remote->tcp_socket; #ifdef HAVE_GNUTLS_GNUTLS_H } else if (remote->tls_session) { void *sock_ptr = gnutls_transport_get_ptr(*remote->tls_session); sock = GPOINTER_TO_INT(sock_ptr); #endif } else { crm_err("Unsupported connection type"); } if (sock <= 0) { crm_trace("No longer connected"); return -ENOTCONN; } start = time(NULL); errno = 0; do { fds.fd = sock; fds.events = POLLIN; /* If we got an EINTR while polling, and we have a * specific timeout we are trying to honor, attempt * to adjust the timeout to the closest second. */ if (errno == EINTR && (timeout > 0)) { timeout = timeout - ((time(NULL) - start) * 1000); if (timeout < 1000) { timeout = 1000; } } rc = poll(&fds, 1, timeout); } while (rc < 0 && errno == EINTR); return rc; } /*! * \internal * \brief Read data off the socket until at least one full message is present or timeout occures. * \retval TRUE message read * \retval FALSE full message not read */ gboolean crm_remote_recv(crm_remote_t * remote, int total_timeout /*ms */ , int *disconnected) { int ret; size_t request_len = 0; time_t start = time(NULL); char *raw_request = NULL; int remaining_timeout = 0; if (total_timeout == 0) { total_timeout = 10000; } else if (total_timeout < 0) { total_timeout = 60000; } *disconnected = 0; remaining_timeout = total_timeout; while ((remaining_timeout > 0) && !(*disconnected)) { /* read some more off the tls buffer if we still have time left. 
*/ crm_trace("waiting to receive remote msg, starting timeout %d, remaining_timeout %d", total_timeout, remaining_timeout); ret = crm_remote_ready(remote, remaining_timeout); raw_request = NULL; if (ret == 0) { crm_err("poll timed out (%d ms) while waiting to receive msg", remaining_timeout); return FALSE; } else if (ret < 0) { if (errno != EINTR) { crm_debug("poll returned error while waiting for msg, rc: %d, errno: %d", ret, errno); *disconnected = 1; return FALSE; } crm_debug("poll EINTR encountered during poll, retrying"); } else if (remote->tcp_socket) { raw_request = crm_recv_plaintext(remote->tcp_socket, 0, &request_len, disconnected); #ifdef HAVE_GNUTLS_GNUTLS_H } else if (remote->tls_session) { raw_request = crm_recv_tls(remote->tls_session, 0, &request_len, disconnected); #endif } else { crm_err("Unsupported connection type"); } remaining_timeout = remaining_timeout - ((time(NULL) - start) * 1000); if (!raw_request) { crm_debug("Empty msg received after poll"); continue; } if (remote->buffer) { int old_len = strlen(remote->buffer); crm_trace("Expanding recv buffer from %d to %d", old_len, old_len + request_len); remote->buffer = realloc(remote->buffer, old_len + request_len + 1); memcpy(remote->buffer + old_len, raw_request, request_len); *(remote->buffer + old_len + request_len) = '\0'; free(raw_request); } else { remote->buffer = raw_request; } if (strstr(remote->buffer, REMOTE_MSG_TERMINATOR)) { return TRUE; } } return FALSE; } struct tcp_async_cb_data { gboolean success; int sock; void *userdata; void (*callback) (void *userdata, int sock); int timeout; /*ms */ time_t start; }; static gboolean check_connect_finished(gpointer userdata) { struct tcp_async_cb_data *cb_data = userdata; int rc = 0; int sock = cb_data->sock; int error = 0; fd_set rset, wset; socklen_t len = sizeof(error); struct timeval ts = { 0, }; if (cb_data->success == TRUE) { goto dispatch_done; } FD_ZERO(&rset); FD_SET(sock, &rset); wset = rset; crm_trace("fd %d: checking to see if 
connect finished", sock); rc = select(sock + 1, &rset, &wset, NULL, &ts); if (rc < 0) { rc = errno; if ((errno == EINPROGRESS) || (errno == EAGAIN)) { /* reschedule if there is still time left */ if ((time(NULL) - cb_data->start) < (cb_data->timeout / 1000)) { goto reschedule; } else { rc = -ETIMEDOUT; } } crm_trace("fd %d: select failed %d connect dispatch ", rc); goto dispatch_done; } else if (rc == 0) { if ((time(NULL) - cb_data->start) < (cb_data->timeout / 1000)) { goto reschedule; } crm_debug("fd %d: timeout during select", sock); rc = -ETIMEDOUT; goto dispatch_done; } else { crm_trace("fd %d: select returned success", sock); rc = 0; } /* can we read or write to the socket now? */ if (FD_ISSET(sock, &rset) || FD_ISSET(sock, &wset)) { if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0) { crm_trace("fd %d: call to getsockopt failed", sock); rc = -1; goto dispatch_done; } if (error) { crm_trace("fd %d: error returned from getsockopt: %d", sock, error); rc = -1; goto dispatch_done; } } else { crm_trace("neither read nor write set after select"); rc = -1; goto dispatch_done; } dispatch_done: if (!rc) { crm_trace("fd %d: connected", sock); /* Success, set the return code to the sock to report to the callback */ rc = cb_data->sock; cb_data->sock = 0; } else { close(sock); } if (cb_data->callback) { cb_data->callback(cb_data->userdata, rc); } free(cb_data); return FALSE; reschedule: /* will check again next interval */ return TRUE; } static int internal_tcp_connect_async(int sock, const struct sockaddr *addr, socklen_t addrlen, int timeout /* ms */ , void *userdata, void (*callback) (void *userdata, int sock)) { int rc = 0; int flag = 0; int interval = 500; struct tcp_async_cb_data *cb_data = NULL; if ((flag = fcntl(sock, F_GETFL)) >= 0) { if (fcntl(sock, F_SETFL, flag | O_NONBLOCK) < 0) { crm_err("fcntl() write failed"); return -1; } } rc = connect(sock, addr, addrlen); if (rc < 0 && (errno != EINPROGRESS) && (errno != EAGAIN)) { return -1; } cb_data = 
calloc(1, sizeof(struct tcp_async_cb_data)); cb_data->userdata = userdata; cb_data->callback = callback; cb_data->sock = sock; cb_data->timeout = timeout; cb_data->start = time(NULL); if (rc == 0) { /* The connect was successful immediately, we still return to mainloop * and let this callback get called later. This avoids the user of this api * to have to account for the fact the callback could be invoked within this * function before returning. */ cb_data->success = TRUE; interval = 1; } /* Check connect finished is mostly doing a non-block poll on the socket * to see if we can read/write to it. Once we can, the connect has completed. * This method allows us to connect to the server without blocking mainloop. * * This is a poor man's way of polling to see when the connection finished. * At some point we should figure out a way to use a mainloop fd callback for this. * Something about the way mainloop is currently polling prevents this from working at the * moment though. */ crm_trace("fd %d: scheduling to check if connect finished in %dms second", sock, interval); g_timeout_add(interval, check_connect_finished, cb_data); return 0; } static int internal_tcp_connect(int sock, const struct sockaddr *addr, socklen_t addrlen) { int flag = 0; int rc = connect(sock, addr, addrlen); if (rc == 0) { if ((flag = fcntl(sock, F_GETFL)) >= 0) { if (fcntl(sock, F_SETFL, flag | O_NONBLOCK) < 0) { crm_err("fcntl() write failed"); return -1; } } } return rc; } /*! * \internal * \brief tcp connection to server at specified port * \retval negative, failed to connect. 
*/
/* Resolves host with getaddrinfo() and tries each returned address in turn.
 * With a callback, the first address whose asynchronous connect could be
 * started ends the loop and 0 is returned; the socket (or a negative value)
 * is delivered to the callback later.  Without a callback the connect is
 * blocking and the connected socket is returned.  -1 means every attempt
 * failed. */
int
crm_remote_tcp_connect_async(const char *host, int port, int timeout,   /*ms */
                             void *userdata, void (*callback) (void *userdata, int sock))
{
    struct addrinfo *res = NULL;
    struct addrinfo *rp = NULL;
    struct addrinfo hints;
    const char *server = host;
    int ret_ga;
    int sock = -1;

    /* getaddrinfo */
    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;        /* Allow IPv4 or IPv6 */
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_CANONNAME;

    crm_debug("Looking up %s", server);
    ret_ga = getaddrinfo(server, NULL, &hints, &res);
    if (ret_ga) {
        crm_err("getaddrinfo: %s", gai_strerror(ret_ga));
        return -1;
    }

    if (!res || !res->ai_addr) {
        crm_err("getaddrinfo failed");
        goto async_cleanup;
    }

    for (rp = res; rp != NULL; rp = rp->ai_next) {
        struct sockaddr *addr = rp->ai_addr;

        if (!addr) {
            continue;
        }

        /* NOTE(review): tests rp but reads res; only used for logging */
        if (rp->ai_canonname) {
            server = res->ai_canonname;
        }
        crm_debug("Got address %s for %s", server, host);

        /* create socket */
        sock = socket(rp->ai_family, SOCK_STREAM, IPPROTO_TCP);
        if (sock == -1) {
            crm_err("Socket creation failed for remote client connection.");
            continue;
        }

        /* the lookup was done without a service, so fill in the port here */
        if (addr->sa_family == AF_INET6) {
            struct sockaddr_in6 *addr_in = (struct sockaddr_in6 *)addr;

            addr_in->sin6_port = htons(port);
        } else {
            struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;

            addr_in->sin_port = htons(port);
            crm_info("Attempting to connect to remote server at %s:%d",
                     inet_ntoa(addr_in->sin_addr), port);
        }

        if (callback) {
            if (internal_tcp_connect_async
                (sock, rp->ai_addr, rp->ai_addrlen, timeout, userdata, callback) == 0) {
                sock = 0;
                goto async_cleanup; /* Success for now, we'll hear back later in the callback */
            }

        } else {
            if (internal_tcp_connect(sock, rp->ai_addr, rp->ai_addrlen) == 0) {
                break;          /* Success */
            }
        }

        /* this address did not work out; release the socket and try the next */
        close(sock);
        sock = -1;
    }

  async_cleanup:

    if (res) {
        freeaddrinfo(res);
    }
    return sock;
}

/* Blocking variant: same as the async call with no callback and a timeout
 * of -1. */
int
crm_remote_tcp_connect(const char *host, int port)
{
    return crm_remote_tcp_connect_async(host, port, -1, NULL, NULL);
}

diff --git a/lib/lrmd/lrmd_client.c
b/lib/lrmd/lrmd_client.c index bdaf18ccd5..37161dc40c 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2009 +1,2010 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include +#include #include #include #include CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; int lrmd_tls_set_key(gnutls_datum_t * key); static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); static void lrmd_tls_connection_destroy(gpointer 
userdata); #endif typedef struct lrmd_private_s { enum client_type type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; crm_remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; int sock; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. 
*/
    memcpy(copy, event, sizeof(lrmd_event_data_t));

    /* replace every pointer copied by the memcpy with its own duplicate */
    copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL;
    copy->op_type = event->op_type ? strdup(event->op_type) : NULL;
    copy->user_data = event->user_data ? strdup(event->user_data) : NULL;
    copy->output = event->output ? strdup(event->output) : NULL;
    copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL;

    if (event->params) {
        copy->params = g_hash_table_new_full(crm_str_hash,
                                             g_str_equal, g_hash_destroy_str, g_hash_destroy_str);

        if (copy->params != NULL) {
            g_hash_table_foreach(event->params, dup_attr, copy->params);
        }
    }

    return copy;
}

/* Release an event and the strings and parameter table it holds.
 * A NULL event is ignored. */
void
lrmd_free_event(lrmd_event_data_t * event)
{
    if (!event) {
        return;
    }

    /* free gives me grief if i try to cast */
    free((char *)event->rsc_id);
    free((char *)event->op_type);
    free((char *)event->user_data);
    free((char *)event->output);
    free((char *)event->remote_nodename);
    if (event->params) {
        g_hash_table_destroy(event->params);
    }
    free(event);
}

/* Turn a message from the lrmd into an lrmd_event_data_t and hand it to the
 * client callback; messages carrying an IPC session id go to the proxy
 * dispatcher instead. */
static int
lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
{
    const char *type;
    const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION);
    lrmd_private_t *native = lrmd->private;
    lrmd_event_data_t event = { 0, };

    if (proxy_session != NULL) {
        /* this is proxy business */
        lrmd_internal_proxy_dispatch(lrmd, msg);
        return 1;
    } if (!native->callback) {
        /* no callback set */
        crm_trace("notify event received but client has not set callback");
        return 1;
    }

    /* the event only borrows strings owned by native and msg */
    event.remote_nodename = native->remote_nodename;
    type = crm_element_value(msg, F_LRMD_OPERATION);
    crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id);
    event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID);

    if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) {
        event.type = lrmd_event_register;
    } else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) {
        event.type = lrmd_event_unregister;
    } else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) {
        crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout);
        crm_element_value_int(msg, F_LRMD_RSC_INTERVAL,
&event.interval);
        crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay);
        crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc);
        crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status);
        crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted);

        crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run);
        crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange);
        crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time);
        crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time);

        event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION);
        event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
        event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
        event.type = lrmd_event_exec_complete;

        event.params = xml2list(msg);
    } else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) {
        event.type = lrmd_event_poke;
    } else {
        /* unrecognised operation: nothing to report */
        return 1;
    }

    crm_trace("op %s notify event received", type);
    native->callback(&event);

    /* params is the only member allocated for this event */
    if (event.params) {
        g_hash_table_destroy(event.params);
    }
    return 1;
}

/* IPC dispatch hook: parse the received buffer as XML and dispatch it.
 * Returns 1 without parsing when no client callback is set. */
static int
lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
    lrmd_t *lrmd = userdata;
    lrmd_private_t *native = lrmd->private;
    xmlNode *msg;
    int rc;

    if (!native->callback) {
        /* no callback set */
        return 1;
    }

    msg = string2xml(buffer);
    rc = lrmd_dispatch_internal(lrmd, msg);
    free_xml(msg);
    return rc;
}

#ifdef HAVE_GNUTLS_GNUTLS_H
/* GDestroyNotify-style wrapper around free_xml() for list cleanup. */
static void
lrmd_free_xml(gpointer userdata)
{
    free_xml((xmlNode *) userdata);
}

/* TRUE while the connection holds a TLS session, FALSE otherwise. */
static int
lrmd_tls_connected(lrmd_t * lrmd)
{
    lrmd_private_t *native = lrmd->private;

    if (native->remote->tls_session) {
        return TRUE;
    }

    return FALSE;
}

/* Dispatch queued notifications, then read and dispatch whatever complete
 * messages are available on the TLS connection. */
static int
lrmd_tls_dispatch(gpointer userdata)
{
    lrmd_t *lrmd = userdata;
    lrmd_private_t *native = lrmd->private;
    xmlNode *xml = NULL;
    int rc = 0;
    int disconnected = 0;

    if (lrmd_tls_connected(lrmd) == FALSE) {
        crm_trace("tls dispatch triggered after disconnect");
        return 0;
    }

    crm_trace("tls_dispatch triggered");

    /* First
check if there are any pending notifies to process that came
     * while we were waiting for replies earlier. */
    if (native->pending_notify) {
        GList *iter = NULL;

        crm_trace("Processing pending notifies");
        for (iter = native->pending_notify; iter; iter = iter->next) {
            lrmd_dispatch_internal(lrmd, iter->data);
        }
        g_list_free_full(native->pending_notify, lrmd_free_xml);
        native->pending_notify = NULL;
    }

    /* Next read the current buffer and see if there are any messages to handle. */
    rc = crm_remote_ready(native->remote, 0);
    if (rc == 0) {
        /* nothing to read, see if any full messages are already in buffer. */
        xml = crm_remote_parse_buffer(native->remote);
    } else if (rc < 0) {
        disconnected = 1;
    } else {
        crm_remote_recv(native->remote, -1, &disconnected);
        xml = crm_remote_parse_buffer(native->remote);
    }
    /* drain every complete message currently buffered */
    while (xml) {
        lrmd_dispatch_internal(lrmd, xml);
        free_xml(xml);
        xml = crm_remote_parse_buffer(native->remote);
    }

    if (disconnected) {
        crm_info("Server disconnected while reading remote server msg.");
        lrmd_tls_disconnect(lrmd);
        return 0;
    }
    return 1;
}
#endif

/* Not used with mainloop */
/* Reports whether there is something to dispatch.
 * NOTE(review): the timeout argument is not used by this body. */
int
lrmd_poll(lrmd_t * lrmd, int timeout)
{
    lrmd_private_t *native = lrmd->private;

    switch (native->type) {
        case CRM_CLIENT_IPC:
            return crm_ipc_ready(native->ipc);

#ifdef HAVE_GNUTLS_GNUTLS_H
        case CRM_CLIENT_TLS:
            /* queued notifications or an already-buffered complete message
             * count as ready without touching the socket */
            if (native->pending_notify) {
                return 1;
            } else if (native->remote->buffer
                       && strstr(native->remote->buffer, REMOTE_MSG_TERMINATOR)) {
                return 1;
            }

            return crm_remote_ready(native->remote, 0);
#endif
        default:
            crm_err("Unsupported connection type: %d", native->type);
    }

    return 0;
}

/* Not used with mainloop */
/* Process whatever input is pending on the connection; returns FALSE once
 * the connection is found to be closed. */
bool
lrmd_dispatch(lrmd_t * lrmd)
{
    lrmd_private_t *private = NULL;

    CRM_ASSERT(lrmd != NULL);

    private = lrmd->private;
    switch (private->type) {
        case CRM_CLIENT_IPC:
            while (crm_ipc_ready(private->ipc)) {
                if (crm_ipc_read(private->ipc) > 0) {
                    const char *msg = crm_ipc_buffer(private->ipc);

                    lrmd_ipc_dispatch(msg, strlen(msg), lrmd);
                }
            }
            break;
#ifdef HAVE_GNUTLS_GNUTLS_H
        case CRM_CLIENT_TLS:
lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_trace("Sending call options: %.8lx, %d", (long)options, options); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, 
lrmd_free_xml);
        native->pending_notify = NULL;
    }

    free(native->remote->buffer);
    native->remote->buffer = NULL;
    /* clear every handle released above so later calls see a closed connection */
    native->source = 0;
    native->sock = 0;
    native->psk_cred_c = NULL;
    native->remote->tls_session = NULL;
    native->sock = 0;

    if (native->callback) {
        lrmd_event_data_t event = { 0, };
        event.remote_nodename = native->remote_nodename;
        event.type = lrmd_event_disconnect;
        native->callback(&event);
    }
    return;
}

/* Stamp msg with the remote message id and type, then send it over the
 * session.  Returns the crm_remote_send() result; failures are logged. */
int
lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type)
{
    int rc = -1;

    crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id);
    crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type);

    rc = crm_remote_send(session, msg);

    if (rc < 0) {
        crm_err("Failed to send remote lrmd tls msg, rc = %d", rc);
        return rc;
    }

    return rc;
}

/* Wait for the reply carrying expected_reply_id.  Notifications that arrive
 * in the meantime are queued on pending_notify; other messages are logged
 * and discarded.  Returns the reply (caller frees) or NULL. */
static xmlNode *
lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected)
{
    lrmd_private_t *native = lrmd->private;
    xmlNode *xml = NULL;
    time_t start = time(NULL);
    const char *msg_type = NULL;
    int reply_id = 0;
    int remaining_timeout = 0;

    /* A timeout of 0 here makes no sense.  We have to wait a period of time
     * for the response to come back.  If -1 or 0, default to 10 seconds. */
    if (total_timeout <= 0) {
        total_timeout = 10000;
    }

    while (!xml) {

        xml = crm_remote_parse_buffer(native->remote);
        if (!xml) {
            /* read some more off the tls buffer if we still have time left.
*/ if (remaining_timeout) { remaining_timeout = remaining_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return NULL; } crm_remote_recv(native->remote, remaining_timeout, disconnected); xml = crm_remote_parse_buffer(native->remote); if (!xml || *disconnected) { return NULL; } } CRM_ASSERT(xml != NULL); crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(xml); xml = NULL; } else if (safe_str_eq(msg_type, "notify")) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, xml); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } xml = NULL; } else if (safe_str_neq(msg_type, "reply")) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(xml); xml = NULL; } else if (reply_id != expected_reply_id) { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); free_xml(xml); xml = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return xml; } static int lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg) { int rc = 0; lrmd_private_t *native = lrmd->private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request"); if (rc <= 0) { crm_err("Remote lrmd send failed, disconnecting"); lrmd_tls_disconnect(lrmd); return -ENOTCONN; } return pcmk_ok; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; int disconnected = 0; xmlNode *xml = NULL; if (lrmd_tls_connected(lrmd) == FALSE) { return -1; } rc = 
lrmd_tls_send(lrmd, msg);
    if (rc < 0) {
        return rc;
    }

    xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected);

    if (disconnected) {
        crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ",
                global_remote_msg_id);
        lrmd_tls_disconnect(lrmd);
        rc = -ENOTCONN;
    } else if (!xml) {
        crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ",
                global_remote_msg_id, timeout);
        rc = -ECOMM;
    }

    /* hand the reply to the caller if wanted, otherwise release it here */
    if (reply) {
        *reply = xml;
    } else {
        free_xml(xml);
    }

    return rc;
}
#endif

/* Send msg over whichever transport the connection uses and wait for the
 * reply.  Returns -1 for an unsupported connection type. */
static int
lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
    int rc = -1;
    lrmd_private_t *native = lrmd->private;

    switch (native->type) {
        case CRM_CLIENT_IPC:
            rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply);
            break;
#ifdef HAVE_GNUTLS_GNUTLS_H
        case CRM_CLIENT_TLS:
            rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply);
            break;
#endif
        default:
            crm_err("Unsupported connection type: %d", native->type);
    }

    return rc;
}

/* Send msg over whichever transport the connection uses without waiting
 * for a reply.  Returns -1 for an unsupported connection type. */
static int
lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
{
    int rc = -1;
    lrmd_private_t *native = lrmd->private;

    switch (native->type) {
        case CRM_CLIENT_IPC:
            rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_none, 0, NULL);
            break;
#ifdef HAVE_GNUTLS_GNUTLS_H
        case CRM_CLIENT_TLS:
            rc = lrmd_tls_send(lrmd, msg);
            break;
#endif
        default:
            crm_err("Unsupported connection type: %d", native->type);
    }

    return rc;
}

/* Non-zero while the underlying IPC or TLS connection is up; 0 for an
 * unsupported connection type. */
static int
lrmd_api_is_connected(lrmd_t * lrmd)
{
    lrmd_private_t *native = lrmd->private;

    switch (native->type) {
        case CRM_CLIENT_IPC:
            return crm_ipc_connected(native->ipc);
            break;
#ifdef HAVE_GNUTLS_GNUTLS_H
        case CRM_CLIENT_TLS:
            return lrmd_tls_connected(lrmd);
            break;
#endif
        default:
            crm_err("Unsupported connection type: %d", native->type);
    }

    return 0;
}

/* Build and send an lrmd command, optionally waiting for and returning the
 * reply. */
static int
lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout,   /* ms.
defaults to 1000 if set to 0 */
                  enum lrmd_call_options options, gboolean expect_reply)
{                               /* TODO we need to reduce usage of this boolean */
    int rc = pcmk_ok;
    int reply_id = -1;
    lrmd_private_t *native = lrmd->private;
    xmlNode *op_msg = NULL;
    xmlNode *op_reply = NULL;

    if (!lrmd_api_is_connected(lrmd)) {
        return -ENOTCONN;
    }

    if (op == NULL) {
        crm_err("No operation specified");
        return -EINVAL;
    }

    /* a missing token is only flagged here; lrmd_create_op() rejects it */
    CRM_CHECK(native->token != NULL,;
        );
    crm_trace("sending %s op to lrmd", op);

    op_msg = lrmd_create_op(native->token, op, data, options);

    if (op_msg == NULL) {
        return -EINVAL;
    }

    crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout);

    if (expect_reply) {
        rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply);
    } else {
        /* fire and forget: the send result is the final result */
        rc = lrmd_send_xml_no_reply(lrmd, op_msg);
        goto done;
    }

    if (rc < 0) {
        crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc);
        rc = -ECOMM;
        goto done;
    }

    rc = pcmk_ok;
    crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id);
    crm_trace("%s op reply received", op);
    /* the reply's F_LRMD_RC becomes this call's return value */
    if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) {
        rc = -ENOMSG;
        goto done;
    }

    crm_log_xml_trace(op_reply, "Reply");

    if (output_data) {
        *output_data = op_reply;
        op_reply = NULL;        /* Prevent subsequent free */
    }

  done:
    if (lrmd_api_is_connected(lrmd) == FALSE) {
        crm_err("LRMD disconnected");
    }

    free_xml(op_msg);
    free_xml(op_reply);
    return rc;
}

/* Send an LRMD_OP_POKE command without waiting for a reply. */
static int
lrmd_api_poke_connection(lrmd_t * lrmd)
{
    int rc;
    xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);

    crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
    rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, FALSE);
    free_xml(data);

    return rc;
}

/* Register this client with the lrmd and store the token it hands back. */
static int
lrmd_handshake(lrmd_t * lrmd, const char *name)
{
    int rc = pcmk_ok;
    lrmd_private_t *native = lrmd->private;
    xmlNode *reply = NULL;
    xmlNode *hello = create_xml_node(NULL, "lrmd_command");

    crm_xml_add(hello, F_TYPE, T_LRMD);
    crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER);
    crm_xml_add(hello, F_LRMD_CLIENTNAME, name);
    crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION,
LRMD_PROTOCOL_VERSION);

    /* advertise that we are a proxy provider */
    if (native->proxy_callback) {
        crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true");
    }

    rc = lrmd_send_xml(lrmd, hello, -1, &reply);

    if (rc < 0) {
        crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc);
        rc = -ECOMM;
    } else if (reply == NULL) {
        crm_err("Did not receive registration reply");
        rc = -EPROTO;
    } else {
        const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION);
        const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID);

        crm_element_value_int(reply, F_LRMD_RC, &rc);

        if (rc == -EPROTO) {
            crm_err("LRMD protocol mismatch client version %s, server version %s",
                LRMD_PROTOCOL_VERSION, crm_element_value(reply, F_LRMD_PROTOCOL_VERSION));
            crm_log_xml_err(reply, "Protocol Error");

        } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
            crm_err("Invalid registration message: %s", msg_type);
            crm_log_xml_err(reply, "Bad reply");
            rc = -EPROTO;
        } else if (tmp_ticket == NULL) {
            crm_err("No registration token provided");
            crm_log_xml_err(reply, "Bad reply");
            rc = -EPROTO;
        } else {
            crm_trace("Obtained registration token: %s", tmp_ticket);
            /* keep our own copy; tmp_ticket points into reply, freed below */
            native->token = strdup(tmp_ticket);
            rc = pcmk_ok;
        }
    }

    free_xml(reply);
    free_xml(hello);

    /* any registration failure tears the connection down again */
    if (rc != pcmk_ok) {
        lrmd_api_disconnect(lrmd);
    }
    return rc;
}

/* Connect to the local lrmd over IPC.  With a non-NULL fd the connection
 * is made without mainloop and its descriptor is stored in *fd; otherwise
 * it is registered as a mainloop IPC client. */
static int
lrmd_ipc_connect(lrmd_t * lrmd, int *fd)
{
    int rc = pcmk_ok;
    lrmd_private_t *native = lrmd->private;

    static struct ipc_client_callbacks lrmd_callbacks = {
        .dispatch = lrmd_ipc_dispatch,
        .destroy = lrmd_ipc_connection_destroy
    };

    crm_info("Connecting to lrmd");

    if (fd) {
        /* No mainloop */
        native->ipc = crm_ipc_new("lrmd", 0);
        if (native->ipc && crm_ipc_connect(native->ipc)) {
            *fd = crm_ipc_get_fd(native->ipc);
        } else if (native->ipc) {
            rc = -ENOTCONN;
        }
    } else {
        native->source = mainloop_add_ipc_client("lrmd", G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks);
        native->ipc = mainloop_get_ipc_client(native->source);
    }

    if (native->ipc == NULL) {
        crm_debug("Could not connect to the LRMD
API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static int set_key(gnutls_datum_t * key, const char *location) { FILE *stream; int read_len = 256; int cur_len = 0; int buf_len = read_len; static char *key_cache = NULL; static size_t key_cache_len = 0; static time_t key_cache_updated; if (location == NULL) { return -1; } if (key_cache) { time_t now = time(NULL); if ((now - key_cache_updated) < 60) { key->data = gnutls_malloc(key_cache_len + 1); key->size = key_cache_len; memcpy(key->data, key_cache, key_cache_len); crm_debug("using cached LRMD key"); return 0; } else { key_cache_len = 0; key_cache_updated = 0; free(key_cache); key_cache = NULL; crm_debug("clearing lrmd key cache"); } } stream = fopen(location, "r"); if (!stream) { return -1; } key->data = gnutls_malloc(read_len); while (!feof(stream)) { int next; if (cur_len == buf_len) { buf_len = cur_len + read_len; key->data = gnutls_realloc(key->data, buf_len); } next = fgetc(stream); if (next == EOF && feof(stream)) { break; } key->data[cur_len] = next; cur_len++; } fclose(stream); key->size = cur_len; if (!cur_len) { gnutls_free(key->data); key->data = 0; return -1; } if (!key_cache) { key_cache = calloc(1, key->size + 1); memcpy(key_cache, key->data, key->size); key_cache_len = key->size; key_cache_updated = time(NULL); } return 0; } int lrmd_tls_set_key(gnutls_datum_t * key) { int rc = 0; const char *specific_location = getenv("PCMK_authkey_location"); if (set_key(key, specific_location) == 0) { crm_debug("Using custom authkey location %s", specific_location); return 0; } if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { rc = set_key(key, ALT_REMOTE_KEY_LOCATION); } if (rc) { crm_err("No lrmd remote key found"); return -1; } return rc; } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->private; if 
(native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tcp_connect_cb(void *userdata, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; char name[256] = { 0, }; static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; int rc = sock; gnutls_datum_t psk_key = { NULL, 0 }; if (rc < 0) { lrmd_tls_connection_destroy(lrmd); crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port); report_async_connection_result(lrmd, rc); return; } /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon * to avoid all blocking code in the client. */ native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_warn("Client tls handshake failed for server %s:%d. 
Disconnecting", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -1); return; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); rc = lrmd_handshake(lrmd, name); report_async_connection_result(lrmd, rc); return; } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc = 0; lrmd_private_t *native = lrmd->private; lrmd_gnutls_global_init(); rc = crm_remote_tcp_connect_async(native->server, native->port, timeout, lrmd, lrmd_tcp_connect_cb); return rc; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; lrmd_private_t *native = lrmd->private; int sock; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect(native->server, native->port); if (sock < 0) { crm_warn("Could not establish remote lrmd connection to %s", native->server); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return -1; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session 
creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -1; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); if (fd) { *fd = sock; } else { char name[256] = { 0, }; snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = 0; lrmd_private_t *native = lrmd->private; if (!native->callback) { crm_err("Async connect not possible, no lrmd client callback set."); return -1; } switch (native->type) { case CRM_CLIENT_IPC: /* fake async connection with ipc. 
it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; crm_info("Disconnecting from lrmd service"); switch (native->type) { case CRM_CLIENT_IPC: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } free(native->token); native->token = NULL; return 0; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char 
*provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (safe_str_eq(class, "ocf") && !provider) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { lrmd_rsc_info_t *copy = NULL; copy = calloc(1, sizeof(lrmd_rsc_info_t)); copy->id = strdup(rsc_info->id); copy->type = strdup(rsc_info->type); copy->class = strdup(rsc_info->class); if (rsc_info->provider) { copy->provider = strdup(rsc_info->provider); } return copy; } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->class); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } 
class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (safe_str_eq(class, "ocf") && !provider) { free_xml(output); return NULL; } rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = strdup(rsc_id); rsc_info->class = strdup(class); if (provider) { rsc_info->provider = strdup(provider); } rsc_info->type = strdup(type); free_xml(output); return rsc_info; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if(stonith_api) { stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); stonith_api->cmds->free(stonith_api); } if (*output == NULL) { rc = -EIO; } return rc; } #define lsb_metadata_template \ "\n" \ "\n" \ "\n" \ " 1.0\n" \ " \n" \ " %s\n" \ " \n" \ " %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ 
" %s\n" \ " %s\n" \ " %s\n" \ " \n" \ "\n" #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" #define PROVIDES "# Provides:" #define REQ_START "# Required-Start:" #define REQ_STOP "# Required-Stop:" #define SHLD_START "# Should-Start:" #define SHLD_STOP "# Should-Stop:" #define DFLT_START "# Default-Start:" #define DFLT_STOP "# Default-Stop:" #define SHORT_DSCR "# Short-Description:" #define DESCRIPTION "# Description:" #define lsb_meta_helper_free_value(m) \ if ((m) != NULL) { \ xmlFree(m); \ (m) = NULL; \ } #define lsb_meta_helper_get_value(buffer, ptr, keyword) \ if (!ptr && !strncasecmp(buffer, keyword, strlen(keyword))) { \ (ptr) = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer+strlen(keyword)); \ continue; \ } static int lsb_get_metadata(const char *type, char **output) { char ra_pathname[PATH_MAX] = { 0, }; FILE *fp; GString *meta_data = NULL; char buffer[1024]; char *provides = NULL; char *req_start = NULL; char *req_stop = NULL; char *shld_start = NULL; char *shld_stop = NULL; char *dflt_start = NULL; char *dflt_stop = NULL; char *s_dscrpt = NULL; char *xml_l_dscrpt = NULL; GString *l_dscrpt = NULL; if(type[0] == '/') { snprintf(ra_pathname, sizeof(ra_pathname), "%s", type); } else { snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type); } crm_trace("Looking into %s", ra_pathname); if (!(fp = fopen(ra_pathname, "r"))) { return -errno; } /* Enter into the lsb-compliant comment block */ while (fgets(buffer, sizeof(buffer), fp)) { /* Now suppose each of the following eight arguments contain only one line */ lsb_meta_helper_get_value(buffer, provides, PROVIDES) lsb_meta_helper_get_value(buffer, req_start, REQ_START) lsb_meta_helper_get_value(buffer, req_stop, REQ_STOP) lsb_meta_helper_get_value(buffer, shld_start, SHLD_START) lsb_meta_helper_get_value(buffer, shld_stop, SHLD_STOP) lsb_meta_helper_get_value(buffer, dflt_start, DFLT_START) lsb_meta_helper_get_value(buffer, 
dflt_stop, DFLT_STOP) lsb_meta_helper_get_value(buffer, s_dscrpt, SHORT_DSCR) /* Long description may cross multiple lines */ if ((l_dscrpt == NULL) && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) { l_dscrpt = g_string_new(buffer + strlen(DESCRIPTION)); /* Between # and keyword, more than one space, or a tab character, * indicates the continuation line. Extracted from LSB init script standard */ while (fgets(buffer, sizeof(buffer), fp)) { if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) { buffer[0] = ' '; l_dscrpt = g_string_append(l_dscrpt, buffer); } else { fputs(buffer, fp); break; /* Long description ends */ } } continue; } if (l_dscrpt) { xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(l_dscrpt->str)); } if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) { /* Get to the out border of LSB comment block */ break; } if (buffer[0] != '#') { break; /* Out of comment block in the beginning */ } } fclose(fp); meta_data = g_string_new(""); g_string_sprintf(meta_data, lsb_metadata_template, type, (xml_l_dscrpt == NULL) ? type : xml_l_dscrpt, (s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides, (req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop, (shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop, (dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? 
"" : dflt_stop); lsb_meta_helper_free_value(xml_l_dscrpt); lsb_meta_helper_free_value(s_dscrpt); lsb_meta_helper_free_value(provides); lsb_meta_helper_free_value(req_start); lsb_meta_helper_free_value(req_stop); lsb_meta_helper_free_value(shld_start); lsb_meta_helper_free_value(shld_stop); lsb_meta_helper_free_value(dflt_start); lsb_meta_helper_free_value(dflt_stop); if (l_dscrpt) { g_string_free(l_dscrpt, TRUE); } *output = strdup(meta_data->str); g_string_free(meta_data, TRUE); crm_trace("Created fake metadata: %d", strlen(*output)); return pcmk_ok; } #if SUPPORT_NAGIOS static int nagios_get_metadata(const char *type, char **output) { int rc = pcmk_ok; FILE *file_strm = NULL; int start = 0, length = 0, read_len = 0; char *metadata_file = NULL; int len = 36; len += strlen(NAGIOS_METADATA_DIR); len += strlen(type); metadata_file = calloc(1, len); CRM_CHECK(metadata_file != NULL, return -ENOMEM); sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type); file_strm = fopen(metadata_file, "r"); if (file_strm == NULL) { crm_err("Metadata file %s does not exist", metadata_file); free(metadata_file); return -EIO; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, start); CRM_ASSERT(length >= 0); CRM_ASSERT(start == ftell(file_strm)); if (length <= 0) { crm_info("%s was not valid", metadata_file); free(*output); *output = NULL; rc = -EIO; } else { crm_trace("Reading %d bytes from file", length); *output = calloc(1, (length + 1)); read_len = fread(*output, 1, length, file_strm); if (read_len != length) { crm_err("Calculated and read bytes differ: %d vs. 
%d", length, read_len); free(*output); *output = NULL; rc = -EIO; } } fclose(file_strm); free(metadata_file); return rc; } #endif static int generic_get_metadata(const char *standard, const char *provider, const char *type, char **output) { svc_action_t *action = resources_action_create(type, standard, provider, type, "meta-data", 0, 5000, NULL); if (!(services_action_sync(action))) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_get_metadata(lrmd_t * lrmd, const char *class, const char *provider, const char *type, char **output, enum lrmd_call_options options) { if (!class || !type) { return -EINVAL; } if (safe_str_eq(class, "stonith")) { return stonith_get_metadata(provider, type, output); } else if (safe_str_eq(class, "lsb")) { return lsb_get_metadata(type, output); #if SUPPORT_NAGIOS } else if (safe_str_eq(class, "nagios")) { return nagios_get_metadata(type, output); #endif } return generic_get_metadata(class, provider, type, output); } static int lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, 
F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2field((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if(stonith_api) { stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); } for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; if (safe_str_eq(class, "stonith")) { rc += list_stonith_agents(resources); } else { GListPtr gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { rc += list_stonith_agents(resources); } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static int does_provider_have_agent(const char *agent, const char *provider, 
const char *class) { int found = 0; GList *agents = NULL; GListPtr gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (safe_str_eq(agent, gIter2->data)) { found = 1; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GListPtr gIter = NULL; ocf_providers = resources_list_providers("ocf"); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, "ocf")) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GListPtr gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, "stonith"); rc++; } g_list_free_full(standards, free); return rc; } lrmd_t * lrmd_api_new(void) { lrmd_t *new_lrmd = NULL; lrmd_private_t *pvt = NULL; new_lrmd = calloc(1, sizeof(lrmd_t)); pvt = calloc(1, sizeof(lrmd_private_t)); pvt->remote = calloc(1, sizeof(crm_remote_t)); new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t)); pvt->type = CRM_CLIENT_IPC; new_lrmd->private = pvt; new_lrmd->cmds->connect = lrmd_api_connect; new_lrmd->cmds->connect_async = lrmd_api_connect_async; new_lrmd->cmds->is_connected = lrmd_api_is_connected; new_lrmd->cmds->poke_connection = lrmd_api_poke_connection; new_lrmd->cmds->disconnect = lrmd_api_disconnect; new_lrmd->cmds->register_rsc = lrmd_api_register_rsc; new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc; 
new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info; new_lrmd->cmds->set_callback = lrmd_api_set_callback; new_lrmd->cmds->get_metadata = lrmd_api_get_metadata; new_lrmd->cmds->exec = lrmd_api_exec; new_lrmd->cmds->cancel = lrmd_api_cancel; new_lrmd->cmds->list_agents = lrmd_api_list_agents; new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; return new_lrmd; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { #ifdef HAVE_GNUTLS_GNUTLS_H lrmd_t *new_lrmd = lrmd_api_new(); lrmd_private_t *native = new_lrmd->private; if (!nodename && !server) { lrmd_api_delete(new_lrmd); return NULL; } native->type = CRM_CLIENT_TLS; native->remote_nodename = nodename ? strdup(nodename) : strdup(server); native->server = server ? strdup(server) : strdup(nodename); native->port = port; if (native->port == 0) { const char *remote_port_str = getenv("PCMK_remote_port"); native->port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; } return new_lrmd; #else crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); return NULL; #endif } void lrmd_api_delete(lrmd_t * lrmd) { if (!lrmd) { return; } lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */ free(lrmd->cmds); if (lrmd->private) { lrmd_private_t *native = lrmd->private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); } free(lrmd->private); free(lrmd); } diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am index 82cb65ff27..ce08297ae4 100644 --- a/lrmd/Makefile.am +++ b/lrmd/Makefile.am @@ -1,64 +1,64 @@ # Copyright (c) 2012 David Vossel # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. 
# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # MAINTAINERCLEANFILES = Makefile.in testdir = $(datadir)/$(PACKAGE)/tests/lrmd test_SCRIPTS = regression.py lrmdlibdir = $(CRM_DAEMON_DIR) lrmdlib_PROGRAMS = lrmd lrmd_test initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote sbin_PROGRAMS = pacemaker_remoted if BUILD_SYSTEMD systemdunit_DATA = pacemaker_remote.service endif if BUILD_HELP man8_MANS = $(sbin_PROGRAMS:%=%.8) endif %.8: % echo Creating $@ chmod a+x $(top_builddir)/lrmd/$< $(top_builddir)/lrmd/$< --help $(HELP2MAN) --output $@ --no-info --section 8 --name "Part of the Pacemaker cluster resource manager" $(top_builddir)/lrmd/$< lrmd_SOURCES = main.c lrmd.c lrmd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ - $(top_builddir)/lib/fencing/libstonithd.la + $(top_builddir)/lib/fencing/libstonithd.la ${COMPAT_LIBS} pacemaker_remoted_SOURCES = main.c lrmd.c tls_backend.c ipc_proxy.c pacemaker_remoted_CFLAGS = -DSUPPORT_REMOTE pacemaker_remoted_LDADD = $(lrmd_LDADD) lrmd_test_SOURCES = test.c lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la noinst_HEADERS = lrmd_private.h diff --git a/lrmd/tls_backend.c b/lrmd/tls_backend.c index 780d17bedf..bfa18adb87 100644 --- a/lrmd/tls_backend.c +++ b/lrmd/tls_backend.c @@ -1,334 +1,335 @@ /* * Copyright (c) 
2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include +#include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_REMOTE_AUTH_TIMEOUT 10000 gnutls_psk_server_credentials_t psk_cred_s; gnutls_dh_params_t dh_params; static int ssock = 0; extern int lrmd_call_id; static void debug_log(int level, const char *str) { fputs(str, stderr); } static int lrmd_remote_client_msg(gpointer data) { int id = 0; int rc = 0; int disconnected = 0; xmlNode *request = NULL; crm_client_t *client = data; if (client->remote->tls_handshake_complete == FALSE) { int rc = 0; /* Muliple calls to handshake will be required, this callback * will be invoked once the client sends more handshake data. 
*/ do { rc = gnutls_handshake(*client->remote->tls_session); if (rc < 0 && rc != GNUTLS_E_AGAIN) { crm_err("Remote lrmd tls handshake failed"); return -1; } } while (rc == GNUTLS_E_INTERRUPTED); if (rc == 0) { crm_debug("Remote lrmd tls handshake completed"); client->remote->tls_handshake_complete = TRUE; if (client->remote->auth_timeout) { g_source_remove(client->remote->auth_timeout); } client->remote->auth_timeout = 0; } return 0; } rc = crm_remote_ready(client->remote, 0); if (rc == 0) { /* no msg to read */ return 0; } else if (rc < 0) { crm_info("Client disconnected during remote client read"); return -1; } crm_remote_recv(client->remote, -1, &disconnected); request = crm_remote_parse_buffer(client->remote); while (request) { crm_element_value_int(request, F_LRMD_REMOTE_MSG_ID, &id); crm_trace("processing request from remote client with remote msg id %d", id); if (!client->name) { const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); if (value) { client->name = strdup(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, F_LRMD_CLIENTID, client->id); crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); free_xml(request); /* process all the messages in the current buffer */ request = crm_remote_parse_buffer(client->remote); } if (disconnected) { crm_info("Client disconnect detected in tls msg dispatcher."); return -1; } return 0; } static void lrmd_remote_client_destroy(gpointer user_data) { crm_client_t *client = user_data; if (client == NULL) { return; } ipc_proxy_remove_provider(client); client_disconnect_cleanup(client->id); crm_notice("LRMD client disconnecting remote client - name: %s id: %s", client->name ? 
client->name : "", client->id); if (client->remote->tls_session) { void *sock_ptr; int csock; sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session); csock = GPOINTER_TO_INT(sock_ptr); gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*client->remote->tls_session); gnutls_free(client->remote->tls_session); close(csock); } crm_client_destroy(client); return; } static gboolean lrmd_auth_timeout_cb(gpointer data) { crm_client_t *client = data; client->remote->auth_timeout = 0; if (client->remote->tls_handshake_complete == TRUE) { return FALSE; } mainloop_del_fd(client->remote->source); client->remote->source = NULL; crm_err("Remote client authentication timed out"); return FALSE; } static int lrmd_remote_listen(gpointer data) { int csock = 0; int flag = 0; unsigned laddr; struct sockaddr_in addr; gnutls_session_t *session = NULL; crm_client_t *new_client = NULL; static struct mainloop_fd_callbacks lrmd_remote_fd_cb = { .dispatch = lrmd_remote_client_msg, .destroy = lrmd_remote_client_destroy, }; /* accept the connection */ laddr = sizeof(addr); csock = accept(ssock, (struct sockaddr *)&addr, &laddr); crm_debug("New remote connection from %s", inet_ntoa(addr.sin_addr)); if (csock == -1) { crm_err("accept socket failed"); return TRUE; } if ((flag = fcntl(csock, F_GETFL)) >= 0) { if (fcntl(csock, F_SETFL, flag | O_NONBLOCK) < 0) { crm_err("fcntl() write failed"); close(csock); return TRUE; } } else { crm_err("fcntl() read failed"); close(csock); return TRUE; } session = create_psk_tls_session(csock, GNUTLS_SERVER, psk_cred_s); if (session == NULL) { crm_err("TLS session creation failed"); close(csock); return TRUE; } new_client = calloc(1, sizeof(crm_client_t)); new_client->remote = calloc(1, sizeof(crm_remote_t)); new_client->kind = CRM_CLIENT_TLS; new_client->remote->tls_session = session; new_client->id = crm_generate_uuid(); new_client->remote->auth_timeout = g_timeout_add(LRMD_REMOTE_AUTH_TIMEOUT, lrmd_auth_timeout_cb, new_client); 
crm_notice("LRMD client connection established. %p id: %s", new_client, new_client->id); new_client->remote->source = mainloop_add_fd("lrmd-remote-client", G_PRIORITY_DEFAULT, csock, new_client, &lrmd_remote_fd_cb); g_hash_table_insert(client_connections, new_client->id, new_client); return TRUE; } static void lrmd_remote_connection_destroy(gpointer user_data) { crm_notice("Remote tls server disconnected"); return; } static int lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) { return lrmd_tls_set_key(key); } int lrmd_init_remote_tls_server(int port) { int rc; struct sockaddr_in saddr; int optval; static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = lrmd_remote_listen, .destroy = lrmd_remote_connection_destroy, }; crm_notice("Starting a tls listener on port %d.", port); gnutls_global_init(); gnutls_global_set_log_function(debug_log); gnutls_dh_params_init(&dh_params); gnutls_dh_params_generate2(dh_params, 1024); gnutls_psk_allocate_server_credentials(&psk_cred_s); gnutls_psk_set_server_credentials_function(psk_cred_s, lrmd_tls_server_key_cb); gnutls_psk_set_server_dh_params(psk_cred_s, dh_params); /* create server socket */ ssock = socket(AF_INET, SOCK_STREAM, 0); if (ssock == -1) { crm_err("Can not create server socket."); return -1; } /* reuse address */ optval = 1; rc = setsockopt(ssock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { crm_perror(LOG_INFO, "Couldn't allow the reuse of local addresses by our remote listener"); } rc = -1; /* bind server socket */ memset(&saddr, '\0', sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = INADDR_ANY; saddr.sin_port = htons(port); if (bind(ssock, (struct sockaddr *)&saddr, sizeof(saddr)) == -1) { crm_err("Can not bind server socket."); goto init_remote_cleanup; } if (listen(ssock, 10) == -1) { crm_err("Can not start listen."); goto init_remote_cleanup; } mainloop_add_fd("lrmd-remote", G_PRIORITY_DEFAULT, ssock, NULL, 
&remote_listen_fd_callbacks); rc = ssock; init_remote_cleanup: if (rc < 0) { close(ssock); ssock = 0; } return rc; } void lrmd_tls_server_destroy(void) { if (psk_cred_s) { gnutls_psk_free_server_credentials(psk_cred_s); psk_cred_s = 0; } if (ssock > 0) { close(ssock); ssock = 0; } } #endif diff --git a/tools/crm_mon.c b/tools/crm_mon.c index f29a75c5f8..912c6df179 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2477 +1,2477 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include <../lib/pengine/unpack.h> #include <../pengine/pengine.h> #include /* GMainLoop *mainloop = NULL; */ void wait_for_refresh(int offset, const char *prefix, int msec); void clean_up(int rc); void crm_diff_update(const char *event, xmlNode * msg); gboolean mon_refresh_display(gpointer user_data); int cib_connect(gboolean full); void mon_st_callback(stonith_t * st, stonith_event_t * e); char *xml_file = NULL; char *as_html_file = NULL; int as_xml = 0; char *pid_file = NULL; char *snmp_target = NULL; char *snmp_community = NULL; gboolean as_console = TRUE;; gboolean simple_status = FALSE; gboolean group_by_node = 
FALSE; gboolean inactive_resources = FALSE; gboolean web_cgi = FALSE; int reconnect_msec = 5000; gboolean daemonize = FALSE; GMainLoop *mainloop = NULL; guint timer_id = 0; GList *attr_list = NULL; const char *crm_mail_host = NULL; const char *crm_mail_prefix = NULL; const char *crm_mail_from = NULL; const char *crm_mail_to = NULL; const char *external_agent = NULL; const char *external_recipient = NULL; cib_t *cib = NULL; stonith_t *st = NULL; xmlNode *current_cib = NULL; gboolean one_shot = FALSE; gboolean has_warnings = FALSE; gboolean print_failcount = FALSE; gboolean print_operations = FALSE; gboolean print_timing = FALSE; gboolean print_nodes_attr = FALSE; gboolean print_last_updated = TRUE; gboolean print_last_change = TRUE; gboolean print_tickets = FALSE; gboolean watch_fencing = FALSE; /* FIXME allow, detect, and correctly interpret glob pattern or regex? */ const char *print_neg_location_prefix; const char *print_neg_location_prefix_toggle; #define FILTER_STR {"shutdown", "terminate", "standby", "fail-count", \ "last-failure", "probe_complete", "#id", "#uname", \ "#is_dc", NULL} gboolean log_diffs = FALSE; gboolean log_updates = FALSE; long last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; /* * 1.3.6.1.4.1.32723 has been assigned to the project by IANA * http://www.iana.org/assignments/enterprise-numbers */ #define PACEMAKER_PREFIX "1.3.6.1.4.1.32723" #define PACEMAKER_TRAP_PREFIX PACEMAKER_PREFIX ".1" #define snmp_crm_trap_oid PACEMAKER_TRAP_PREFIX #define snmp_crm_oid_node PACEMAKER_TRAP_PREFIX ".1" #define snmp_crm_oid_rsc PACEMAKER_TRAP_PREFIX ".2" #define snmp_crm_oid_task PACEMAKER_TRAP_PREFIX ".3" #define snmp_crm_oid_desc PACEMAKER_TRAP_PREFIX ".4" #define snmp_crm_oid_status PACEMAKER_TRAP_PREFIX ".5" #define snmp_crm_oid_rc PACEMAKER_TRAP_PREFIX ".6" #define snmp_crm_oid_trc PACEMAKER_TRAP_PREFIX ".7" #if CURSES_ENABLED # define print_dot() if(as_console) { \ printw("."); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, "."); \ } 
#else # define print_dot() fprintf(stdout, "."); #endif #if CURSES_ENABLED # define print_as(fmt, args...) if(as_console) { \ printw(fmt, ##args); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, fmt, ##args); \ } #else # define print_as(fmt, args...) fprintf(stdout, fmt, ##args); #endif static void blank_screen(void) { #if CURSES_ENABLED int lpc = 0; for (lpc = 0; lpc < LINES; lpc++) { move(lpc, 0); clrtoeol(); } move(0, 0); refresh(); #endif } static gboolean mon_timer_popped(gpointer data) { int rc = pcmk_ok; if (timer_id > 0) { g_source_remove(timer_id); } rc = cib_connect(TRUE); if (rc != pcmk_ok) { print_dot(); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return FALSE; } static void mon_cib_connection_destroy(gpointer user_data) { print_as("Connection to the CIB terminated\n"); if (cib) { print_as("Reconnecting..."); cib->cmds->signoff(cib); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return; } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(EX_OK); } #if ON_DARWIN # define sighandler_t sig_t #endif #if CURSES_ENABLED # ifndef HAVE_SIGHANDLER_T typedef void (*sighandler_t) (int); # endif static sighandler_t ncurses_winch_handler; static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); mainloop_set_trigger(refresh_trigger); } not_done--; } #endif int cib_connect(gboolean full) { int rc = pcmk_ok; static gboolean need_pass = TRUE; CRM_CHECK(cib != NULL, return -EINVAL); if (getenv("CIB_passwd") != NULL) { need_pass = FALSE; } if (watch_fencing && st == NULL) { st = stonith_api_new(); } if (watch_fencing && st->state == stonith_disconnected) { crm_trace("Connecting to 
stonith"); rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback); } } if (cib->state != cib_connected_query && cib->state != cib_connected_command) { crm_trace("Connecting to the CIB"); if (as_console && need_pass && cib->variant == cib_remote) { need_pass = FALSE; print_as("Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != pcmk_ok) { return rc; } current_cib = get_cib_copy(cib); mon_refresh_display(NULL); if (full) { if (rc == pcmk_ok) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); if (rc == -EPROTONOSUPPORT) { print_as ("Notification setup not supported, won't be able to reconnect after failure"); if (as_console) { sleep(2); } rc = pcmk_ok; } } if (rc == pcmk_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != pcmk_ok) { print_as("Notification setup failed, could not monitor CIB actions"); if (as_console) { sleep(2); } clean_up(-rc); } } } return rc; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\tDisplay only essential output" }, {"-spacer-", 1, 0, '-', "\nModes:"}, {"as-html", 1, 0, 'h', "Write cluster status to the named html file"}, {"as-xml", 0, 0, 'X', "\tWrite cluster status as xml to stdout. 
This will enable one-shot mode."}, {"web-cgi", 0, 0, 'w', "\tWeb mode with output suitable for cgi"}, {"simple-status", 0, 0, 's', "Display the cluster status once as a simple one line output (suitable for nagios)"}, {"snmp-traps", 1, 0, 'S', "Send SNMP traps to this station", !ENABLE_SNMP}, {"snmp-community", 1, 0, 'C', "Specify community for SNMP traps(default is NULL)", !ENABLE_SNMP}, {"mail-to", 1, 0, 'T', "Send Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix", !ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "\nDisplay Options:"}, {"group-by-node", 0, 0, 'n', "\tGroup resources by node" }, {"inactive", 0, 0, 'r', "\tDisplay inactive resources" }, {"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"}, {"operations", 0, 0, 'o', "\tDisplay resource operation history" }, {"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" }, {"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"}, {"watch-fencing", 0, 0, 'W', "\t\tListen for fencing events. 
For use with --external-agent, --mail-to and/or --snmp-traps where supported"}, {"neg-locations", 2, 0, 'L', "Display negative location constraints [optionally filtered by id prefix]"}, {"show-node-attributes", 0, 0, 'A', "Display node attributes" }, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"interval", 1, 0, 'i', "\tUpdate frequency in seconds" }, {"one-shot", 0, 0, '1', "\tDisplay the cluster status once on the console and exit"}, {"disable-ncurses",0, 0, 'N', "\tDisable the use of ncurses", !CURSES_ENABLED}, {"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"}, {"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"}, {"mail-from", 1, 0, 'F', "\tMail alerts should come from the named user", !ENABLE_ESMTP}, {"mail-host", 1, 0, 'H', "\tMail alerts should be sent via the named host", !ENABLE_ESMTP}, {"mail-prefix", 1, 0, 'P', "Subjects for mail alerts should start with this string", !ENABLE_ESMTP}, {"external-agent", 1, 0, 'E', "A program to run when resource operations take place."}, {"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."}, {"xml-file", 1, 0, 'x', NULL, 1}, {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console with updates as they occur:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console just once then exit:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon -1", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display your cluster status, group resources by node, and include inactive resources in the list:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --group-by-node --inactive", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:", pcmk_option_paragraph}, {"-spacer-", 1, 
0, '-', " crm_mon --daemonize --as-html /path/to/docroot/filename.html", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon and export the current cluster status as xml to stdout, then exit.:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --as-xml", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send email alerts:", pcmk_option_paragraph|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com", pcmk_option_example|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send SNMP alerts:", pcmk_option_paragraph|!ENABLE_SNMP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --snmp-traps snmptrapd.example.com", pcmk_option_example|!ENABLE_SNMP}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ #if CURSES_ENABLED static const char * get_option_desc(char c) { int lpc; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].name[0] == '-') continue; if (long_options[lpc].val == c) { const char * tab = NULL; tab = strrchr(long_options[lpc].desc, '\t'); return tab ? ++tab : long_options[lpc].desc; } } return NULL; } static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused) { int c; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'c': print_tickets = ! print_tickets; break; case 'f': print_failcount = ! print_failcount; break; case 'n': group_by_node = ! group_by_node; break; case 'o': print_operations = ! print_operations; break; case 'r': inactive_resources = ! inactive_resources; break; case 't': print_timing = ! print_timing; if (print_timing) print_operations = TRUE; break; case 'A': print_nodes_attr = ! 
print_nodes_attr; break; case 'L': if (print_neg_location_prefix) { /* toggle off */ print_neg_location_prefix_toggle = print_neg_location_prefix; print_neg_location_prefix = NULL; } else if (print_neg_location_prefix_toggle) { /* toggle on */ print_neg_location_prefix = print_neg_location_prefix_toggle; print_neg_location_prefix_toggle = NULL; } else { /* toggled on for the first time at runtime */ print_neg_location_prefix = ""; } break; case '?': config_mode = TRUE; break; default: goto refresh; } if (!config_mode) goto refresh; blank_screen(); print_as("Display option change mode\n"); print_as("\n"); print_as("%c c: \t%s\n", print_tickets ? '*': ' ', get_option_desc('c')); print_as("%c f: \t%s\n", print_failcount ? '*': ' ', get_option_desc('f')); print_as("%c n: \t%s\n", group_by_node ? '*': ' ', get_option_desc('n')); print_as("%c o: \t%s\n", print_operations ? '*': ' ', get_option_desc('o')); print_as("%c r: \t%s\n", inactive_resources ? '*': ' ', get_option_desc('r')); print_as("%c t: \t%s\n", print_timing ? '*': ' ', get_option_desc('t')); print_as("%c A: \t%s\n", print_nodes_attr ? '*': ' ', get_option_desc('A')); print_as("%c L: \t%s\n", print_neg_location_prefix ? '*': ' ', get_option_desc('L')); print_as("\n"); print_as("Toggle fields via field letter, type any other key to return"); } refresh: mon_refresh_display(NULL); return TRUE; } #endif int main(int argc, char **argv) { int flag; int argerr = 0; int exit_code = 0; int option_index = 0; pid_file = strdup("/tmp/ClusterMon.pid"); crm_log_cli_init("crm_mon"); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." 
"\n\nOutputs varying levels of detail in a number of different formats.\n"); -#ifndef ON_DARWIN +#if !defined (ON_DARWIN) && !defined (ON_BSD) /* prevent zombies */ signal(SIGCLD, SIG_IGN); #endif if (strcmp(crm_system_name, "crm_mon.cgi") == 0) { web_cgi = TRUE; one_shot = TRUE; } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'Q': print_last_updated = FALSE; print_last_change = FALSE; break; case 'i': reconnect_msec = crm_get_msec(optarg); break; case 'n': group_by_node = TRUE; break; case 'r': inactive_resources = TRUE; break; case 'W': watch_fencing = TRUE; break; case 'd': daemonize = TRUE; break; case 't': print_timing = TRUE; print_operations = TRUE; break; case 'o': print_operations = TRUE; break; case 'f': print_failcount = TRUE; break; case 'A': print_nodes_attr = TRUE; break; case 'L': print_neg_location_prefix = optarg ?: ""; break; case 'c': print_tickets = TRUE; break; case 'p': free(pid_file); pid_file = strdup(optarg); break; case 'x': xml_file = strdup(optarg); one_shot = TRUE; break; case 'h': as_html_file = strdup(optarg); break; case 'X': as_xml = TRUE; one_shot = TRUE; break; case 'w': web_cgi = TRUE; one_shot = TRUE; break; case 's': simple_status = TRUE; one_shot = TRUE; break; case 'S': snmp_target = optarg; break; case 'T': crm_mail_to = optarg; break; case 'F': crm_mail_from = optarg; break; case 'H': crm_mail_host = optarg; break; case 'P': crm_mail_prefix = optarg; break; case 'E': external_agent = optarg; break; case 'e': external_recipient = optarg; break; case '1': one_shot = TRUE; break; case 'N': as_console = FALSE; break; case 'C': snmp_community = optarg; break; case '$': case '?': crm_help(flag, EX_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) 
supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', EX_USAGE); } if (one_shot) { as_console = FALSE; } else if (daemonize) { as_console = FALSE; crm_enable_stderr(FALSE); if (!as_html_file && !snmp_target && !crm_mail_to && !external_agent && !as_xml) { printf ("Looks like you forgot to specify one or more of: --as-html, --as-xml, --mail-to, --snmp-target, --external-agent\n"); crm_help('?', EX_USAGE); } crm_make_daemon(crm_system_name, TRUE, pid_file); } else if (as_console) { #if CURSES_ENABLED initscr(); cbreak(); noecho(); crm_enable_stderr(FALSE); #else one_shot = TRUE; as_console = FALSE; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } crm_info("Starting %s", crm_system_name); if (xml_file != NULL) { current_cib = filename2xml(xml_file); mon_refresh_display(NULL); return exit_code; } if (current_cib == NULL) { cib = cib_new(); if (!one_shot) { print_as("Attempting connection to the cluster..."); } do { exit_code = cib_connect(!one_shot); if (one_shot) { break; } else if (exit_code != pcmk_ok) { print_dot(); sleep(reconnect_msec / 1000); } } while (exit_code == -ENOTCONN); if (exit_code != pcmk_ok) { print_as("\nConnection to cluster failed: %s\n", pcmk_strerror(exit_code)); if (as_console) { sleep(2); } clean_up(-exit_code); } } if (one_shot) { return exit_code; } mainloop = g_main_new(FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (as_console) { ncurses_winch_handler = signal(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; g_io_add_watch(g_io_channel_unix_new(STDIN_FILENO), G_IO_IN, detect_user_input, NULL); } #endif refresh_trigger = 
mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_run(mainloop); g_main_destroy(mainloop); crm_info("Exiting %s", crm_system_name); clean_up(0); return 0; /* never reached */ } void wait_for_refresh(int offset, const char *prefix, int msec) { int lpc = msec / 1000; struct timespec sleept = { 1, 0 }; if (as_console == FALSE) { timer_id = g_timeout_add(msec, mon_timer_popped, NULL); return; } crm_notice("%sRefresh in %ds...", prefix ? prefix : "", lpc); while (lpc > 0) { #if CURSES_ENABLED move(offset, 0); /* printw("%sRefresh in \033[01;32m%ds\033[00m...", prefix?prefix:"", lpc); */ printw("%sRefresh in %ds...\n", prefix ? prefix : "", lpc); clrtoeol(); refresh(); #endif lpc--; if (lpc == 0) { timer_id = g_timeout_add(1000, mon_timer_popped, NULL); } else { if (nanosleep(&sleept, NULL) != 0) { return; } } } } #define mon_warn(fmt...) do { \ if (!has_warnings) { \ print_as("Warning:"); \ } else { \ print_as(","); \ } \ print_as(fmt); \ has_warnings = TRUE; \ } while(0) static int count_resources(pe_working_set_t * data_set, resource_t * rsc) { int count = 0; GListPtr gIter = NULL; if (rsc == NULL) { gIter = data_set->resources; } else if (rsc->children) { gIter = rsc->children; } else { return is_not_set(rsc->flags, pe_rsc_orphan); } for (; gIter != NULL; gIter = gIter->next) { count += count_resources(data_set, gIter->data); } return count; } static int print_simple_status(pe_working_set_t * data_set) { node_t *dc = NULL; GListPtr gIter = NULL; int nodes_online = 0; int nodes_standby = 0; int nodes_maintenance = 0; dc = data_set->dc_node; if (dc == NULL) { mon_warn("No DC "); } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->maintenance && node->details->online) { nodes_maintenance++; } else if (node->details->online) { nodes_online++; } else { mon_warn("offline node: %s", 
node->details->uname); } } if (!has_warnings) { print_as("Ok: %d nodes online", nodes_online); if (nodes_standby > 0) { print_as(", %d standby nodes", nodes_standby); } if (nodes_maintenance > 0) { print_as(", %d maintenance nodes", nodes_maintenance); } print_as(", %d resources configured", count_resources(data_set, NULL)); } print_as("\n"); return 0; } static void print_date(time_t time) { int lpc = 0; char date_str[26]; asctime_r(localtime(&time), date_str); for (; lpc < 26; lpc++) { if (date_str[lpc] == '\n') { date_str[lpc] = 0; } } print_as("'%s'", date_str); } #include static void print_rsc_summary(pe_working_set_t * data_set, node_t * node, resource_t * rsc, gboolean all) { gboolean printed = FALSE; time_t last_failure = 0; int failcount = get_failcount_full(node, rsc, &last_failure, FALSE, data_set); if (all || failcount || last_failure > 0) { printed = TRUE; print_as(" %s: migration-threshold=%d", rsc_printable_id(rsc), rsc->migration_threshold); } if (failcount > 0) { printed = TRUE; print_as(" fail-count=%d", failcount); } if (last_failure > 0) { printed = TRUE; print_as(" last-failure="); print_date(last_failure); } if (printed) { print_as("\n"); } } static void print_rsc_history(pe_working_set_t * data_set, node_t * node, xmlNode * rsc_entry) { GListPtr gIter = NULL; GListPtr op_list = NULL; gboolean print_name = TRUE; GListPtr sorted_op_list = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); xmlNode *rsc_op = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_append(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *value = NULL; const char *call = crm_element_value(xml_op, 
XML_LRM_ATTR_CALLID); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); int rc = crm_parse_int(op_rc, "0"); if (safe_str_eq(task, CRMD_ACTION_STATUS) && safe_str_eq(interval, "0")) { task = "probe"; } if (rc == 7 && safe_str_eq(task, "probe")) { continue; } else if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { continue; } if (print_name) { print_name = FALSE; if (rsc == NULL) { print_as("Orphan resource: %s", rsc_id); } else { print_rsc_summary(data_set, node, rsc, TRUE); } } print_as(" + (%s) %s:", call, task); if (safe_str_neq(interval, "0")) { print_as(" interval=%sms", interval); } if (print_timing) { int int_value; const char *attr = XML_RSC_OP_LAST_CHANGE; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); if (int_value > 0) { print_as(" %s=", attr); print_date(int_value); } } attr = XML_RSC_OP_LAST_RUN; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); if (int_value > 0) { print_as(" %s=", attr); print_date(int_value); } } attr = XML_RSC_OP_T_EXEC; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } attr = XML_RSC_OP_T_QUEUE; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } } print_as(" rc=%s (%s)\n", op_rc, services_ocf_exitcode_str(rc)); } /* no need to free the contents */ g_list_free(sorted_op_list); } static void print_attr_msg(node_t * node, GListPtr rsc_list, const char *attrname, const char *attrvalue) { GListPtr gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); if (rsc->children != NULL) { print_attr_msg(node, rsc->children, 
attrname, attrvalue); } if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) { const char *name = g_hash_table_lookup(rsc->parameters, "name"); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (safe_str_eq(name, attrname)) { int host_list_num = 0; int expected_score = 0; int value = crm_parse_int(attrvalue, "0"); const char *hosts = g_hash_table_lookup(rsc->parameters, "host_list"); const char *multiplier = g_hash_table_lookup(rsc->parameters, "multiplier"); if(hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } /* pingd multiplier is the same as the default value. */ expected_score = host_list_num * crm_parse_int(multiplier, "1"); /* pingd is abnormal score. */ if (value <= 0) { print_as("\t: Connectivity is lost"); } else if (value < expected_score) { print_as("\t: Connectivity is degraded (Expected=%d)", expected_score); } } } } } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } static void create_attr_list(gpointer name, gpointer value, gpointer data) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return; } } attr_list = g_list_insert_sorted(attr_list, name, compare_attribute); } static void print_node_attribute(gpointer name, gpointer node_data) { const char *value = NULL; node_t *node = (node_t *) node_data; value = g_hash_table_lookup(node->details->attrs, name); print_as(" + %-32s\t: %-10s", (char *)name, value); print_attr_msg(node, node->details->running_rsc, name, value); print_as("\n"); } static void print_node_summary(pe_working_set_t * data_set, gboolean operations) { xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; xmlNode *node_state = NULL; xmlNode *cib_status = 
get_object_root(XML_CIB_TAG_STATUS, data_set->input); if (operations) { print_as("\nOperations:\n"); } else { print_as("\nMigration summary:\n"); } for (node_state = __xml_first_child(cib_status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if (node == NULL || node->details->online == FALSE) { continue; } print_as("* Node %s: ", crm_element_value(node_state, XML_ATTR_UNAME)); print_as("\n"); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); for (rsc_entry = __xml_first_child(lrm_rsc); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (operations) { print_rsc_history(data_set, node, rsc_entry); } else { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc) { print_rsc_summary(data_set, node, rsc, FALSE); } else { print_as(" %s: orphan\n", rsc_id); } } } } } } } static void print_ticket(gpointer name, gpointer value, gpointer data) { ticket_t *ticket = (ticket_t *) value; print_as(" %s\t%s%10s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? 
" [standby]" : ""); if (ticket->last_granted > -1) { print_as(" last-granted="); print_date(ticket->last_granted); } print_as("\n"); return; } static void print_cluster_tickets(pe_working_set_t * data_set) { print_as("\nTickets:\n"); g_hash_table_foreach(data_set->tickets, print_ticket, NULL); return; } static void print_neg_locations(pe_working_set_t *data_set) { GListPtr gIter, gIter2; print_as("\nFencing constraints:\n"); for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { rsc_to_node_t *location = (rsc_to_node_t *) gIter->data; if (!g_str_has_prefix(location->id, print_neg_location_prefix)) continue; for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { node_t *node = (node_t *) gIter2->data; if (node->weight >= 0) /* != -INFINITY ??? */ continue; print_as(" %s\tprevents %s from running %son %s\n", location->id, location->rsc_lh->id, location->role_filter == RSC_ROLE_MASTER ? "as Master " : "", node->details->uname); } } } static int print_status(pe_working_set_t * data_set) { static int updates = 0; GListPtr gIter = NULL; node_t *dc = NULL; char *since_epoch = NULL; char *online_nodes = NULL; char *offline_nodes = NULL; const char *stack_s = NULL; xmlNode *dc_version = NULL; xmlNode *quorum_node = NULL; xmlNode *stack = NULL; time_t a_time = time(NULL); int print_opts = pe_print_ncurses; const char *quorum_votes = "unknown"; if (as_console) { blank_screen(); } else { print_opts = pe_print_printf; } updates++; dc = data_set->dc_node; if (a_time == (time_t) - 1) { crm_perror(LOG_ERR, "set_node_tstamp(): Invalid time returned"); return 1; } since_epoch = ctime(&a_time); if (since_epoch != NULL && print_last_updated) { print_as("Last updated: %s", since_epoch); } if (print_last_change) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, 
XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); print_as("Last change: %s", last_written ? last_written : ""); if (user) { print_as(" by %s", user); } if (client) { print_as(" via %s", client); } if (origin) { print_as(" on %s", origin); } print_as("\n"); } stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); if (stack) { stack_s = crm_element_value(stack, XML_NVPAIR_ATTR_VALUE); print_as("Stack: %s\n", stack_s); } dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); if (dc == NULL) { print_as("Current DC: NONE\n"); } else { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); if (safe_str_neq(dc->details->uname, dc->details->id)) { print_as("Current DC: %s (%s)", dc->details->uname, dc->details->id); } else { print_as("Current DC: %s", dc->details->uname); } print_as(" - partition %s quorum\n", crm_is_true(quorum) ? "with" : "WITHOUT"); if (dc_version) { print_as("Version: %s\n", crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)); } } quorum_node = get_xpath_object("//nvpair[@name='" XML_ATTR_EXPECTED_VOTES "']", data_set->input, LOG_DEBUG); if (quorum_node) { quorum_votes = crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE); } if(stack_s && strstr(stack_s, "classic openais") != NULL) { print_as("%d Nodes configured, %s expected votes\n", g_list_length(data_set->nodes), quorum_votes); } else { print_as("%d Nodes configured\n", g_list_length(data_set->nodes)); } print_as("%d Resources configured\n", count_resources(data_set, NULL)); print_as("\n\n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_mode = NULL; char *node_name = NULL; if(node->details->remote_rsc) { node_name = g_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); } else { node_name = g_strdup_printf("%s", 
node->details->uname); } if (node->details->unclean) { if (node->details->online && node->details->unclean) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { node_mode = "maintenance"; } else { node_mode = "OFFLINE (maintenance)"; } } else if (node->details->online) { node_mode = "online"; if (group_by_node == FALSE) { online_nodes = add_list_element(online_nodes, node_name); continue; } } else { node_mode = "OFFLINE"; if (group_by_node == FALSE) { offline_nodes = add_list_element(offline_nodes, node_name); continue; } } if(node->details->remote_rsc) { online_nodes = add_list_element(online_nodes, node->details->remote_rsc->id); } if (safe_str_eq(node->details->uname, node->details->id)) { print_as("Node %s: %s\n", node_name, node_mode); } else { print_as("Node %s (%s): %s\n", node_name, node->details->id, node_mode); } if (group_by_node) { GListPtr gIter2 = NULL; for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout); } } free(node_name); } if (online_nodes) { print_as("Online: [%s ]\n", online_nodes); free(online_nodes); } if (offline_nodes) { print_as("OFFLINE: [%s ]\n", offline_nodes); free(offline_nodes); } if (group_by_node == FALSE && inactive_resources) { print_as("\nFull list of resources:\n"); } else if (inactive_resources) { print_as("\nInactive resources:\n"); } if (group_by_node == FALSE || inactive_resources) { print_as("\n"); for (gIter = 
data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } } if (print_nodes_attr) { print_as("\nNode Attributes:\n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL || node->details->online == FALSE) { continue; } print_as("* Node %s:\n", node->details->uname); g_hash_table_foreach(node->details->attrs, create_attr_list, NULL); g_list_foreach(attr_list, print_node_attribute, node); g_list_free(attr_list); attr_list = NULL; } } if (print_operations || print_failcount) { print_node_summary(data_set, print_operations); } if (xml_has_children(data_set->failed)) { xmlNode *xml_op = NULL; print_as("\nFailed actions:\n"); for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { int status = 0; int rc = 0; const char *id = ID(xml_op); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *rc_s = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); rc = crm_parse_int(rc_s, "0"); status = crm_parse_int(status_s, "0"); if (last) { time_t run_at = crm_parse_int(last, "0"); char *run_at_s = ctime(&run_at); if(run_at_s) { run_at_s[24] = 0; /* Overwrite the newline */ } print_as(" %s on %s '%s' (%d): call=%s, status=%s, 
last-rc-change='%s', queued=%sms, exec=%sms\n", op_key ? op_key : id, node, services_ocf_exitcode_str(rc), rc, call, services_lrm_status_str(status), run_at_s, crm_element_value(xml_op, XML_RSC_OP_T_EXEC), crm_element_value(xml_op, XML_RSC_OP_T_QUEUE)); } else { print_as(" %s on %s '%s' (%d): call=%s, status=%s\n", op_key ? op_key : id, node, services_ocf_exitcode_str(rc), rc, call, services_lrm_status_str(status)); } } print_as("\n"); } if (print_tickets || print_neg_location_prefix) { /* For recording the tickets that are referenced in rsc_ticket constraints * but have never been granted yet. * To be able to print negative location constraint summary, * we also need them to be unpacked. */ xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); unpack_constraints(cib_constraints, data_set); } if (print_tickets) { print_cluster_tickets(data_set); } if (print_neg_location_prefix) { print_neg_locations(data_set); } #if CURSES_ENABLED if (as_console) { refresh(); } #endif return 0; } static int print_xml_status(pe_working_set_t * data_set) { FILE *stream = stdout; GListPtr gIter = NULL; node_t *dc = NULL; xmlNode *stack = NULL; xmlNode *quorum_node = NULL; const char *quorum_votes = "unknown"; dc = data_set->dc_node; fprintf(stream, "\n"); fprintf(stream, "\n", VERSION); /*** SUMMARY ***/ fprintf(stream, " \n"); if (print_last_updated) { time_t now = time(NULL); char *now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, " \n", now_str); } if (print_last_change) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); fprintf(stream, " \n", last_written ? last_written : "", user ? user : "", client ? client : "", origin ? 
origin : ""); } stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); if (stack) { fprintf(stream, " \n", crm_element_value(stack, XML_NVPAIR_ATTR_VALUE)); } if (!dc) { fprintf(stream, " \n"); } else { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); const char *uname = dc->details->uname; const char *id = dc->details->id; xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); fprintf(stream, " \n", dc_version ? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : "", uname, id, quorum ? (crm_is_true(quorum) ? "true" : "false") : "false"); } quorum_node = get_xpath_object("//nvpair[@name='" XML_ATTR_EXPECTED_VOTES "']", data_set->input, LOG_DEBUG); if (quorum_node) { quorum_votes = crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE); } fprintf(stream, " \n", g_list_length(data_set->nodes), quorum_votes); fprintf(stream, " \n", count_resources(data_set, NULL)); fprintf(stream, " \n"); /*** NODES ***/ fprintf(stream, " \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_type = "unknown"; switch (node->details->type) { case node_member: node_type = "member"; break; case node_remote: node_type = "remote"; break; case node_ping: node_type = "ping"; break; } fprintf(stream, " details->uname); fprintf(stream, "id=\"%s\" ", node->details->id); fprintf(stream, "online=\"%s\" ", node->details->online ? "true" : "false"); fprintf(stream, "standby=\"%s\" ", node->details->standby ? "true" : "false"); fprintf(stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false"); fprintf(stream, "maintenance=\"%s\" ", node->details->maintenance ? "true" : "false"); fprintf(stream, "pending=\"%s\" ", node->details->pending ? "true" : "false"); fprintf(stream, "unclean=\"%s\" ", node->details->unclean ? 
"true" : "false"); fprintf(stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false"); fprintf(stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false"); fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false"); fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc)); fprintf(stream, "type=\"%s\" ", node_type); if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, ">\n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; rsc->fns->print(rsc, " ", pe_print_xml | pe_print_rsconly, stream); } fprintf(stream, " \n"); } else { fprintf(stream, "/>\n"); } } fprintf(stream, " \n"); /*** RESOURCES ***/ if (group_by_node == FALSE || inactive_resources) { fprintf(stream, " \n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, " ", pe_print_xml, stream); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, " ", pe_print_xml, stream); } } fprintf(stream, " \n"); } fprintf(stream, "\n"); fflush(stream); fclose(stream); return 0; } static int print_html_status(pe_working_set_t * data_set, const char *filename, gboolean web_cgi) { FILE *stream; GListPtr gIter = NULL; node_t *dc = NULL; static int updates = 0; char *filename_tmp = NULL; if (web_cgi) { stream = stdout; fprintf(stream, "Content-type: text/html\n\n"); } else { filename_tmp = crm_concat(filename, "tmp", '.'); stream = fopen(filename_tmp, "w"); if (stream == NULL) { crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp); free(filename_tmp); return -1; } } updates++; dc = 
data_set->dc_node; fprintf(stream, ""); fprintf(stream, ""); fprintf(stream, "Cluster status"); /* content="%d;url=http://webdesign.about.com" */ fprintf(stream, "", reconnect_msec / 1000); fprintf(stream, ""); /*** SUMMARY ***/ fprintf(stream, "

Cluster summary

"); { char *now_str = NULL; time_t now = time(NULL); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, "Last updated: %s
\n", now_str); } if (dc == NULL) { fprintf(stream, "Current DC: NONE
"); } else { fprintf(stream, "Current DC: %s (%s)
", dc->details->uname, dc->details->id); } fprintf(stream, "%d Nodes configured.
", g_list_length(data_set->nodes)); fprintf(stream, "%d Resources configured.
", count_resources(data_set, NULL)); /*** CONFIG ***/ fprintf(stream, "

Config Options

\n"); fprintf(stream, "\n"); fprintf(stream, "\n", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); fprintf(stream, "\n", is_set(data_set->flags, pe_flag_symmetric_cluster) ? "" : "a-"); fprintf(stream, "\n
STONITH of failed nodes:%s
Cluster is:%ssymmetric
No Quorum Policy:"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: fprintf(stream, "Freeze resources"); break; case no_quorum_stop: fprintf(stream, "Stop ALL resources"); break; case no_quorum_ignore: fprintf(stream, "Ignore"); break; case no_quorum_suicide: fprintf(stream, "Suicide"); break; } fprintf(stream, "\n
\n"); /*** NODE LIST ***/ fprintf(stream, "

Node List

\n"); fprintf(stream, "
    \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; fprintf(stream, "
  • "); if (node->details->standby_onfail && node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "standby (on-fail)\n"); } else if (node->details->standby && node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "standby\n"); } else if (node->details->standby) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "OFFLINE (standby)\n"); } else if (node->details->maintenance && node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "maintenance\n"); } else if (node->details->maintenance) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "OFFLINE (maintenance)\n"); } else if (node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "online\n"); } else { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "OFFLINE\n"); } if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, "
      \n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; fprintf(stream, "
    • "); rsc->fns->print(rsc, NULL, pe_print_html | pe_print_rsconly, stream); fprintf(stream, "
    • \n"); } fprintf(stream, "
    \n"); } fprintf(stream, "
  • \n"); } fprintf(stream, "
\n"); if (group_by_node && inactive_resources) { fprintf(stream, "

Inactive Resources

\n"); } else if (group_by_node == FALSE) { fprintf(stream, "

Resource List

\n"); } if (group_by_node == FALSE || inactive_resources) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, pe_print_html, stream); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, pe_print_html, stream); } } } fprintf(stream, ""); fflush(stream); fclose(stream); if (!web_cgi) { if (rename(filename_tmp, filename) != 0) { crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename); } free(filename_tmp); } return 0; } #if ENABLE_SNMP # include # include # include # include # include # include # define add_snmp_field(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ int s_rc = snmp_add_var(list, name, name_length, 's', (value)); \ if(s_rc != 0) { \ crm_err("Could not add %s=%s rc=%d", oid_string, value, s_rc); \ } else { \ crm_trace("Added %s=%s", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ # define add_snmp_field_int(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ if(NULL == snmp_pdu_add_variable( \ list, name, name_length, ASN_INTEGER, \ (u_char *) & value, sizeof(value))) { \ crm_err("Could not add %s=%d", oid_string, value); \ } else { \ crm_trace("Added %s=%d", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ static int snmp_input(int operation, netsnmp_session * session, int reqid, netsnmp_pdu * pdu, void *magic) { return 1; } static netsnmp_session * 
crm_snmp_init(const char *target, char *community) { static netsnmp_session *session = NULL; # ifdef NETSNMPV53 char target53[128]; snprintf(target53, sizeof(target53), "%s:162", target); # endif if (session) { return session; } if (target == NULL) { return NULL; } if (get_crm_log_level() > LOG_INFO) { char *debug_tokens = strdup("run:shell,snmptrap,tdomain"); debug_register_tokens(debug_tokens); snmp_set_do_debugging(1); } session = calloc(1, sizeof(netsnmp_session)); snmp_sess_init(session); session->version = SNMP_VERSION_2c; session->callback = snmp_input; session->callback_magic = NULL; if (community) { session->community_len = strlen(community); session->community = (unsigned char *)community; } session = snmp_add(session, # ifdef NETSNMPV53 netsnmp_tdomain_transport(target53, 0, "udp"), # else netsnmp_transport_open_client("snmptrap", target), # endif NULL, NULL); if (session == NULL) { snmp_sess_perror("Could not create snmp transport", session); } return session; } #endif static int send_snmp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { int ret = 1; #if ENABLE_SNMP static oid snmptrap_oid[] = { 1, 3, 6, 1, 6, 3, 1, 1, 4, 1, 0 }; static oid sysuptime_oid[] = { 1, 3, 6, 1, 2, 1, 1, 3, 0 }; netsnmp_pdu *trap_pdu; netsnmp_session *session = crm_snmp_init(snmp_target, snmp_community); trap_pdu = snmp_pdu_create(SNMP_MSG_TRAP2); if (!trap_pdu) { crm_err("Failed to create SNMP notification"); return SNMPERR_GENERR; } if (1) { /* send uptime */ char csysuptime[20]; time_t now = time(NULL); sprintf(csysuptime, "%ld", now); snmp_add_var(trap_pdu, sysuptime_oid, sizeof(sysuptime_oid) / sizeof(oid), 't', csysuptime); } /* Indicate what the trap is by setting snmpTrapOid.0 */ ret = snmp_add_var(trap_pdu, snmptrap_oid, sizeof(snmptrap_oid) / sizeof(oid), 'o', snmp_crm_trap_oid); if (ret != 0) { crm_err("Failed set snmpTrapOid.0=%s", snmp_crm_trap_oid); return ret; } /* Add extries to the trap */ if 
(rsc) { add_snmp_field(trap_pdu, snmp_crm_oid_rsc, rsc); } add_snmp_field(trap_pdu, snmp_crm_oid_node, node); add_snmp_field(trap_pdu, snmp_crm_oid_task, task); add_snmp_field(trap_pdu, snmp_crm_oid_desc, desc); add_snmp_field_int(trap_pdu, snmp_crm_oid_rc, rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_trc, target_rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_status, status); /* Send and cleanup */ ret = snmp_send(session, trap_pdu); if (ret == 0) { /* error */ snmp_sess_perror("Could not send SNMP trap", session); snmp_free_pdu(trap_pdu); ret = SNMPERR_GENERR; } else { ret = SNMPERR_SUCCESS; } #else crm_err("Sending SNMP traps is not supported by this installation"); #endif return ret; } #if ENABLE_ESMTP # include # include static void print_recipient_status(smtp_recipient_t recipient, const char *mailbox, void *arg) { const smtp_status_t *status; status = smtp_recipient_status(recipient); printf("%s: %d %s", mailbox, status->code, status->text); } static void event_cb(smtp_session_t session, int event_no, void *arg, ...) 
{ int *ok; va_list alist; va_start(alist, arg); switch (event_no) { case SMTP_EV_CONNECT: case SMTP_EV_MAILSTATUS: case SMTP_EV_RCPTSTATUS: case SMTP_EV_MESSAGEDATA: case SMTP_EV_MESSAGESENT: case SMTP_EV_DISCONNECT: break; case SMTP_EV_WEAK_CIPHER:{ int bits = va_arg(alist, long); ok = va_arg(alist, int *); crm_debug("SMTP_EV_WEAK_CIPHER, bits=%d - accepted.", bits); *ok = 1; break; } case SMTP_EV_STARTTLS_OK: crm_debug("SMTP_EV_STARTTLS_OK - TLS started here."); break; case SMTP_EV_INVALID_PEER_CERTIFICATE:{ long vfy_result = va_arg(alist, long); ok = va_arg(alist, int *); /* There is a table in handle_invalid_peer_certificate() of mail-file.c */ crm_err("SMTP_EV_INVALID_PEER_CERTIFICATE: %ld", vfy_result); *ok = 1; break; } case SMTP_EV_NO_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_WRONG_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_WRONG_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_NO_CLIENT_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_CLIENT_CERTIFICATE - accepted."); *ok = 1; break; default: crm_debug("Got event: %d - ignored.\n", event_no); } va_end(alist); } #endif #define BODY_MAX 2048 #if ENABLE_ESMTP static void crm_smtp_debug(const char *buf, int buflen, int writing, void *arg) { char type = 0; int lpc = 0, last = 0, level = *(int *)arg; if (writing == SMTP_CB_HEADERS) { type = 'H'; } else if (writing) { type = 'C'; } else { type = 'S'; } for (; lpc < buflen; lpc++) { switch (buf[lpc]) { case 0: case '\n': if (last > 0) { do_crm_log(level, " %.*s", lpc - last, buf + last); } else { do_crm_log(level, "%c: %.*s", type, lpc - last, buf + last); } last = lpc + 1; break; } } } #endif static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = crm_itoa(rc); char *status_s = 
crm_itoa(status); char *target_rc_s = crm_itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", external_recipient, external_agent); setenv("CRM_notify_recipient", external_recipient, 1); setenv("CRM_notify_node", node, 1); setenv("CRM_notify_rsc", rsc, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(external_agent, external_agent, NULL); } crm_trace("Finished running custom notification program '%s'.", external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static int send_smtp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { #if ENABLE_ESMTP smtp_session_t session; smtp_message_t message; auth_context_t authctx; struct sigaction sa; int len = 20; int noauth = 1; int smtp_debug = LOG_DEBUG; char crm_mail_body[BODY_MAX]; char *crm_mail_subject = NULL; memset(&sa, 0, sizeof(struct sigaction)); if (node == NULL) { node = "-"; } if (rsc == NULL) { rsc = "-"; } if (desc == NULL) { desc = "-"; } if (crm_mail_to == NULL) { return 1; } if (crm_mail_host == NULL) { crm_mail_host = "localhost:25"; } if (crm_mail_prefix == NULL) { crm_mail_prefix = "Cluster notification"; } crm_debug("Sending '%s' mail to %s via %s", crm_mail_prefix, crm_mail_to, crm_mail_host); len += strlen(crm_mail_prefix); len += strlen(task); len += strlen(rsc); len += strlen(node); len += strlen(desc); len++; crm_mail_subject = calloc(1, len); snprintf(crm_mail_subject, len, "%s - %s event for %s on %s: %s\r\n", crm_mail_prefix, task, rsc, node, desc); len = 0; len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\n%s\r\n", crm_mail_prefix); len += 
snprintf(crm_mail_body + len, BODY_MAX - len, "====\r\n\r\n"); if (rc == target_rc) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Completed operation %s for resource %s on %s\r\n", task, rsc, node); } else { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Operation %s for resource %s on %s failed: %s\r\n", task, rsc, node, desc); } len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\nDetails:\r\n"); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\toperation status: (%d) %s\r\n", status, services_lrm_status_str(status)); if (status == PCMK_LRM_OP_DONE) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "\tscript returned: (%d) %s\r\n", rc, services_ocf_exitcode_str(rc)); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\texpected return value: (%d) %s\r\n", target_rc, services_ocf_exitcode_str(target_rc)); } auth_client_init(); session = smtp_create_session(); message = smtp_add_message(session); smtp_starttls_enable(session, Starttls_ENABLED); sa.sa_handler = SIG_IGN; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(SIGPIPE, &sa, NULL); smtp_set_server(session, crm_mail_host); authctx = auth_create_context(); auth_set_mechanism_flags(authctx, AUTH_PLUGIN_PLAIN, 0); smtp_set_eventcb(session, event_cb, NULL); /* Now tell libESMTP it can use the SMTP AUTH extension. */ if (!noauth) { crm_debug("Adding authentication context"); smtp_auth_set_context(session, authctx); } if (crm_mail_from == NULL) { struct utsname us; char auto_from[BODY_MAX]; CRM_ASSERT(uname(&us) == 0); snprintf(auto_from, BODY_MAX, "crm_mon@%s", us.nodename); smtp_set_reverse_path(message, auto_from); } else { /* NULL is ok */ smtp_set_reverse_path(message, crm_mail_from); } smtp_set_header(message, "To", NULL /*phrase */ , NULL /*addr */ ); /* "Phrase" */ smtp_add_recipient(message, crm_mail_to); /* Set the Subject: header and override any subject line in the message headers. 
*/ smtp_set_header(message, "Subject", crm_mail_subject); smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1); smtp_set_message_str(message, crm_mail_body); smtp_set_monitorcb(session, crm_smtp_debug, &smtp_debug, 1); if (smtp_start_session(session)) { char buf[128]; int rc = smtp_errno(); crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } else { char buf[128]; int rc = smtp_errno(); const smtp_status_t *smtp_status = smtp_message_transfer_status(message); if (rc != 0) { crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } crm_info("Send status: %d %s", smtp_status->code, crm_str(smtp_status->text)); smtp_enumerate_recipients(message, print_recipient_status, NULL); } smtp_destroy_session(session); auth_destroy_context(authctx); auth_client_exit(); #endif return 0; } static void handle_rsc_op(xmlNode * rsc_op) { int rc = -1; int status = -1; int action = -1; int interval = 0; int target_rc = -1; int transition_num = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *node = NULL; const char *magic = NULL; const char *id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); char *update_te_uuid = NULL; xmlNode *n = rsc_op; if (id == NULL) { /* Compatability with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (FALSE == decode_transition_magic(magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, &interval) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) { n = n->parent; } node = crm_element_value(n, XML_ATTR_UNAME); if (node == NULL) { node = ID(n); } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up 
where we expected it to be? */ desc = pcmk_strerror(pcmk_ok); if (status == PCMK_LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_LRM_OP_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = services_lrm_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && snmp_target) { send_snmp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && crm_mail_to) { send_smtp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(update_te_uuid); free(rsc); free(task); } void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; long now = time(NULL); static bool stale = FALSE; print_dot(); if (current_cib != NULL) { xmlNode *cib_last = current_cib; current_cib = NULL; rc = cib_apply_patch_event(msg, cib_last, ¤t_cib, LOG_DEBUG); free_xml(cib_last); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); case pcmk_ok: break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); } } if (current_cib == NULL) { current_cib = get_cib_copy(cib); } if (crm_mail_to || snmp_target || external_agent) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op); } freeXpathObject(xpathObj); } if (current_cib == NULL) { if(!stale) { print_as("--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; if ((now - last_refresh) > (reconnect_msec / 1000)) { /* Force a refresh 
*/ mon_refresh_display(NULL); } else { mainloop_set_trigger(refresh_trigger); } } gboolean mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); pe_working_set_t data_set; last_refresh = time(NULL); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { if (cib) { cib->cmds->signoff(cib); } print_as("Upgrade failed: %s", pcmk_strerror(-pcmk_err_dtd_validation)); if (as_console) { sleep(2); } clean_up(EX_USAGE); return FALSE; } set_working_set_defaults(&data_set); data_set.input = cib_copy; cluster_status(&data_set); if (as_html_file || web_cgi) { if (print_html_status(&data_set, as_html_file, web_cgi) != 0) { fprintf(stderr, "Critical: Unable to output html file\n"); clean_up(EX_USAGE); } } else if (as_xml) { if (print_xml_status(&data_set) != 0) { fprintf(stderr, "Critical: Unable to output xml file\n"); clean_up(EX_USAGE); } } else if (daemonize) { /* do nothing */ } else if (simple_status) { print_simple_status(&data_set); if (has_warnings) { clean_up(EX_USAGE); } } else { print_status(&data_set); } cleanup_calculations(&data_set); return TRUE; } void mon_st_callback(stonith_t * st, stonith_event_t * e) { char *desc = g_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)", e->operation, e->origin, e->target, pcmk_strerror(e->result), e->id); if (snmp_target) { send_snmp_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } if (crm_mail_to) { send_smtp_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } if (external_agent) { send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } g_free(desc); } /* * De-init ncurses, signoff from the CIB and deallocate memory. 
*/
void
clean_up(int rc)
{
#if ENABLE_SNMP
    /* With a NULL target this only hands back a session created earlier */
    netsnmp_session *snmp_session = crm_snmp_init(NULL, NULL);

    if (snmp_session != NULL) {
        snmp_close(snmp_session);
        snmp_shutdown("snmpapp");
    }
#endif

#if CURSES_ENABLED
    /* Put the terminal back the way it was before the console display */
    if (as_console) {
        as_console = FALSE;
        echo();
        nocbreak();
        endwin();
    }
#endif

    /* Drop the CIB connection and forget the handle */
    if (cib) {
        cib->cmds->signoff(cib);
        cib_delete(cib);
        cib = NULL;
    }

    free(as_html_file);
    free(xml_file);
    free(pid_file);

    /* A negative rc means "clean up only": hand control back to the caller */
    if (rc < 0) {
        return;
    }

    crm_exit(rc);
}