Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/.gitignore b/.gitignore
index 764ad18545..26d871f41d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,208 +1,208 @@
# Common
\#*
.\#*
GPATH
GRTAGS
GTAGS
TAGS
Makefile
Makefile.in
.deps
.libs
*.pc
*.pyc
*.bz2
*.tar.gz
*.rpm
*.la
*.lo
*.o
*~
*.gcda
*.gcno
# Autobuild
aclocal.m4
autoconf
autoheader
autom4te.cache/
automake
build.counter
compile
config.guess
config.log
config.status
config.sub
configure
depcomp
install-sh
include/stamp-*
libtool
libtool.m4
ltdl.m4
libltdl
ltmain.sh
missing
py-compile
/m4/argz.m4
/m4/ltargz.m4
/m4/ltoptions.m4
/m4/ltsugar.m4
/m4/ltversion.m4
/m4/lt~obsolete.m4
test-driver
ylwrap
# Configure targets
Doxyfile
/cts/CTS.py
/cts/CTSlab.py
/cts/CTSvars.py
/cts/LSBDummy
/cts/OCFIPraTest.py
/cts/benchmark/clubench
/cts/cluster_test
/cts/cts
/cts/cts-cli
/cts/cts-coverage
/cts/cts-lrmd
/cts/cts-pengine
/cts/cts-regression
/cts/cts-stonithd
/cts/fence_dummy
/cts/lxc_autogen.sh
/cts/pacemaker-cts-dummyd
/cts/pacemaker-cts-dummyd@.service
/daemons/pacemakerd/pacemaker
/daemons/pacemakerd/pacemaker.combined.upstart
/daemons/pacemakerd/pacemaker.service
/daemons/pacemakerd/pacemaker.upstart
extra/logrotate/pacemaker
/fencing/fence_legacy
include/config.h
include/config.h.in
include/crm_config.h
lrmd/pacemaker_remote
lrmd/pacemaker_remoted
lrmd/pacemaker_remote.service
publican.cfg
/tools/cibsecret
/tools/crm_error
/tools/crm_failcount
/tools/crm_master
/tools/crm_mon.service
/tools/crm_mon.upstart
/tools/crm_report
/tools/crm_standby
/tools/report.collector
/tools/report.common
# Build targets
*.7
*.7.xml
*.7.html
*.8
*.8.xml
*.8.html
doc/*/en-US/images/*.png
doc/*/tmp/**
doc/*/publish
cib/cib
cib/cibmon
cib/cibpipe
crmd/atest
crmd/crmd
-/daemons/attrd/attrd
+/daemons/attrd/pacemaker-attrd
/daemons/pacemakerd/pacemakerd
doc/api/*
doc/Clusters_from_Scratch.txt
doc/Pacemaker_Explained.txt
doc/acls.html
doc/crm_fencing.html
doc/publican-catalog*
fencing/stonith-test
fencing/stonith_admin
fencing/stonithd
fencing/stonithd.xml
lrmd/lrmd
lrmd/lrmd_internal_ctl
lrmd/lrmd_test
pengine/pengine
pengine/pengine.xml
pengine/ptest
scratch
tools/attrd_updater
tools/cibadmin
tools/crm_attribute
tools/crm_diff
tools/crm_mon
tools/crm_node
tools/crm_resource
tools/crm_shadow
tools/crm_simulate
tools/crm_verify
tools/crmadmin
tools/iso8601
tools/crm_ticket
tools/report.collector.1
xml/crm.dtd
xml/pacemaker*.rng
xml/versions.rng
doc/shared/en-US/*.xml
doc/Clusters_from_Scratch.build
doc/Clusters_from_Scratch/en-US/Ap-*.xml
doc/Clusters_from_Scratch/en-US/Ch-*.xml
doc/Pacemaker_Administration.build
doc/Pacemaker_Administration/en-US/Ch-*.xml
doc/Pacemaker_Development.build
doc/Pacemaker_Development/en-US/Ch-*.xml
doc/Pacemaker_Explained.build
doc/Pacemaker_Explained/en-US/Ch-*.xml
doc/Pacemaker_Explained/en-US/Ap-*.xml
doc/Pacemaker_Remote.build
doc/Pacemaker_Remote/en-US/Ch-*.xml
lib/gnu/libgnu.a
lib/gnu/stdalign.h
*.coverity
# Test detritus
/cts/.regression.failed.diff
/cts/pengine/*.ref
/cts/pengine/*.up
/cts/pengine/*.up.err
/cts/pengine/bug-rh-1097457.log
/cts/pengine/bug-rh-1097457.trs
/cts/pengine/shadow.*
/cts/test-suite.log
/xml/test-2/*.up
/xml/test-2/*.up.err
# Formerly built files (helps when jumping back and forth in checkout)
/attrd
/coverage.sh
/cts/HBDummy
/fencing/regression.py
/lrmd/regression.py
/mcp
/pengine/.regression.failed.diff
/pengine/regression.core.sh
/pengine/test10/shadow.*
#Other
mock
HTML
pacemaker*.spec
coverity-*
compat_reports
.ABI-build
abi_dumps
logs
*.patch
*.diff
*.sed
*.orig
*.rej
*.swp
diff --git a/configure.ac b/configure.ac
index 7ea30e7056..2292e98b9f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,1818 +1,1818 @@
dnl
dnl autoconf for Pacemaker
dnl
dnl Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
dnl
dnl This source code is licensed under the GNU General Public License version 2
dnl or later (GPLv2+) WITHOUT ANY WARRANTY.
dnl ===============================================
dnl Bootstrap
dnl ===============================================
AC_PREREQ(2.64)
AC_CONFIG_MACRO_DIR([m4])
AC_DEFUN([AC_DATAROOTDIR_CHECKED])
dnl Suggested structure:
dnl information on the package
dnl checks for programs
dnl checks for libraries
dnl checks for header files
dnl checks for types
dnl checks for structures
dnl checks for compiler characteristics
dnl checks for library functions
dnl checks for system services
m4_include([version.m4])
AC_INIT([pacemaker], VERSION_NUMBER, [users@clusterlabs.org], [pacemaker],
PCMK_URL)
PCMK_FEATURES=""
AC_CONFIG_AUX_DIR(.)
AC_CANONICAL_HOST
dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below)
dnl
dnl Internal header: include/config.h
dnl - Contains ALL defines
dnl - include/config.h.in is generated automatically by autoheader
dnl - NOT to be included in any header files except crm_internal.h
dnl (which is also not to be included in any other header files)
dnl
dnl External header: include/crm_config.h
dnl - Contains a subset of defines checked here
dnl - Manually edit include/crm_config.h.in to have configure include
dnl new defines
dnl - Should not include HAVE_* defines
dnl - Safe to include anywhere
AM_CONFIG_HEADER(include/config.h include/crm_config.h)
AC_ARG_WITH(version,
[ --with-version=version Override package version (if you are a packager needing to pretend) ],
[ PACKAGE_VERSION="$withval" ])
AC_ARG_WITH(pkg-name,
[ --with-pkg-name=name Override package name (if you are a packager needing to pretend) ],
[ PACKAGE_NAME="$withval" ])
dnl 1.11: minimum automake version required
dnl foreign: don't require GNU-standard top-level files
dnl silent-rules: allow "--enable-silent-rules" (no-op in 1.13+)
AM_INIT_AUTOMAKE([1.11 foreign silent-rules])
dnl Example 2.4. Silent Custom Rule to Generate a File
dnl %-bar.pc: %.pc
dnl $(AM_V_GEN)$(LN_S) $(notdir $^) $@
AC_DEFINE_UNQUOTED(PACEMAKER_VERSION, "$PACKAGE_VERSION",
[Current pacemaker version])
dnl Versioned attributes implementation is not yet production-ready
AC_DEFINE_UNQUOTED(ENABLE_VERSIONED_ATTRS, 0, [Enable versioned attributes])
PACKAGE_SERIES=`echo $PACKAGE_VERSION | awk -F. '{ print $1"."$2 }'`
AC_SUBST(PACKAGE_SERIES)
AC_SUBST(PACKAGE_VERSION)
CC_IN_CONFIGURE=yes
export CC_IN_CONFIGURE
LDD=ldd
dnl ========================================================================
dnl Compiler characteristics
dnl ========================================================================
AC_PROG_CC dnl Can force other with environment variable "CC".
AM_PROG_CC_C_O
AC_PROG_CC_STDC
gl_EARLY
gl_INIT
LT_INIT([dlopen])
LTDL_INIT([convenience])
AC_PROG_YACC
AM_PROG_LEX
AC_C_STRINGIZE
AC_TYPE_SIZE_T
AC_CHECK_SIZEOF(char)
AC_CHECK_SIZEOF(short)
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(long long)
AC_STRUCT_TIMEZONE
dnl ===============================================
dnl Helpers
dnl ===============================================
cc_supports_flag() {
local CFLAGS="-Werror $@"
AC_MSG_CHECKING(whether $CC supports "$@")
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[ ]])],
[RC=0; AC_MSG_RESULT(yes)],
[RC=1; AC_MSG_RESULT(no)])
return $RC
}
# Some tests need to use their own CFLAGS
cc_temp_flags() {
ac_save_CFLAGS="$CFLAGS"
CFLAGS="$*"
}
cc_restore_flags() {
CFLAGS=$ac_save_CFLAGS
}
dnl ===============================================
dnl Configure Options
dnl ===============================================
dnl Some systems, like Solaris require a custom package name
AC_ARG_WITH(pkgname,
[ --with-pkgname=name name for pkg (typically for Solaris) ],
[ PKGNAME="$withval" ],
[ PKGNAME="LXHAhb" ],
)
AC_SUBST(PKGNAME)
AC_ARG_ENABLE([ansi],
[ --enable-ansi Force GCC to compile to ANSI standard for older compilers. @<:@no@:>@])
AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings Enable pedantic and fatal warnings for gcc @<:@yes@:>@])
AC_ARG_ENABLE([quiet],
[ --enable-quiet Suppress make output unless there is an error @<:@no@:>@])
AC_ARG_ENABLE([no-stack],
[ --enable-no-stack Build only the Policy Engine and its requirements @<:@no@:>@])
AC_ARG_ENABLE([upstart],
[ --enable-upstart Enable support for managing resources via Upstart @<:@try@:>@ ],
[],
[enable_upstart=try],
)
AC_ARG_ENABLE([systemd],
[ --enable-systemd Enable support for managing resources via systemd @<:@try@:>@],
[],
[enable_systemd=try],
)
AC_ARG_ENABLE(hardening,
[ --with-hardening Harden the resulting executables/libraries @<:@try@:>@],
[ HARDENING="${enableval}" ],
[ HARDENING=try ],
)
AC_ARG_WITH(corosync,
[ --with-corosync Support the Corosync messaging and membership layer ],
[ SUPPORT_CS=$withval ],
[ SUPPORT_CS=try ],
)
AC_ARG_WITH(nagios,
[ --with-nagios Support nagios remote monitoring ],
[ SUPPORT_NAGIOS=$withval ],
[ SUPPORT_NAGIOS=try ],
)
AC_ARG_WITH(nagios-plugin-dir,
[ --with-nagios-plugin-dir=DIR Directory for nagios plugins @<:@LIBEXECDIR/nagios/plugins@:>@],
[ NAGIOS_PLUGIN_DIR="$withval" ]
)
AC_ARG_WITH(nagios-metadata-dir,
[ --with-nagios-metadata-dir=DIR Directory for nagios plugins metadata @<:@DATADIR/nagios/plugins-metadata@:>@],
[ NAGIOS_METADATA_DIR="$withval" ]
)
AC_ARG_WITH(acl,
[ --with-acl Support CIB ACL ],
[ SUPPORT_ACL=$withval ],
[ SUPPORT_ACL=yes ],
)
AC_ARG_WITH(cibsecrets,
[ --with-cibsecrets Support separate file for CIB secrets ],
[ SUPPORT_CIBSECRETS=$withval ],
[ SUPPORT_CIBSECRETS=no ],
)
AC_ARG_WITH(gnutls-priorities,
[ --with-gnutls-priorities GnuTLS cipher priorities @<:@NORMAL@:>@ ],
[ PCMK_GNUTLS_PRIORITIES="$withval" ],
[ PCMK_GNUTLS_PRIORITIES="NORMAL" ],
)
INITDIR=""
AC_ARG_WITH(initdir,
[ --with-initdir=DIR Directory for init (rc) scripts],
[ INITDIR="$withval" ])
SUPPORT_PROFILING=0
AC_ARG_WITH(profiling,
[ --with-profiling Disable optimizations for effective profiling ],
[ SUPPORT_PROFILING=$withval ])
AC_ARG_WITH(coverage,
[ --with-coverage Disable optimizations for effective profiling ],
[ SUPPORT_COVERAGE=$withval ])
PUBLICAN_BRAND="common"
AC_ARG_WITH(brand,
[ --with-brand=brand Brand to use for generated documentation (set empty for no docs) @<:@common@:>@],
[ test x"$withval" = x"no" || PUBLICAN_BRAND="$withval" ])
AC_SUBST(PUBLICAN_BRAND)
CONFIGDIR=""
AC_ARG_WITH(configdir,
[ --with-configdir=DIR Directory for Pacemaker configuration file @<:@SYSCONFDIR/sysconfig@:>@],
[ CONFIGDIR="$withval" ]
)
CRM_LOG_DIR=""
AC_ARG_WITH(logdir,
[ --with-logdir=DIR Directory for Pacemaker log file @<:@LOCALSTATEDIR/log/pacemaker@:>@ ],
[ CRM_LOG_DIR="$withval" ]
)
CRM_BUNDLE_DIR=""
AC_ARG_WITH(bundledir,
[ --with-bundledir=DIR Directory for Pacemaker bundle logs @<:@LOCALSTATEDIR/log/pacemaker/bundles@:>@ ],
[ CRM_BUNDLE_DIR="$withval" ]
)
dnl ===============================================
dnl General Processing
dnl ===============================================
if cc_supports_flag -Werror; then
WERROR="-Werror"
else
WERROR=""
fi
# Normalize enable_fatal_warnings (defaulting to yes, when compiler supports it)
if test "x${enable_fatal_warnings}" != "xno" ; then
if test "$GCC" = "yes" && test "x${WERROR}" != "x" ; then
enable_fatal_warnings=yes
else
AC_MSG_NOTICE(Compiler does not support fatal warnings)
enable_fatal_warnings=no
fi
fi
INIT_EXT=""
echo Our Host OS: $host_os/$host
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
NONE)
prefix=/usr
dnl Fix default variables - "prefix" variable if not specified
if test "$localstatedir" = "\${prefix}/var"; then
localstatedir="/var"
fi
if test "$sysconfdir" = "\${prefix}/etc"; then
sysconfdir="/etc"
fi
;;
esac
AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
prefix|NONE)
exec_prefix=$prefix
;;
esac
AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR})
case $INITDIR in
prefix) INITDIR=$prefix;;
"")
AC_MSG_CHECKING(which init (rc) directory to use)
for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \
/usr/local/etc/rc.d /etc/rc.d
do
if
test -d $initdir
then
INITDIR=$initdir
break
fi
done
AC_MSG_RESULT($INITDIR)
;;
esac
AC_SUBST(INITDIR)
AC_MSG_NOTICE(Sanitizing libdir: ${libdir})
case $libdir in
prefix|NONE)
AC_MSG_CHECKING(which lib directory to use)
for aDir in lib64 lib
do
trydir="${exec_prefix}/${aDir}"
if
test -d ${trydir}
then
libdir=${trydir}
break
fi
done
AC_MSG_RESULT($libdir);
;;
esac
dnl Expand autoconf variables so that we don't end up with '${prefix}'
dnl in #defines and python scripts
dnl NOTE: Autoconf deliberately leaves them unexpanded to allow
dnl make exec_prefix=/foo install
dnl No longer being able to do this seems like no great loss to me...
eval prefix="`eval echo ${prefix}`"
eval exec_prefix="`eval echo ${exec_prefix}`"
eval bindir="`eval echo ${bindir}`"
eval sbindir="`eval echo ${sbindir}`"
eval libexecdir="`eval echo ${libexecdir}`"
eval datadir="`eval echo ${datadir}`"
eval sysconfdir="`eval echo ${sysconfdir}`"
eval sharedstatedir="`eval echo ${sharedstatedir}`"
eval localstatedir="`eval echo ${localstatedir}`"
eval libdir="`eval echo ${libdir}`"
eval includedir="`eval echo ${includedir}`"
eval oldincludedir="`eval echo ${oldincludedir}`"
eval infodir="`eval echo ${infodir}`"
eval mandir="`eval echo ${mandir}`"
dnl Home-grown variables
eval INITDIR="${INITDIR}"
eval docdir="`eval echo ${docdir}`"
if test x"${docdir}" = x""; then
docdir=${datadir}/doc/${PACKAGE}-${VERSION}
fi
AC_SUBST(docdir)
if test x"${CONFIGDIR}" = x""; then
CONFIGDIR="${sysconfdir}/sysconfig"
fi
AC_SUBST(CONFIGDIR)
if test x"${CRM_LOG_DIR}" = x""; then
CRM_LOG_DIR="${localstatedir}/log/pacemaker"
fi
AC_DEFINE_UNQUOTED(CRM_LOG_DIR,"$CRM_LOG_DIR", Location for Pacemaker log file)
AC_SUBST(CRM_LOG_DIR)
if test x"${CRM_BUNDLE_DIR}" = x""; then
CRM_BUNDLE_DIR="${localstatedir}/log/pacemaker/bundles"
fi
AC_DEFINE_UNQUOTED(CRM_BUNDLE_DIR,"$CRM_BUNDLE_DIR", Location for Pacemaker bundle logs)
AC_SUBST(CRM_BUNDLE_DIR)
AC_DEFINE_UNQUOTED([PCMK_GNUTLS_PRIORITIES], ["$PCMK_GNUTLS_PRIORITIES"],
[GnuTLS cipher priorities])
for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \
sharedstatedir localstatedir libdir includedir oldincludedir infodir \
mandir INITDIR docdir CONFIGDIR
do
dirname=`eval echo '${'${j}'}'`
if
test ! -d "$dirname"
then
AC_MSG_WARN([$j directory ($dirname) does not exist!])
fi
done
dnl This OS-based decision-making is poor autotools practice;
dnl feature-based mechanisms are strongly preferred.
dnl
dnl So keep this section to a bare minimum; regard as a "necessary evil".
case "$host_os" in
*bsd*)
AC_DEFINE_UNQUOTED(ON_BSD, 1, Compiling for BSD platform)
LIBS="-L/usr/local/lib"
CPPFLAGS="$CPPFLAGS -I/usr/local/include"
INIT_EXT=".sh"
;;
*solaris*)
AC_DEFINE_UNQUOTED(ON_SOLARIS, 1, Compiling for Solaris platform)
;;
*linux*)
AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
;;
darwin*)
AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
LIBS="$LIBS -L${prefix}/lib"
CFLAGS="$CFLAGS -I${prefix}/include"
;;
esac
AC_SUBST(INIT_EXT)
AC_MSG_NOTICE(Host CPU: $host_cpu)
case "$host_cpu" in
ppc64|powerpc64)
case $CFLAGS in
*powerpc64*)
;;
*)
if test "$GCC" = yes; then
CFLAGS="$CFLAGS -m64"
fi
;;
esac
;;
esac
AC_MSG_CHECKING(which format is needed to print uint64_t)
cc_temp_flags "-Wall $WERROR"
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM(
[
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
],
[
int max = 512;
uint64_t bignum = 42;
char *buffer = malloc(max);
const char *random = "random";
snprintf(buffer, max-1, "<quorum id=%lu quorate=%s/>", bignum, random);
fprintf(stderr, "Result: %s\n", buffer);
]
)],
[U64T="%lu"],
[U64T="%llu"]
)
cc_restore_flags
AC_MSG_RESULT($U64T)
AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)
dnl ===============================================
dnl Program Paths
dnl ===============================================
PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
export PATH
dnl Replacing AC_PROG_LIBTOOL with AC_CHECK_PROG because LIBTOOL
dnl was NOT being expanded all the time thus causing things to fail.
AC_CHECK_PROGS(LIBTOOL, glibtool libtool libtool15 libtool13)
dnl Pacemaker's executable python scripts will invoke the python specified by
dnl configure's PYTHON variable. If not specified, AM_PATH_PYTHON will check a
dnl built-in list with (unversioned) "python" having precedence. To configure
dnl Pacemaker to use a specific python interpreter version, define PYTHON
dnl when calling configure, for example: ./configure PYTHON=/usr/bin/python3.6
dnl PYTHON must be a full path
case "x$PYTHON" in
/*)
;;
*)
AC_PATH_PROG([PYTHON], [$PYTHON])
;;
esac
case "x$PYTHON" in
x*python3*)
dnl When used with Python 3, Pacemaker requires a minimum of 3.2
AM_PATH_PYTHON([3.2])
;;
*)
dnl Otherwise, Pacemaker requires a minimum of 2.7
AM_PATH_PYTHON([2.7])
;;
esac
AC_CHECK_PROGS(MAKE, gmake make)
AC_PATH_PROGS(HTML2TXT, lynx w3m)
AC_PATH_PROGS(HELP2MAN, help2man)
AC_PATH_PROGS(POD2MAN, pod2man, pod2man)
AC_PATH_PROGS(ASCIIDOC, asciidoc)
AC_PATH_PROGS(PUBLICAN, publican)
AC_PATH_PROGS(INKSCAPE, inkscape)
AC_PATH_PROGS(XSLTPROC, xsltproc)
AC_PATH_PROGS(XMLCATALOG, xmlcatalog)
AC_PATH_PROGS(FOP, fop)
AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh)
AC_PATH_PROGS(SCP, scp, /usr/bin/scp)
AC_PATH_PROGS(TAR, tar)
AC_PATH_PROGS(MD5, md5)
dnl BASH is already an environment variable, so use something else
AC_PATH_PROG([BASH_PATH], [bash])
AC_PATH_PROGS(TEST, test)
PKG_PROG_PKG_CONFIG
AC_PATH_PROGS(VALGRIND_BIN, valgrind, /usr/bin/valgrind)
AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command)
if test x"${LIBTOOL}" = x""; then
AC_MSG_ERROR(You need (g)libtool installed in order to build ${PACKAGE})
fi
if test x"${MAKE}" = x""; then
AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE})
fi
dnl Bash is needed for building man pages and running regression tests
if test x"${BASH_PATH}" = x""; then
AC_MSG_ERROR(bash must be installed in order to build ${PACKAGE})
fi
AM_CONDITIONAL(BUILD_HELP, test x"${HELP2MAN}" != x"")
if test x"${HELP2MAN}" != x""; then
PCMK_FEATURES="$PCMK_FEATURES generated-manpages"
fi
MANPAGE_XSLT=""
if test x"${XSLTPROC}" != x""; then
AC_MSG_CHECKING(docbook to manpage transform)
# first try to figure out correct template using xmlcatalog query,
# resort to extensive (semi-deterministic) file search if that fails
DOCBOOK_XSL_URI='http://docbook.sourceforge.net/release/xsl/current'
DOCBOOK_XSL_PATH='manpages/docbook.xsl'
MANPAGE_XSLT=$(${XMLCATALOG} "" ${DOCBOOK_XSL_URI}/${DOCBOOK_XSL_PATH} \
| sed -n 's|^file://||p;q')
if test x"${MANPAGE_XSLT}" = x""; then
DIRS=$(find "${datadir}" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \
-type d | LC_ALL=C sort)
XSLT=$(basename ${DOCBOOK_XSL_PATH})
for d in ${DIRS}; do
if test -f "${d}/${XSLT}"; then
MANPAGE_XSLT="${d}/${XSLT}"
break
fi
done
fi
fi
AC_MSG_RESULT($MANPAGE_XSLT)
AC_SUBST(MANPAGE_XSLT)
AM_CONDITIONAL(BUILD_XML_HELP, test x"${MANPAGE_XSLT}" != x"")
if test x"${MANPAGE_XSLT}" != x""; then
PCMK_FEATURES="$PCMK_FEATURES agent-manpages"
fi
AM_CONDITIONAL(BUILD_ASCIIDOC, test x"${ASCIIDOC}" != x"")
if test x"${ASCIIDOC}" != x""; then
PCMK_FEATURES="$PCMK_FEATURES ascii-docs"
fi
publican_intree_brand=no
if test x"${PUBLICAN_BRAND}" != x"" \
&& test x"${PUBLICAN}" != x"" \
&& test x"${INKSCAPE}" != x""; then
dnl special handling for clusterlabs brand (possibly in-tree version used)
test "${PUBLICAN_BRAND}" != "clusterlabs" \
|| test -d /usr/share/publican/Common_Content/clusterlabs
if test $? -ne 0; then
dnl Unknown option: brand_dir vs. Option brand_dir requires an argument
if ${PUBLICAN} build --brand_dir 2>&1 | grep -Eq 'brand_dir$'; then
AC_MSG_WARN([Cannot use in-tree clusterlabs brand, resorting to common])
PUBLICAN_BRAND=common
else
publican_intree_brand=yes
fi
fi
AC_MSG_NOTICE([Enabling Publican-generated documentation using ${PUBLICAN_BRAND} brand])
PCMK_FEATURES="$PCMK_FEATURES publican-docs"
fi
AM_CONDITIONAL([BUILD_DOCBOOK],
[test x"${PUBLICAN_BRAND}" != x"" \
&& test x"${PUBLICAN}" != x"" \
&& test x"${INKSCAPE}" != x""])
AM_CONDITIONAL([PUBLICAN_INTREE_BRAND],
[test x"${publican_intree_brand}" = x"yes"])
dnl Pacemaker's shell scripts (and thus man page builders) rely on GNU getopt
AC_MSG_CHECKING([for GNU-compatible getopt])
IFS_orig=$IFS
IFS=:
for PATH_DIR in $PATH; do
IFS=$IFS_orig
GETOPT_PATH="${PATH_DIR}/getopt"
if test -f "$GETOPT_PATH" && test -x "$GETOPT_PATH" ; then
$GETOPT_PATH -T >/dev/null 2>/dev/null
if test $? -eq 4; then
break
fi
fi
GETOPT_PATH=""
done
IFS=$IFS_orig
if test -n "$GETOPT_PATH"; then
AC_MSG_RESULT([$GETOPT_PATH])
else
AC_MSG_RESULT([no])
AC_MSG_ERROR(Pacemaker build requires a GNU-compatible getopt)
fi
AC_SUBST([GETOPT_PATH])
dnl ========================================================================
dnl checks for library functions to replace them
dnl
dnl NoSuchFunctionName:
dnl is a dummy function which no system supplies. It is here to make
dnl the system compile semi-correctly on OpenBSD which doesn't know
dnl how to create an empty archive
dnl
dnl scandir: Only on BSD.
dnl System-V systems may have it, but hidden and/or deprecated.
dnl A replacement function is supplied for it.
dnl
dnl setenv: is some bsdish function that should also be avoided (use
dnl putenv instead)
dnl On the other hand, putenv doesn't provide the right API for the
dnl code and has memory leaks designed in (sigh...) Fortunately this
dnl A replacement function is supplied for it.
dnl
dnl strerror: returns a string that corresponds to an errno.
dnl A replacement function is supplied for it.
dnl
dnl strnlen: is a gnu function similar to strlen, but safer.
dnl We wrote a tolearably-fast replacement function for it.
dnl
dnl strndup: is a gnu function similar to strdup, but safer.
dnl We wrote a tolearably-fast replacement function for it.
AC_REPLACE_FUNCS(alphasort NoSuchFunctionName scandir setenv strerror strchrnul unsetenv strnlen strndup)
dnl ===============================================
dnl Libraries
dnl ===============================================
AC_CHECK_LIB(socket, socket) dnl -lsocket
AC_CHECK_LIB(c, dlopen) dnl if dlopen is in libc...
AC_CHECK_LIB(dl, dlopen) dnl -ldl (for Linux)
AC_CHECK_LIB(rt, sched_getscheduler) dnl -lrt (for Tru64)
AC_CHECK_LIB(gnugetopt, getopt_long) dnl -lgnugetopt ( if available )
AC_CHECK_LIB(pam, pam_start) dnl -lpam (if available)
AC_CHECK_FUNCS([sched_setscheduler])
AC_CHECK_LIB(uuid, uuid_parse) dnl load the library if necessary
AC_CHECK_FUNCS(uuid_unparse) dnl OSX ships uuid_* as standard functions
AC_CHECK_HEADERS(uuid/uuid.h)
if test "x$ac_cv_func_uuid_unparse" != xyes; then
AC_MSG_ERROR(You do not have the libuuid development package installed)
fi
if test x"${PKG_CONFIG}" = x""; then
AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE})
fi
if
$PKG_CONFIG --exists glib-2.0
then
GLIBCONFIG="$PKG_CONFIG glib-2.0"
else
set -x
echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH
$PKG_CONFIG --exists glib-2.0; echo $?
$PKG_CONFIG --cflags glib-2.0; echo $?
$PKG_CONFIG glib-2.0; echo $?
set +x
AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE})
fi
AC_MSG_RESULT(using $GLIBCONFIG)
#
# Where is dlopen?
#
if test "$ac_cv_lib_c_dlopen" = yes; then
LIBADD_DL=""
elif test "$ac_cv_lib_dl_dlopen" = yes; then
LIBADD_DL=-ldl
else
LIBADD_DL=${lt_cv_dlopen_libs}
fi
if test "X$GLIBCONFIG" != X; then
AC_MSG_CHECKING(for special glib includes: )
GLIBHEAD=`$GLIBCONFIG --cflags`
AC_MSG_RESULT($GLIBHEAD)
CPPFLAGS="$CPPFLAGS $GLIBHEAD"
AC_MSG_CHECKING(for glib library flags)
GLIBLIB=`$GLIBCONFIG --libs`
AC_MSG_RESULT($GLIBLIB)
LIBS="$LIBS $GLIBLIB"
fi
dnl FreeBSD needs -lcompat for ftime() used by lrmd.c
AC_CHECK_LIB([compat], [ftime], [COMPAT_LIBS='-lcompat'])
AC_SUBST(COMPAT_LIBS)
dnl ========================================================================
dnl Headers
dnl ========================================================================
dnl Some distributions insert #warnings into deprecated headers such as
dnl timeb.h. If we will enable fatal warnings for the build, then enable
dnl them for the header checks as well, otherwise the build could fail
dnl even though the header check succeeds. (We should probably be doing
dnl this in more places.)
if test "x${enable_fatal_warnings}" = xyes ; then
cc_temp_flags "$CFLAGS $WERROR"
fi
AC_CHECK_HEADERS(arpa/inet.h)
AC_CHECK_HEADERS(ctype.h)
AC_CHECK_HEADERS(dirent.h)
AC_CHECK_HEADERS(errno.h)
AC_CHECK_HEADERS(getopt.h)
AC_CHECK_HEADERS(glib.h)
AC_CHECK_HEADERS(grp.h)
AC_CHECK_HEADERS(limits.h)
AC_CHECK_HEADERS(linux/swab.h)
AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(netdb.h)
AC_CHECK_HEADERS(netinet/in.h)
AC_CHECK_HEADERS(netinet/ip.h)
AC_CHECK_HEADERS(pwd.h)
AC_CHECK_HEADERS(sgtty.h)
AC_CHECK_HEADERS(signal.h)
AC_CHECK_HEADERS(stdarg.h)
AC_CHECK_HEADERS(stddef.h)
AC_CHECK_HEADERS(stdio.h)
AC_CHECK_HEADERS(stdlib.h)
AC_CHECK_HEADERS(string.h)
AC_CHECK_HEADERS(strings.h)
AC_CHECK_HEADERS(sys/dir.h)
AC_CHECK_HEADERS(sys/ioctl.h)
AC_CHECK_HEADERS(sys/param.h)
AC_CHECK_HEADERS(sys/reboot.h)
AC_CHECK_HEADERS(sys/resource.h)
AC_CHECK_HEADERS(sys/socket.h)
AC_CHECK_HEADERS(sys/signalfd.h)
AC_CHECK_HEADERS(sys/sockio.h)
AC_CHECK_HEADERS(sys/stat.h)
AC_CHECK_HEADERS(sys/time.h)
AC_CHECK_HEADERS(sys/timeb.h)
AC_CHECK_HEADERS(sys/types.h)
AC_CHECK_HEADERS(sys/utsname.h)
AC_CHECK_HEADERS(sys/wait.h)
AC_CHECK_HEADERS(time.h)
AC_CHECK_HEADERS(unistd.h)
if test "x${enable_fatal_warnings}" = xyes ; then
cc_restore_flags
fi
dnl These headers need prerequisites before the tests will pass
dnl AC_CHECK_HEADERS(net/if.h)
PKG_CHECK_MODULES(LIBXML2, [libxml-2.0],
[CPPFLAGS="${CPPFLAGS} ${LIBXML2_CFLAGS}"
LIBS="${LIBS} ${LIBXML2_LIBS}"])
AC_CHECK_HEADERS(libxml/xpath.h)
if test "$ac_cv_header_libxml_xpath_h" != "yes"; then
AC_MSG_ERROR(libxml development headers not found)
fi
AC_CHECK_LIB(xslt, xsltApplyStylesheet, [],
AC_MSG_ERROR(Unsupported libxslt library version))
AC_CHECK_HEADERS(libxslt/xslt.h)
if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
AC_MSG_ERROR(libxslt development headers not found)
fi
AC_CACHE_CHECK(whether __progname and __progname_full are available,
pf_cv_var_progname,
AC_TRY_LINK([extern char *__progname, *__progname_full;],
[__progname = "foo"; __progname_full = "foo bar";],
pf_cv_var_progname="yes", pf_cv_var_progname="no"))
if test "$pf_cv_var_progname" = "yes"; then
AC_DEFINE(HAVE___PROGNAME,1,[ ])
fi
dnl ========================================================================
dnl Structures
dnl ========================================================================
AC_CHECK_MEMBERS([struct tm.tm_gmtoff],,,[[#include <time.h>]])
AC_CHECK_MEMBERS([lrm_op_t.rsc_deleted],,,[[#include <lrm/lrm_api.h>]])
AC_CHECK_MEMBER([struct dirent.d_type],
AC_DEFINE(HAVE_STRUCT_DIRENT_D_TYPE,1,[Define this if struct dirent has d_type]),,
[#include <dirent.h>])
dnl ========================================================================
dnl Functions
dnl ========================================================================
AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function]))
AC_CHECK_FUNCS(nanosleep, AC_DEFINE(HAVE_DECL_NANOSLEEP, 1, [Have nanosleep function]))
dnl ========================================================================
dnl bzip2
dnl ========================================================================
AC_CHECK_HEADERS(bzlib.h)
AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress)
if test x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress != xyes ; then
AC_MSG_ERROR(BZ2 libraries not found)
fi
if test x$ac_cv_header_bzlib_h != xyes; then
AC_MSG_ERROR(BZ2 Development headers not found)
fi
dnl ========================================================================
dnl sighandler_t is missing from Illumos, Solaris11 systems
dnl ========================================================================
AC_MSG_CHECKING([for sighandler_t])
AC_TRY_COMPILE([#include <signal.h>],[sighandler_t *f;],
has_sighandler_t=yes,has_sighandler_t=no)
AC_MSG_RESULT($has_sighandler_t)
if test "$has_sighandler_t" = "yes" ; then
AC_DEFINE( HAVE_SIGHANDLER_T, 1, [Define if sighandler_t available] )
fi
dnl ========================================================================
dnl ncurses
dnl ========================================================================
dnl
dnl A few OSes (e.g. Linux) deliver a default "ncurses" alongside "curses".
dnl Many non-Linux deliver "curses"; sites may add "ncurses".
dnl
dnl However, the source-code recommendation for both is to #include "curses.h"
dnl (i.e. "ncurses" still wants the include to be simple, no-'n', "curses.h").
dnl
dnl ncurse takes precedence.
dnl
AC_CHECK_HEADERS(curses.h)
AC_CHECK_HEADERS(curses/curses.h)
AC_CHECK_HEADERS(ncurses.h)
AC_CHECK_HEADERS(ncurses/ncurses.h)
dnl Although n-library is preferred, only look for it if the n-header was found.
CURSESLIBS=''
if test "$ac_cv_header_ncurses_h" = "yes"; then
AC_CHECK_LIB(ncurses, printw,
[AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)])
CURSESLIBS=`$PKG_CONFIG --libs ncurses` || CURSESLIBS='-lncurses'
fi
if test "$ac_cv_header_ncurses_ncurses_h" = "yes"; then
AC_CHECK_LIB(ncurses, printw,
[AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)])
CURSESLIBS=`$PKG_CONFIG --libs ncurses` || CURSESLIBS='-lncurses'
fi
dnl Only look for non-n-library if there was no n-library.
if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_h" = "yes"; then
AC_CHECK_LIB(curses, printw,
[CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)])
fi
dnl Only look for non-n-library if there was no n-library.
if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_curses_h" = "yes"; then
AC_CHECK_LIB(curses, printw,
[CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)])
fi
if test "x$CURSESLIBS" != "x"; then
PCMK_FEATURES="$PCMK_FEATURES ncurses"
fi
dnl Check for printw() prototype compatibility
if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual; then
ac_save_LIBS=$LIBS
LIBS="$CURSESLIBS"
cc_temp_flags "-Wcast-qual $WERROR"
# avoid broken test because of hardened build environment in Fedora 23+
# - https://fedoraproject.org/wiki/Changes/Harden_All_Packages
# - https://bugzilla.redhat.com/1297985
if cc_supports_flag -fPIC; then
CFLAGS="$CFLAGS -fPIC"
fi
AC_MSG_CHECKING(whether printw() requires argument of "const char *")
AC_LINK_IFELSE(
[AC_LANG_PROGRAM([
#if defined(HAVE_NCURSES_H)
# include <ncurses.h>
#elif defined(HAVE_NCURSES_NCURSES_H)
# include <ncurses/ncurses.h>
#elif defined(HAVE_CURSES_H)
# include <curses.h>
#endif
],
[printw((const char *)"Test");]
)],
[ac_cv_compatible_printw=yes],
[ac_cv_compatible_printw=no]
)
LIBS=$ac_save_LIBS
cc_restore_flags
AC_MSG_RESULT([$ac_cv_compatible_printw])
if test "$ac_cv_compatible_printw" = no; then
AC_MSG_WARN([The printw() function of your ncurses or curses library is old, we will disable usage of the library. If you want to use this library anyway, please update to newer version of the library, ncurses 5.4 or later is recommended. You can get the library from http://www.gnu.org/software/ncurses/.])
AC_MSG_NOTICE([Disabling curses])
AC_DEFINE(HAVE_INCOMPATIBLE_PRINTW, 1, [Do we have incompatible printw() in curses library?])
fi
fi
AC_SUBST(CURSESLIBS)
dnl ========================================================================
dnl Profiling and GProf
dnl ========================================================================
AC_MSG_NOTICE(Old CFLAGS: $CFLAGS)
case $SUPPORT_COVERAGE in
1|yes|true)
SUPPORT_PROFILING=1
PCMK_FEATURES="$PCMK_FEATURES coverage"
CFLAGS="$CFLAGS -fprofile-arcs -ftest-coverage"
dnl During linking, make sure to specify -lgcov or -coverage
;;
esac
case $SUPPORT_PROFILING in
1|yes|true)
SUPPORT_PROFILING=1
dnl Disable various compiler optimizations
CFLAGS="$CFLAGS -fno-omit-frame-pointer -fno-inline -fno-builtin "
dnl CFLAGS="$CFLAGS -fno-inline-functions -fno-default-inline -fno-inline-functions-called-once -fno-optimize-sibling-calls"
dnl Turn off optimization so tools can get accurate line numbers
CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g' -e 's/-D_FORTIFY_SOURCE=.\ //g'`
CFLAGS="$CFLAGS -O0 -g3 -gdwarf-2"
dnl Update features
PCMK_FEATURES="$PCMK_FEATURES profile"
;;
*)
SUPPORT_PROFILING=0
;;
esac
AC_MSG_NOTICE(New CFLAGS: $CFLAGS)
AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for profiling)
dnl ========================================================================
dnl Cluster infrastructure - LibQB
dnl ========================================================================
if test x${enable_no_stack} = xyes; then
SUPPORT_CS=no
fi
PKG_CHECK_MODULES(libqb, libqb >= 0.13)
CPPFLAGS="$libqb_CFLAGS $CPPFLAGS"
LIBS="$libqb_LIBS $LIBS"
dnl libqb 0.14.0+ (2012-06)
AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
PCMK_FEATURES="$PCMK_FEATURES libqb-logging libqb-ipc"
dnl libqb 0.17.0+ (2014-02)
AC_CHECK_FUNCS(qb_ipcs_connection_get_buffer_size,
AC_DEFINE(HAVE_IPCS_GET_BUFFER_SIZE, 1,
[Have qb_ipcc_get_buffer_size function]))
dnl Support Linux-HA fence agents if available
if test "$cross_compiling" != "yes"; then
CPPFLAGS="$CPPFLAGS -I${prefix}/include/heartbeat"
fi
AC_CHECK_HEADERS(stonith/stonith.h)
if test "$ac_cv_header_stonith_stonith_h" = "yes"; then
dnl On Debian, AC_CHECK_LIB fails if a library has any unresolved symbols
dnl So check for all the dependencies (so they're added to LIBS) before checking for -lplumb
AC_CHECK_LIB(pils, PILLoadPlugin)
AC_CHECK_LIB(plumb, G_main_add_IPC_Channel)
PCMK_FEATURES="$PCMK_FEATURES lha-fencing"
fi
dnl ===============================================
dnl Variables needed for substitution
dnl ===============================================
CRM_SCHEMA_DIRECTORY="${datadir}/pacemaker"
AC_DEFINE_UNQUOTED(CRM_SCHEMA_DIRECTORY,"$CRM_SCHEMA_DIRECTORY", Location for the Pacemaker Relax-NG Schema)
AC_SUBST(CRM_SCHEMA_DIRECTORY)
CRM_CORE_DIR="${localstatedir}/lib/pacemaker/cores"
AC_DEFINE_UNQUOTED(CRM_CORE_DIR,"$CRM_CORE_DIR", Location to store core files produced by Pacemaker daemons)
AC_SUBST(CRM_CORE_DIR)
CRM_DAEMON_USER="hacluster"
AC_DEFINE_UNQUOTED(CRM_DAEMON_USER,"$CRM_DAEMON_USER", User to run Pacemaker daemons as)
AC_SUBST(CRM_DAEMON_USER)
CRM_DAEMON_GROUP="haclient"
AC_DEFINE_UNQUOTED(CRM_DAEMON_GROUP,"$CRM_DAEMON_GROUP", Group to run Pacemaker daemons as)
AC_SUBST(CRM_DAEMON_GROUP)
CRM_STATE_DIR=${localstatedir}/run/crm
AC_DEFINE_UNQUOTED(CRM_STATE_DIR,"$CRM_STATE_DIR", Where to keep state files and sockets)
AC_SUBST(CRM_STATE_DIR)
CRM_PACEMAKER_DIR=${localstatedir}/lib/pacemaker
AC_DEFINE_UNQUOTED(CRM_PACEMAKER_DIR,"$CRM_PACEMAKER_DIR", Location to store directory produced by Pacemaker daemons)
AC_SUBST(CRM_PACEMAKER_DIR)
CRM_BLACKBOX_DIR=${localstatedir}/lib/pacemaker/blackbox
AC_DEFINE_UNQUOTED(CRM_BLACKBOX_DIR,"$CRM_BLACKBOX_DIR", Where to keep blackbox dumps)
AC_SUBST(CRM_BLACKBOX_DIR)
PE_STATE_DIR="${localstatedir}/lib/pacemaker/pengine"
AC_DEFINE_UNQUOTED(PE_STATE_DIR,"$PE_STATE_DIR", Where to keep PEngine outputs)
AC_SUBST(PE_STATE_DIR)
CRM_CONFIG_DIR="${localstatedir}/lib/pacemaker/cib"
AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep configuration files)
AC_SUBST(CRM_CONFIG_DIR)
CRM_CONFIG_CTS="${localstatedir}/lib/pacemaker/cts"
AC_DEFINE_UNQUOTED(CRM_CONFIG_CTS,"$CRM_CONFIG_CTS", Where to keep cts stateful data)
AC_SUBST(CRM_CONFIG_CTS)
CRM_DAEMON_DIR="${libexecdir}/pacemaker"
AC_DEFINE_UNQUOTED(CRM_DAEMON_DIR,"$CRM_DAEMON_DIR", Location for Pacemaker daemons)
AC_SUBST(CRM_DAEMON_DIR)
HA_STATE_DIR="${localstatedir}/run"
AC_DEFINE_UNQUOTED(HA_STATE_DIR,"$HA_STATE_DIR", Where sbd keeps its PID file)
AC_SUBST(HA_STATE_DIR)
CRM_RSCTMP_DIR="${localstatedir}/run/resource-agents"
AC_DEFINE_UNQUOTED(CRM_RSCTMP_DIR,"$CRM_RSCTMP_DIR", Where resource agents should keep state files)
AC_SUBST(CRM_RSCTMP_DIR)
PACEMAKER_CONFIG_DIR="${sysconfdir}/pacemaker"
AC_DEFINE_UNQUOTED(PACEMAKER_CONFIG_DIR,"$PACEMAKER_CONFIG_DIR", Where to keep configuration files like authkey)
AC_SUBST(PACEMAKER_CONFIG_DIR)
OCF_ROOT_DIR="/usr/lib/ocf"
if test "X$OCF_ROOT_DIR" = X; then
AC_MSG_ERROR(Could not locate OCF directory)
fi
AC_SUBST(OCF_ROOT_DIR)
OCF_RA_DIR="$OCF_ROOT_DIR/resource.d"
AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs)
AC_SUBST(OCF_RA_DIR)
RH_STONITH_DIR="$sbindir"
AC_DEFINE_UNQUOTED(RH_STONITH_DIR,"$RH_STONITH_DIR", Location for Red Hat Stonith agents)
AC_DEFINE_UNQUOTED(SBIN_DIR,"$sbindir", Location for system binaries)
RH_STONITH_PREFIX="fence_"
AC_DEFINE_UNQUOTED(RH_STONITH_PREFIX,"$RH_STONITH_PREFIX", Prefix for Red Hat Stonith agents)
AC_PATH_PROGS(GIT, git false)
AC_MSG_CHECKING(build version)
BUILD_VERSION=$Format:%h$
if test $BUILD_VERSION != ":%h$"; then
AC_MSG_RESULT(archive hash: $BUILD_VERSION)
elif test -x $GIT -a -d .git; then
BUILD_VERSION=`$GIT log --pretty="format:%h" -n 1`
AC_MSG_RESULT(git hash: $BUILD_VERSION)
else
# The current directory name makes a reasonable default
# Most generated archives will include the hash or tag
BASE=`basename $PWD`
BUILD_VERSION=`echo $BASE | sed s:.*[[Pp]]acemaker-::`
AC_MSG_RESULT(directory based hash: $BUILD_VERSION)
fi
AC_DEFINE_UNQUOTED(BUILD_VERSION, "$BUILD_VERSION", Build version)
AC_SUBST(BUILD_VERSION)
HAVE_dbus=1
HAVE_upstart=0
HAVE_systemd=0
PKG_CHECK_MODULES(DBUS, dbus-1, ,HAVE_dbus=0)
AC_DEFINE_UNQUOTED(SUPPORT_DBUS, $HAVE_dbus, Support dbus)
AM_CONDITIONAL(BUILD_DBUS, test $HAVE_dbus = 1)
if test $HAVE_dbus = 1; then
CFLAGS="$CFLAGS `$PKG_CONFIG --cflags dbus-1`"
fi
DBUS_LIBS="$CFLAGS `$PKG_CONFIG --libs dbus-1`"
AC_SUBST(DBUS_LIBS)
AC_CHECK_TYPES([DBusBasicValue],,,[[#include <dbus/dbus.h>]])
if test "x${enable_systemd}" != xno; then
if test $HAVE_dbus = 0; then
if test "x${enable_systemd}" = xyes; then
AC_MSG_FAILURE([cannot enable systemd without DBus])
else
enable_systemd=no
fi
fi
if test "x${enable_systemd}" = xtry; then
AC_MSG_CHECKING([for systemd version query result via dbus-send])
ret=$({ dbus-send --system --print-reply \
--dest=org.freedesktop.systemd1 \
/org/freedesktop/systemd1 \
org.freedesktop.DBus.Properties.Get \
string:org.freedesktop.systemd1.Manager \
string:Version 2>/dev/null \
|| echo "this borked"; } | tail -n1)
# sanitize output a bit (interested just in value, not type),
# ret is intentionally unquoted so as to normalize whitespace
ret=$(echo ${ret} | cut -d' ' -f2-)
AC_MSG_RESULT([${ret}])
if test "x${ret}" != xborked \
|| systemctl --version 2>/dev/null | grep -q systemd; then
enable_systemd=yes
else
enable_systemd=no
fi
fi
fi
AC_MSG_CHECKING([whether to enable support for managing resources via systemd])
AC_MSG_RESULT([${enable_systemd}])
if test "x${enable_systemd}" = xyes; then
HAVE_systemd=1
PCMK_FEATURES="$PCMK_FEATURES systemd"
AC_MSG_CHECKING([for systemd path for system unit files])
systemdunitdir="${systemdunitdir-}"
PKG_CHECK_VAR([systemdunitdir], [systemd],
[systemdsystemunitdir], [], [systemdunitdir=no])
AC_MSG_RESULT([${systemdunitdir}])
if test "x${systemdunitdir}" = xno; then
AC_MSG_FAILURE([cannot enable systemd when systemdunitdir unresolved])
fi
fi
AC_SUBST(systemdunitdir)
AC_DEFINE_UNQUOTED(SUPPORT_SYSTEMD, $HAVE_systemd, Support systemd based system services)
AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1)
AC_SUBST(SUPPORT_SYSTEMD)
if test "x${enable_upstart}" != xno; then
if test $HAVE_dbus = 0; then
if test "x${enable_upstart}" = xyes; then
AC_MSG_FAILURE([cannot enable Upstart without DBus])
else
enable_upstart=no
fi
fi
if test "x${enable_upstart}" = xtry; then
AC_MSG_CHECKING([for Upstart version query result via dbus-send])
ret=$({ dbus-send --system --print-reply --dest=com.ubuntu.Upstart \
/com/ubuntu/Upstart org.freedesktop.DBus.Properties.Get \
string:com.ubuntu.Upstart0_6 string:version 2>/dev/null \
|| echo "this borked"; } | tail -n1)
# sanitize output a bit (interested just in value, not type),
# ret is intentionally unquoted so as to normalize whitespace
ret=$(echo ${ret} | cut -d' ' -f2-)
AC_MSG_RESULT([${ret}])
if test "x${ret}" != xborked \
|| initctl --version 2>/dev/null | grep -q upstart; then
enable_upstart=yes
else
enable_upstart=no
fi
fi
fi
AC_MSG_CHECKING([whether to enable support for managing resources via Upstart])
AC_MSG_RESULT([${enable_upstart}])
if test "x${enable_upstart}" = xyes; then
HAVE_upstart=1
PCMK_FEATURES="$PCMK_FEATURES upstart"
fi
AC_DEFINE_UNQUOTED(SUPPORT_UPSTART, $HAVE_upstart, Support upstart based system services)
AM_CONDITIONAL(BUILD_UPSTART, test $HAVE_upstart = 1)
AC_SUBST(SUPPORT_UPSTART)
case $SUPPORT_NAGIOS in
1|yes|true|try)
SUPPORT_NAGIOS=1
;;
*)
SUPPORT_NAGIOS=0
;;
esac
if test $SUPPORT_NAGIOS = 1; then
PCMK_FEATURES="$PCMK_FEATURES nagios"
fi
AC_DEFINE_UNQUOTED(SUPPORT_NAGIOS, $SUPPORT_NAGIOS, Support nagios plugins)
AM_CONDITIONAL(BUILD_NAGIOS, test $SUPPORT_NAGIOS = 1)
if test x"$NAGIOS_PLUGIN_DIR" = x""; then
NAGIOS_PLUGIN_DIR="${libexecdir}/nagios/plugins"
fi
AC_DEFINE_UNQUOTED(NAGIOS_PLUGIN_DIR, "$NAGIOS_PLUGIN_DIR", Directory for nagios plugins)
AC_SUBST(NAGIOS_PLUGIN_DIR)
if test x"$NAGIOS_METADATA_DIR" = x""; then
NAGIOS_METADATA_DIR="${datadir}/nagios/plugins-metadata"
fi
AC_DEFINE_UNQUOTED(NAGIOS_METADATA_DIR, "$NAGIOS_METADATA_DIR", Directory for nagios plugins metadata)
AC_SUBST(NAGIOS_METADATA_DIR)
STACKS=""
CLUSTERLIBS=""
dnl ========================================================================
dnl Cluster stack - Corosync
dnl ========================================================================
dnl Normalize the values
case $SUPPORT_CS in
1|yes|true)
SUPPORT_CS=yes
missingisfatal=1
;;
try)
missingisfatal=0
;;
*)
SUPPORT_CS=no
;;
esac
AC_MSG_CHECKING(for native corosync)
COROSYNC_LIBS=""
if test $SUPPORT_CS = no; then
AC_MSG_RESULT(no (disabled))
SUPPORT_CS=0
else
AC_MSG_RESULT($SUPPORT_CS)
SUPPORT_CS=1
PKG_CHECK_MODULES(cpg, libcpg) dnl Fatal
PKG_CHECK_MODULES(cfg, libcfg) dnl Fatal
PKG_CHECK_MODULES(cmap, libcmap) dnl Fatal
PKG_CHECK_MODULES(quorum, libquorum) dnl Fatal
PKG_CHECK_MODULES(libcorosync_common, libcorosync_common) dnl Fatal
CFLAGS="$CFLAGS $libqb_FLAGS $cpg_FLAGS $cfg_FLAGS $cmap_CFLAGS $quorum_CFLAGS $libcorosync_common_CFLAGS"
COROSYNC_LIBS="$COROSYNC_LIBS $libqb_LIBS $cpg_LIBS $cfg_LIBS $cmap_LIBS $quorum_LIBS $libcorosync_common_LIBS"
CLUSTERLIBS="$CLUSTERLIBS $COROSYNC_LIBS"
STACKS="$STACKS corosync-native"
fi
AC_DEFINE_UNQUOTED(SUPPORT_COROSYNC, $SUPPORT_CS, Support the Corosync messaging and membership layer)
AM_CONDITIONAL(BUILD_CS_SUPPORT, test $SUPPORT_CS = 1)
AC_SUBST(SUPPORT_COROSYNC)
dnl
dnl Cluster stack - Sanity
dnl
if test x${enable_no_stack} = xyes; then
AC_MSG_NOTICE(No cluster stack supported. Just building the Policy Engine)
PCMK_FEATURES="$PCMK_FEATURES no-cluster-stack"
else
AC_MSG_CHECKING(for supported stacks)
if test x"$STACKS" = x; then
AC_MSG_FAILURE(You must support at least one cluster stack)
fi
AC_MSG_RESULT($STACKS)
PCMK_FEATURES="$PCMK_FEATURES $STACKS"
fi
PCMK_FEATURES="$PCMK_FEATURES atomic-attrd"
AC_SUBST(CLUSTERLIBS)
dnl ========================================================================
dnl ACL
dnl ========================================================================
case $SUPPORT_ACL in
1|yes|true)
missingisfatal=1
;;
try)
missingisfatal=0
;;
*)
SUPPORT_ACL=no
;;
esac
AC_MSG_CHECKING(for acl support)
if test $SUPPORT_ACL = no; then
AC_MSG_RESULT(no (disabled))
SUPPORT_ACL=0
else
AC_MSG_RESULT($SUPPORT_ACL)
SUPPORT_ACL=1
AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
if test $ac_cv_lib_qb_qb_ipcs_connection_auth_set != yes; then
SUPPORT_ACL=0
fi
if test $SUPPORT_ACL = 0; then
if test $missingisfatal = 0; then
AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0)
else
AC_MSG_FAILURE(Unable to support ACL. You need to use libqb > 0.13.0)
fi
fi
fi
if test $SUPPORT_ACL = 1; then
PCMK_FEATURES="$PCMK_FEATURES acls"
fi
AM_CONDITIONAL(ENABLE_ACL, test "$SUPPORT_ACL" = "1")
AC_DEFINE_UNQUOTED(ENABLE_ACL, $SUPPORT_ACL, Build in support for CIB ACL)
dnl ========================================================================
dnl CIB secrets
dnl ========================================================================
case $SUPPORT_CIBSECRETS in
1|yes|true|try)
SUPPORT_CIBSECRETS=1
;;
*)
SUPPORT_CIBSECRETS=0
;;
esac
AC_DEFINE_UNQUOTED(SUPPORT_CIBSECRETS, $SUPPORT_CIBSECRETS, Support CIB secrets)
AM_CONDITIONAL(BUILD_CIBSECRETS, test $SUPPORT_CIBSECRETS = 1)
if test $SUPPORT_CIBSECRETS = 1; then
PCMK_FEATURES="$PCMK_FEATURES cibsecrets"
LRM_CIBSECRETS_DIR="${localstatedir}/lib/pacemaker/lrm/secrets"
AC_DEFINE_UNQUOTED(LRM_CIBSECRETS_DIR,"$LRM_CIBSECRETS_DIR", Location for CIB secrets)
AC_SUBST(LRM_CIBSECRETS_DIR)
fi
dnl ========================================================================
dnl GnuTLS
dnl ========================================================================
dnl gnutls_priority_set_direct available since 2.1.7 (released 2007-11-29)
AC_CHECK_LIB(gnutls, gnutls_priority_set_direct)
if test "$ac_cv_lib_gnutls_gnutls_priority_set_direct" != ""; then
AC_CHECK_HEADERS(gnutls/gnutls.h)
fi
dnl ========================================================================
dnl PAM
dnl ========================================================================
AC_CHECK_HEADERS(security/pam_appl.h pam/pam_appl.h)
dnl ========================================================================
dnl System Health
dnl ========================================================================
dnl Check if servicelog development package is installed
SERVICELOG=servicelog-1
SERVICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG packages)
if
$PKG_CONFIG --exists $SERVICELOG
then
PKG_CHECK_MODULES([SERVICELOG], [servicelog-1])
SERVICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($SERVICELOG_EXISTS)
AM_CONDITIONAL(BUILD_SERVICELOG, test "$SERVICELOG_EXISTS" = "yes")
dnl Check if OpenIPMI packages and servicelog are installed
OPENIPMI="OpenIPMI OpenIPMIposix"
OPENIPMI_SERVICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG $OPENIPMI packages)
if
$PKG_CONFIG --exists $OPENIPMI $SERVICELOG
then
PKG_CHECK_MODULES([OPENIPMI_SERVICELOG],[OpenIPMI OpenIPMIposix])
OPENIPMI_SERVICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($OPENIPMI_SERVICELOG_EXISTS)
AM_CONDITIONAL(BUILD_OPENIPMI_SERVICELOG, test "$OPENIPMI_SERVICELOG_EXISTS" = "yes")
dnl ========================================================================
dnl Compiler flags
dnl ========================================================================
dnl Make sure that CFLAGS is not exported. If the user did
dnl not have CFLAGS in their environment then this should have
dnl no effect. However if CFLAGS was exported from the user's
dnl environment, then the new CFLAGS will also be exported
dnl to sub processes.
if export | fgrep " CFLAGS=" > /dev/null; then
SAVED_CFLAGS="$CFLAGS"
unset CFLAGS
CFLAGS="$SAVED_CFLAGS"
unset SAVED_CFLAGS
fi
AC_ARG_VAR([CFLAGS_HARDENED_LIB], [extra C compiler flags for hardened libraries])
AC_ARG_VAR([LDFLAGS_HARDENED_LIB], [extra linker flags for hardened libraries])
AC_ARG_VAR([CFLAGS_HARDENED_EXE], [extra C compiler flags for hardened executables])
AC_ARG_VAR([LDFLAGS_HARDENED_EXE], [extra linker flags for hardened executables])
CC_EXTRAS=""
if test "$GCC" != yes; then
CFLAGS="$CFLAGS -g"
else
CFLAGS="$CFLAGS -ggdb"
dnl When we don't have diagnostic push / pull, we can't explicitly disable
dnl checking for nonliteral formats in the places where they occur on purpose
dnl thus we disable nonliteral format checking globally as we are aborting
dnl on warnings.
dnl what makes the things really ugly is that nonliteral format checking is
dnl obviously available as an extra switch in very modern gcc but for older
dnl gcc this is part of -Wformat=2
dnl so if we have push/pull we can enable -Wformat=2 -Wformat-nonliteral
dnl if we don't have push/pull but -Wformat-nonliteral we can enable -Wformat=2
dnl otherwise none of both
gcc_diagnostic_push_pull=no
cc_temp_flags "$CFLAGS $WERROR"
AC_MSG_CHECKING([for gcc diagnostic push / pull])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#pragma GCC diagnostic push
#pragma GCC diagnostic pop
]])],
[
AC_MSG_RESULT([yes])
gcc_diagnostic_push_pull=yes
], AC_MSG_RESULT([no]))
cc_restore_flags
if cc_supports_flag "-Wformat-nonliteral"; then
gcc_format_nonliteral=yes
else
gcc_format_nonliteral=no
fi
# We had to eliminate -Wnested-externs because of libtool changes
# Make sure to order options so that the former stand for prerequisites
# of the latter (e.g., -Wformat-nonliteral requires -Wformat).
EXTRA_FLAGS="-fgnu89-inline
-Wall
-Waggregate-return
-Wbad-function-cast
-Wcast-align
-Wdeclaration-after-statement
-Wendif-labels
-Wfloat-equal
-Wformat-security
-Wmissing-prototypes
-Wmissing-declarations
-Wnested-externs
-Wno-long-long
-Wno-strict-aliasing
-Wpointer-arith
-Wstrict-prototypes
-Wwrite-strings
-Wunused-but-set-variable
-Wunsigned-char"
if test "x$gcc_diagnostic_push_pull" = "xyes"; then
AC_DEFINE([GCC_FORMAT_NONLITERAL_CHECKING_ENABLED], [],
[gcc can complain about nonliterals in format])
EXTRA_FLAGS="$EXTRA_FLAGS
-Wformat=2
-Wformat-nonliteral"
else
if test "x$gcc_format_nonliteral" = "xyes"; then
EXTRA_FLAGS="$EXTRA_FLAGS -Wformat=2"
fi
fi
# Additional warnings it might be nice to enable one day
# -Wshadow
# -Wunreachable-code
for j in $EXTRA_FLAGS
do
if
cc_supports_flag $CC_EXTRAS $j
then
CC_EXTRAS="$CC_EXTRAS $j"
fi
done
if test "x${enable_ansi}" = xyes && cc_supports_flag -std=iso9899:199409 ; then
AC_MSG_NOTICE(Enabling ANSI Compatibility)
CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
fi
AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
fi
dnl
dnl Hardening flags
dnl
dnl The prime control of whether to apply (targeted) hardening build flags and
dnl which ones is --{enable,disable}-hardening option passed to ./configure:
dnl
dnl --enable-hardening=try (default):
dnl depending on whether any of CFLAGS_HARDENED_EXE, LDFLAGS_HARDENED_EXE,
dnl CFLAGS_HARDENED_LIB or LDFLAGS_HARDENED_LIB environment variables
dnl (see below) is set and non-null, all these custom flags (even if not
dnl set) are used as are, otherwise the best effort is made to offer
dnl reasonably strong hardening in several categories (RELRO, PIE,
dnl "bind now", stack protector) according to what the selected toolchain
dnl can offer
dnl
dnl --enable-hardening:
dnl same effect as --enable-hardening=try when the environment variables
dnl in question are suppressed
dnl
dnl --disable-hardening:
dnl do not apply any targeted hardening measures at all
dnl
dnl The user-injected environment variables that regulate the hardening in
dnl default case are as follows:
dnl
dnl * CFLAGS_HARDENED_EXE, LDFLAGS_HARDENED_EXE
dnl compiler and linker flags (respectively) for daemon programs
-dnl (attrd, cib, crmd, lrmd, stonithd, pacemakerd, pacemaker_remoted,
-dnl pengine)
+dnl (pacemaker-attrd, cib, crmd, lrmd, stonithd, pacemakerd,
+dnl pacemaker_remoted, pengine)
dnl
dnl * CFLAGS_HARDENED_LIB, LDFLAGS_HARDENED_LIB
dnl compiler and linker flags (respectively) for libraries linked
dnl with the daemon programs
dnl
dnl Note that these are purposely targeted variables (addressing particular
dnl targets all over the scattered Makefiles) and have no effect outside of
dnl the predestined scope (e.g., CLI utilities). For a global reach,
dnl use CFLAGS, LDFLAGS, etc. as usual.
dnl
dnl For guidance on the suitable flags consult, for instance:
dnl https://fedoraproject.org/wiki/Changes/Harden_All_Packages#Detailed_Harden_Flags_Description
dnl https://owasp.org/index.php/C-Based_Toolchain_Hardening#GCC.2FBinutils
dnl
if test "x${HARDENING}" != "xtry"; then
unset CFLAGS_HARDENED_EXE
unset CFLAGS_HARDENED_LIB
unset LDFLAGS_HARDENED_EXE
unset LDFLAGS_HARDENED_LIB
fi
if test "x${HARDENING}" = "xno"; then
AC_MSG_NOTICE([Hardening: explicitly disabled])
elif test "x${HARDENING}" = "xyes" \
|| test "$(env | grep -Ec '^(C|LD)FLAGS_HARDENED_(EXE|LIB)=.')" = 0; then
dnl We'll figure out on our own...
CFLAGS_HARDENED_EXE=
CFLAGS_HARDENED_LIB=
LDFLAGS_HARDENED_EXE=
LDFLAGS_HARDENED_LIB=
relro=0
pie=0
bindnow=0
# daemons incl. libs: partial RELRO
flag="-Wl,-z,relro"
CC_CHECK_LDFLAGS(["${flag}"],
[LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
LDFLAGS_HARDENED_LIB="${LDFLAGS_HARDENED_LIB} ${flag}";
relro=1])
# daemons: PIE for both CFLAGS and LDFLAGS
if cc_supports_flag -fPIE; then
flag="-pie"
CC_CHECK_LDFLAGS(["${flag}"],
[CFLAGS_HARDENED_EXE="${CFLAGS_HARDENED_EXE} -fPIE";
LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
pie=1])
fi
# daemons incl. libs: full RELRO if sensible + as-needed linking
# so as to possibly mitigate startup performance
# hit caused by excessive linking with unneeded
# libraries
if test "${relro}" = 1 && test "${pie}" = 1; then
flag="-Wl,-z,now"
CC_CHECK_LDFLAGS(["${flag}"],
[LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
LDFLAGS_HARDENED_LIB="${LDFLAGS_HARDENED_LIB} ${flag}";
bindnow=1])
fi
if test "${bindnow}" = 1; then
flag="-Wl,--as-needed"
CC_CHECK_LDFLAGS(["${flag}"],
[LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
LDFLAGS_HARDENED_LIB="${LDFLAGS_HARDENED_LIB} ${flag}"])
fi
# universal: prefer strong > all > default stack protector if possible
flag=
if cc_supports_flag -fstack-protector-strong; then
flag="-fstack-protector-strong"
elif cc_supports_flag -fstack-protector-all; then
flag="-fstack-protector-all"
elif cc_supports_flag -fstack-protector; then
flag="-fstack-protector"
fi
if test -n "${flag}"; then
CC_EXTRAS="${CC_EXTRAS} ${flag}"
stackprot=1
fi
if test "${relro}" = 1 \
|| test "${pie}" = 1 \
|| test "${stackprot}" = 1; then
AC_MSG_NOTICE([Hardening: relro=${relro} pie=${pie} bindnow=${bindnow} stackprot=${flag}])
else
AC_MSG_WARN([Hardening: no suitable features in the toolchain detected])
fi
else
AC_MSG_NOTICE([Hardening: using custom flags])
fi
CFLAGS="$CFLAGS $CC_EXTRAS"
NON_FATAL_CFLAGS="$CFLAGS"
AC_SUBST(NON_FATAL_CFLAGS)
dnl
dnl We reset CFLAGS to include our warnings *after* all function
dnl checking goes on, so that our warning flags don't keep the
dnl AC_*FUNCS() calls above from working. In particular, -Werror will
dnl *always* cause us troubles if we set it before here.
dnl
dnl
if test "x${enable_fatal_warnings}" = xyes ; then
AC_MSG_NOTICE(Enabling Fatal Warnings)
CFLAGS="$CFLAGS $WERROR"
fi
AC_SUBST(CFLAGS)
dnl This is useful for use in Makefiles that need to remove one specific flag
CFLAGS_COPY="$CFLAGS"
AC_SUBST(CFLAGS_COPY)
AC_SUBST(LIBADD_DL) dnl extra flags for dynamic linking libraries
AC_SUBST(LIBADD_INTL) dnl extra flags for GNU gettext stuff...
AC_SUBST(LOCALE)
dnl Options for cleaning up the compiler output
QUIET_LIBTOOL_OPTS=""
QUIET_MAKE_OPTS=""
if test "x${enable_quiet}" = "xyes"; then
QUIET_LIBTOOL_OPTS="--quiet"
QUIET_MAKE_OPTS="--quiet"
fi
AC_MSG_RESULT(Supress make details: ${enable_quiet})
dnl Put the above variables to use
LIBTOOL="${LIBTOOL} --tag=CC \$(QUIET_LIBTOOL_OPTS)"
MAKE="${MAKE} \$(QUIET_MAKE_OPTS)"
AC_SUBST(CC)
AC_SUBST(MAKE)
AC_SUBST(LIBTOOL)
AC_SUBST(QUIET_MAKE_OPTS)
AC_SUBST(QUIET_LIBTOOL_OPTS)
AC_DEFINE_UNQUOTED(CRM_FEATURES, "$PCMK_FEATURES", Set of enabled features)
AC_SUBST(PCMK_FEATURES)
dnl Files we output that need to be executable
AC_CONFIG_FILES([cts/CTSlab.py], [chmod +x cts/CTSlab.py])
AC_CONFIG_FILES([cts/LSBDummy], [chmod +x cts/LSBDummy])
AC_CONFIG_FILES([cts/OCFIPraTest.py], [chmod +x cts/OCFIPraTest.py])
AC_CONFIG_FILES([cts/cluster_test], [chmod +x cts/cluster_test])
AC_CONFIG_FILES([cts/cts], [chmod +x cts/cts])
AC_CONFIG_FILES([cts/cts-cli], [chmod +x cts/cts-cli])
AC_CONFIG_FILES([cts/cts-coverage], [chmod +x cts/cts-coverage])
AC_CONFIG_FILES([cts/cts-lrmd], [chmod +x cts/cts-lrmd])
AC_CONFIG_FILES([cts/cts-pengine], [chmod +x cts/cts-pengine])
AC_CONFIG_FILES([cts/cts-regression], [chmod +x cts/cts-regression])
AC_CONFIG_FILES([cts/cts-stonithd], [chmod +x cts/cts-stonithd])
AC_CONFIG_FILES([cts/lxc_autogen.sh], [chmod +x cts/lxc_autogen.sh])
AC_CONFIG_FILES([cts/benchmark/clubench], [chmod +x cts/benchmark/clubench])
AC_CONFIG_FILES([cts/fence_dummy], [chmod +x cts/fence_dummy])
AC_CONFIG_FILES([cts/pacemaker-cts-dummyd], [chmod +x cts/pacemaker-cts-dummyd])
AC_CONFIG_FILES([fencing/fence_legacy], [chmod +x fencing/fence_legacy])
AC_CONFIG_FILES([tools/crm_failcount], [chmod +x tools/crm_failcount])
AC_CONFIG_FILES([tools/crm_master], [chmod +x tools/crm_master])
AC_CONFIG_FILES([tools/crm_report], [chmod +x tools/crm_report])
AC_CONFIG_FILES([tools/crm_standby], [chmod +x tools/crm_standby])
AC_CONFIG_FILES([tools/cibsecret], [chmod +x tools/cibsecret])
dnl Other files we output
AC_CONFIG_FILES(Makefile \
Doxyfile \
cts/Makefile \
cts/CTS.py \
cts/CTSvars.py \
cts/benchmark/Makefile \
cts/pacemaker-cts-dummyd@.service \
cib/Makefile \
daemons/Makefile \
daemons/attrd/Makefile \
daemons/pacemakerd/Makefile \
daemons/pacemakerd/pacemaker \
daemons/pacemakerd/pacemaker.service \
daemons/pacemakerd/pacemaker.upstart \
daemons/pacemakerd/pacemaker.combined.upstart \
crmd/Makefile \
pengine/Makefile \
doc/Makefile \
doc/Clusters_from_Scratch/publican.cfg \
doc/Pacemaker_Administration/publican.cfg \
doc/Pacemaker_Development/publican.cfg \
doc/Pacemaker_Explained/publican.cfg \
doc/Pacemaker_Remote/publican.cfg \
fencing/Makefile \
include/Makefile \
include/crm/Makefile \
include/crm/cib/Makefile \
include/crm/common/Makefile \
include/crm/cluster/Makefile \
include/crm/fencing/Makefile \
include/crm/pengine/Makefile \
replace/Makefile \
lib/Makefile \
lib/pacemaker.pc \
lib/pacemaker-cib.pc \
lib/pacemaker-lrmd.pc \
lib/pacemaker-service.pc \
lib/pacemaker-pengine.pc \
lib/pacemaker-fencing.pc \
lib/pacemaker-cluster.pc \
lib/common/Makefile \
lib/cluster/Makefile \
lib/cib/Makefile \
lib/pengine/Makefile \
lib/transition/Makefile \
lib/fencing/Makefile \
lib/lrmd/Makefile \
lib/services/Makefile \
lrmd/Makefile \
lrmd/pacemaker_remote.service \
lrmd/pacemaker_remote \
extra/Makefile \
extra/alerts/Makefile \
extra/resources/Makefile \
extra/logrotate/Makefile \
extra/logrotate/pacemaker \
tools/Makefile \
tools/report.collector \
tools/report.common \
tools/crm_mon.service \
tools/crm_mon.upstart \
xml/Makefile \
lib/gnu/Makefile \
)
dnl Now process the entire list of files added by previous
dnl calls to AC_CONFIG_FILES()
AC_OUTPUT()
dnl *****************
dnl Configure summary
dnl *****************
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION} (Build: $BUILD_VERSION)])
AC_MSG_RESULT([ Features =${PCMK_FEATURES}])
AC_MSG_RESULT([])
AC_MSG_RESULT([ Prefix = ${prefix}])
AC_MSG_RESULT([ Executables = ${sbindir}])
AC_MSG_RESULT([ Man pages = ${mandir}])
AC_MSG_RESULT([ Libraries = ${libdir}])
AC_MSG_RESULT([ Header files = ${includedir}])
AC_MSG_RESULT([ Arch-independent files = ${datadir}])
AC_MSG_RESULT([ State information = ${localstatedir}])
AC_MSG_RESULT([ System configuration = ${sysconfdir}])
AC_MSG_RESULT([])
AC_MSG_RESULT([ HA group name = ${CRM_DAEMON_GROUP}])
AC_MSG_RESULT([ HA user name = ${CRM_DAEMON_USER}])
AC_MSG_RESULT([])
AC_MSG_RESULT([ CFLAGS = ${CFLAGS}])
AC_MSG_RESULT([ CFLAGS_HARDENED_EXE = ${CFLAGS_HARDENED_EXE}])
AC_MSG_RESULT([ CFLAGS_HARDENED_LIB = ${CFLAGS_HARDENED_LIB}])
AC_MSG_RESULT([ LDFLAGS_HARDENED_EXE = ${LDFLAGS_HARDENED_EXE}])
AC_MSG_RESULT([ LDFLAGS_HARDENED_LIB = ${LDFLAGS_HARDENED_LIB}])
AC_MSG_RESULT([ Libraries = ${LIBS}])
AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}])
diff --git a/crmd/attrd.c b/crmd/attrd.c
index b75551e41d..e181788940 100644
--- a/crmd/attrd.c
+++ b/crmd/attrd.c
@@ -1,142 +1,142 @@
/*
* Copyright 2006-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/attrd.h>
#include <crm/msg_xml.h>
#include <crmd_fsa.h>
#include <crmd_utils.h>
#include <crmd_messages.h>
crm_ipc_t *attrd_ipc = NULL;
/* Log a failure to carry out an attribute-manager request, and fast-track
 * shutdown if the failed request was itself the shutdown-via-attribute one.
 *
 * host       node the request applied to
 * name/value attribute name and value (for update requests)
 * is_remote  whether host is a Pacemaker Remote node
 * command    attrd command character that failed
 * rc         pcmk return code of the failure
 */
static void
log_attrd_error(const char *host, const char *name, const char *value,
                gboolean is_remote, char command, int rc)
{
    const char *verb = NULL; /* human-readable verb for commands without name/value */
    const char *layer = is_remote? "Pacemaker Remote" : "cluster";
    gboolean shutting_down = is_set(fsa_input_register, R_SHUTDOWN);
    const char *when = shutting_down? " at shutdown" : "";

    if (command == 'R') {
        verb = "refresh";
    } else if (command == 'C') {
        verb = "purge";
    }

    if (verb != NULL) {
        crm_err("Could not request %s of %s node %s%s: %s (%d)",
                verb, layer, host, when, pcmk_strerror(rc), rc);
    } else {
        crm_err("Could not request update of %s=%s for %s node %s%s: %s (%d)",
                name, value, layer, host, when, pcmk_strerror(rc), rc);
    }

    /* If we can't request shutdown via attribute, fast-track it */
    if ((command == 'U') && shutting_down) {
        register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
    }
}
/* Send a request to the attribute manager over IPC, retrying the connection
 * a few times before giving up and logging the failure.
 *
 * host           node the request applies to
 * name, value    attribute name/value for updates; reused as
 *                resource/operation when clearing failures (command == 0)
 * interval_spec  operation interval (only used when clearing failures)
 * user_name      user to perform the request as (NULL for current)
 * is_remote_node whether host is a Pacemaker Remote node
 * command        attrd command character; 0 means "clear failures" via
 *                attrd_clear_delegate() instead of attrd_update_delegate()
 */
static void
update_attrd_helper(const char *host, const char *name, const char *value,
                    const char *interval_spec, const char *user_name,
                    gboolean is_remote_node, char command)
{
    int rc;
    int max = 5;    /* connection retries remaining */
    int attrd_opts = attrd_opt_none;

    if (is_remote_node) {
        attrd_opts |= attrd_opt_remote;
    }

    /* Lazily create the IPC handle on first use; the file-scope attrd_ipc
     * keeps the connection across calls */
    if (attrd_ipc == NULL) {
        attrd_ipc = crm_ipc_new(T_ATTRD, 0);
    }

    do {
        /* (Re)connect if the connection is down or dropped on a prior pass */
        if (crm_ipc_connected(attrd_ipc) == FALSE) {
            crm_ipc_close(attrd_ipc);
            crm_info("Connecting to attribute manager ... %d retries remaining",
                     max);
            if (crm_ipc_connect(attrd_ipc) == FALSE) {
                crm_perror(LOG_INFO, "Connection to attribute manager failed");
            }
        }

        if (command) {
            rc = attrd_update_delegate(attrd_ipc, command, host, name, value,
                                       XML_CIB_TAG_STATUS, NULL, NULL,
                                       user_name, attrd_opts);
        } else {
            /* (ab)using name/value as resource/operation */
            rc = attrd_clear_delegate(attrd_ipc, host, name, value,
                                      interval_spec, user_name, attrd_opts);
        }

        if (rc == pcmk_ok) {
            break;

        } else if (rc != -EAGAIN && rc != -EALREADY) {
            /* hard error: drop the connection so the next pass reconnects */
            crm_info("Disconnecting from attribute manager: %s (%d)",
                     pcmk_strerror(rc), rc);
            crm_ipc_close(attrd_ipc);
        }

        /* growing backoff: 0s, 1s, 2s, ... as max counts down.
         * NOTE(review): this also sleeps (5s) after the final failed attempt,
         * before the loop exits — confirm that delay is intended */
        sleep(5 - max);

    } while (max--);

    if (rc != pcmk_ok) {
        log_attrd_error(host, name, value, is_remote_node, command, rc);
    }
}
/* Ask the attribute manager to update a node attribute.
 * Delegates to update_attrd_helper() with the 'U' (update) command;
 * no operation interval applies, so interval_spec is NULL.
 */
void
update_attrd(const char *host, const char *name, const char *value,
             const char *user_name, gboolean is_remote_node)
{
    update_attrd_helper(host, name, value, NULL, user_name,
                        is_remote_node, 'U');
}
void
update_attrd_remote_node_removed(const char *host, const char *user_name)
{
- crm_trace("Asking attrd to purge Pacemaker Remote node %s", host);
+ crm_trace("Asking pacemaker-attrd to purge Pacemaker Remote node %s", host);
update_attrd_helper(host, NULL, NULL, NULL, user_name, TRUE, 'C');
}
void
update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
const char *interval_spec, gboolean is_remote_node)
{
const char *op_desc = NULL;
const char *interval_desc = NULL;
const char *node_type = is_remote_node? "Pacemaker Remote" : "cluster";
if (op) {
interval_desc = interval_spec? interval_spec : "nonrecurring";
op_desc = op;
} else {
interval_desc = "all";
op_desc = "operations";
}
- crm_info("Asking attrd to clear failure of %s %s for %s on %s node %s",
+ crm_info("Asking pacemaker-attrd to clear failure of %s %s for %s on %s node %s",
interval_desc, op_desc, rsc, node_type, host);
update_attrd_helper(host, rsc, op, interval_spec, NULL, is_remote_node, 0);
}
diff --git a/crmd/control.c b/crmd/control.c
index 2c6b3bb26b..c665493deb 100644
--- a/crmd/control.c
+++ b/crmd/control.c
@@ -1,917 +1,906 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/common/ipcs.h>
#include <crmd.h>
#include <crmd_fsa.h>
#include <fsa_proto.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <crmd_alerts.h>
#include <crmd_metadata.h>
#include <tengine.h>
#include <throttle.h>
#include <sys/types.h>
#include <sys/stat.h>
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
bool no_quorum_suicide_escalation = FALSE;
/* Timer callback fired when an election did not complete in time */
static gboolean
election_timeout_popped(gpointer data)
{
    /* Not everyone voted, so declare this node the winner */
    crm_info("Election failed: Declaring ourselves the winner");
    register_fsa_input(C_TIMER_POPPED, I_ELECTION_DC, NULL);
    return FALSE;   /* NOTE(review): presumably one-shot — confirm timer semantics */
}
/* A_HA_CONNECT */

/* FSA action handler: connect to and/or disconnect from the cluster layer,
 * depending on which of A_HA_CONNECT / A_HA_DISCONNECT are set in 'action'.
 */
void
do_ha_control(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    gboolean registered = FALSE;
    /* Cluster object persists across invocations (connect after disconnect) */
    static crm_cluster_t *cluster = NULL;

    if (cluster == NULL) {
        cluster = calloc(1, sizeof(crm_cluster_t));
    }

    if (action & A_HA_DISCONNECT) {
        crm_cluster_disconnect(cluster);
        crm_info("Disconnected from the cluster");

        set_bit(fsa_input_register, R_HA_DISCONNECTED);
    }

    if (action & A_HA_CONNECT) {
        crm_set_status_callback(&peer_update_callback);
        crm_set_autoreap(FALSE);

        if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
            registered = crm_connect_corosync(cluster);
#endif
        }

        /* uname/uuid are presumably filled in by the connect call above —
         * confirm; a missing uuid is treated as a failed registration */
        fsa_election = election_init(NULL, cluster->uname, 60000/*60s*/, election_timeout_popped);
        fsa_our_uname = cluster->uname;
        fsa_our_uuid = cluster->uuid;
        if(cluster->uuid == NULL) {
            crm_err("Could not obtain local uuid");
            registered = FALSE;
        }

        if (registered == FALSE) {
            set_bit(fsa_input_register, R_HA_DISCONNECTED);
            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
            return;
        }

        populate_cib_nodes(node_update_none, __FUNCTION__);
        clear_bit(fsa_input_register, R_HA_DISCONNECTED);
        crm_info("Connected to the cluster");
    }

    if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
        crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
    }
}
/* A_SHUTDOWN */

/* FSA action handler: mark shutdown in progress and drop the fencer link */
void
do_shutdown(long long action,
            enum crmd_fsa_cause cause,
            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* just in case */
    set_bit(fsa_input_register, R_SHUTDOWN);

    if (stonith_api == NULL) {
        return;
    }

    /* Prevent it from coming up again */
    clear_bit(fsa_input_register, R_ST_REQUIRED);

    crm_info("Disconnecting STONITH...");
    stonith_api->cmds->disconnect(stonith_api);
}
/* A_SHUTDOWN_REQ */

/* FSA action handler: broadcast a shutdown request to all peers */
void
do_shutdown_req(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *request = NULL;

    set_bit(fsa_input_register, R_SHUTDOWN);
    crm_info("Sending shutdown request to all peers (DC is %s)",
             (fsa_our_dc? fsa_our_dc : "not set"));

    request = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL,
                             CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    /* set_bit(fsa_input_register, R_STAYDOWN); */
    if (send_cluster_message(NULL, crm_msg_crmd, request, TRUE) == FALSE) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
    free_xml(request);
}
extern crm_ipc_t *attrd_ipc;
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern char *te_client_id;
/* Terminate immediately via crm_exit(), remapping the requested exit code:
 * R_STAYDOWN forces CRM_EX_FATAL (to inhibit respawn, per the log message),
 * and a "successful" exit while in recovery becomes CRM_EX_ERROR.
 */
crm_exit_t
crmd_fast_exit(crm_exit_t exit_code)
{
    if (is_set(fsa_input_register, R_STAYDOWN)) {
        crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
                 exit_code, CRM_EX_FATAL);
        exit_code = CRM_EX_FATAL;

    } else if ((exit_code == CRM_EX_OK)
               && is_set(fsa_input_register, R_IN_RECOVERY)) {
        crm_err("Could not recover from internal error");
        exit_code = CRM_EX_ERROR;
    }
    return crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GListPtr gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
set_bit(fsa_input_register, R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
if (attrd_ipc) {
- crm_trace("Closing attrd connection");
+ crm_trace("Closing connection to pacemaker-attrd");
crm_ipc_close(attrd_ipc);
crm_ipc_destroy(attrd_ipc);
attrd_ipc = NULL;
}
pe_subsystem_free();
if(stonith_api) {
crm_trace("Disconnecting fencing API");
clear_bit(fsa_input_register, R_ST_REQUIRED);
stonith_api->cmds->free(stonith_api); stonith_api = NULL;
}
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
return crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
clear_bit(fsa_input_register, R_MEMBERSHIP);
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
metadata_cache_fini();
election_fini(fsa_election);
fsa_election = NULL;
/* Tear down the CIB connection, but don't free it yet -- it could be used
* when we drain the mainloop later.
*/
cib_free_callbacks(fsa_cib_conn);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
verify_stopped(fsa_state, LOG_WARNING);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
crm_client_cleanup();
crm_peer_destroy();
crm_timer_stop(transition_timer);
crm_timer_stop(integration_timer);
crm_timer_stop(finalization_timer);
crm_timer_stop(election_trigger);
election_timeout_stop(fsa_election);
crm_timer_stop(shutdown_escalation_timer);
crm_timer_stop(wait_timer);
crm_timer_stop(recheck_timer);
free(transition_timer); transition_timer = NULL;
free(integration_timer); integration_timer = NULL;
free(finalization_timer); finalization_timer = NULL;
free(election_trigger); election_trigger = NULL;
free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
free(wait_timer); wait_timer = NULL;
free(recheck_timer); recheck_timer = NULL;
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(te_client_id); te_client_id = NULL;
free(fsa_pe_ref); fsa_pe_ref = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */

/* FSA action handler: exit the daemon; A_EXIT_1 is the forceful/error path */
void
do_exit(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_exit_t exit_code = CRM_EX_OK;
    int log_level = LOG_INFO;
    const char *exit_type = "gracefully";

    if (action & A_EXIT_1) {
        /* Forceful exit: log louder and report an error status */
        log_level = LOG_ERR;
        exit_type = "forcefully";
        exit_code = CRM_EX_ERROR;
    }

    verify_stopped(cur_state, LOG_ERR);
    do_crm_log(log_level, "Performing %s - %s exiting the CRMd",
               fsa_action2string(action), exit_type);

    crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
    crmd_exit(exit_code);
}
/* SIGPIPE handler: deliberately empty (signal is registered in do_startup) */
static void
sigpipe_ignore(int nsig)
{
    (void) nsig;
}
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int was_error = 0;
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
crm_debug("Creating CIB and LRM objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
/* set up the timers */
transition_timer = calloc(1, sizeof(fsa_timer_t));
integration_timer = calloc(1, sizeof(fsa_timer_t));
finalization_timer = calloc(1, sizeof(fsa_timer_t));
election_trigger = calloc(1, sizeof(fsa_timer_t));
shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
wait_timer = calloc(1, sizeof(fsa_timer_t));
recheck_timer = calloc(1, sizeof(fsa_timer_t));
if (election_trigger != NULL) {
election_trigger->source_id = 0;
election_trigger->period_ms = -1;
election_trigger->fsa_input = I_DC_TIMEOUT;
election_trigger->callback = crm_timer_popped;
election_trigger->repeat = FALSE;
} else {
was_error = TRUE;
}
if (transition_timer != NULL) {
transition_timer->source_id = 0;
transition_timer->period_ms = -1;
transition_timer->fsa_input = I_PE_CALC;
transition_timer->callback = crm_timer_popped;
transition_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (integration_timer != NULL) {
integration_timer->source_id = 0;
integration_timer->period_ms = -1;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
integration_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (finalization_timer != NULL) {
finalization_timer->source_id = 0;
finalization_timer->period_ms = -1;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
finalization_timer->repeat = FALSE;
/* for possible enabling... a bug in the join protocol left
* a slave in S_PENDING while we think it's in S_NOT_DC
*
* raising I_FINALIZED put us into a transition loop which is
* never resolved.
* in this loop we continually send probes which the node
* NACK's because it's in S_PENDING
*
* if we have nodes where the cluster layer is active but the
* CRM is not... then this will be handled in the
* integration phase
*/
finalization_timer->fsa_input = I_ELECTION;
} else {
was_error = TRUE;
}
if (shutdown_escalation_timer != NULL) {
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = -1;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
shutdown_escalation_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (wait_timer != NULL) {
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
wait_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (recheck_timer != NULL) {
recheck_timer->source_id = 0;
recheck_timer->period_ms = -1;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
recheck_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (was_error) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/* libqb IPC callback: accept a new client connection, or reject it with
 * -EIO when client-object creation fails.
 */
static int32_t
crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    crm_trace("Connection %p", c);
    return (crm_client_new(c, uid, gid) == NULL)? -EIO : 0;
}
/* libqb IPC callback: a client connection object has been created */
static void
crmd_ipc_created(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
}
/* libqb IPC callback: decode, acknowledge, and route one client message */
static int32_t
crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    crm_client_t *client = crm_client_get(c);
    xmlNode *request = crm_ipcs_recv(client, data, size, &id, &flags);

    crm_trace("Invoked: %s", crm_client_name(client));

    /* Acknowledge receipt before checking whether decoding succeeded */
    crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
    if (request == NULL) {
        return 0;
    }

#if ENABLE_ACL
    CRM_ASSERT(client->user != NULL);
    crm_acl_get_set_user(request, F_CRM_USER, client->user);
#endif

    crm_trace("Processing msg from %s", crm_client_name(client));
    crm_log_xml_trace(request, "CRMd[inbound]");

    crm_xml_add(request, F_CRM_SYS_FROM, client->id);
    if (crmd_authorize_message(request, client, NULL)) {
        route_message(C_IPC_MESSAGE, request);
    }

    trigger_fsa(fsa_source);
    free_xml(request);
    return 0;
}
/* libqb IPC callback: tear down the client object for a closed connection */
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
    crm_client_t *client = crm_client_get(c);

    if (client == NULL) {
        return 0;
    }

    crm_trace("Disconnecting %sregistered client %s (%p/%p)",
              (client->userdata? "" : "un"), crm_client_name(client),
              c, client);
    free(client->userdata);
    crm_client_destroy(client);
    trigger_fsa(fsa_source);
    return 0;
}
/* libqb IPC callback: connection destroyed — ensure closed-handling ran */
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
    crmd_ipc_closed(c);
}
/* A_STOP */

/* FSA action handler: stop accepting IPC clients and request termination */
void
do_stop(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_trace("Closing IPC server");
    mainloop_del_ipc_server(ipcs); ipcs = NULL;
    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */

/* FSA action handler: final startup step. Stalls until membership, LRM, CIB,
 * configuration, and peer data are all available, then starts the IPC server
 * and the fencer-connection trigger.
 */
void
do_started(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    static struct qb_ipcs_service_handlers crmd_callbacks = {
        .connection_accept = crmd_ipc_accept,
        .connection_created = crmd_ipc_created,
        .msg_process = crmd_ipc_dispatch,
        .connection_closed = crmd_ipc_closed,
        .connection_destroyed = crmd_ipc_destroy
    };

    if (cur_state != S_STARTING) {
        crm_err("Start cancelled... %s", fsa_state2string(cur_state));
        return;

    } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
        crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
        crmd_fsa_stall(TRUE);
        return;

    } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
        crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED);
        crmd_fsa_stall(TRUE);
        return;

    } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
        crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
        crmd_fsa_stall(TRUE);
        return;

    } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
        crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
        crmd_fsa_stall(TRUE);
        return;

    } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
        crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
        crmd_fsa_stall(TRUE);
        return;
    }

    crm_debug("Init server comms");
    ipcs = crmd_ipc_server_init(&crmd_callbacks);
    if (ipcs == NULL) {
        crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }

    if (stonith_reconnect == NULL) {
        /* FIX: the trigger outlives this function call, so its user_data
         * must not point at an automatic variable (previously a dangling
         * stack pointer); make the placeholder static instead.
         */
        static int dummy = 0;

        stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
    }
    set_bit(fsa_input_register, R_ST_REQUIRED);
    mainloop_set_trigger(stonith_reconnect);

    crm_notice("The local CRM is operational");
    clear_bit(fsa_input_register, R_STARTING);
    register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */

/* FSA action handler: mark recovery in progress and fast-track a shutdown */
void
do_recover(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    set_bit(fsa_input_register, R_IN_RECOVERY);
    crm_warn("Fast-tracking shutdown in response to errors");

    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* *INDENT-OFF* */
/* Cluster options advertised and validated by the CRM daemon.
 * FIX: three long descriptions read "If need to adjust..." while the fourth
 * reads "If you need to adjust..." — normalized to the grammatical form.
 */
static pe_cluster_option crmd_opts[] = {
    /* name, old-name, validate, values, default, short description, long description */
    { "dc-version", NULL, "string", NULL, "none", NULL,
      "Version of Pacemaker on the cluster's DC.",
      "Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
    },
    { "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
      "The messaging stack on which Pacemaker is currently running.",
      "Used for informational and diagnostic purposes." },
    { XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", &check_time,
      "How long to wait for a response from other nodes during startup.",
      "The \"correct\" value will depend on the speed/load of your network and the type of switches used."
    },
    { XML_CONFIG_ATTR_RECHECK, NULL, "time",
      "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
      "15min", &check_timer,
      "Polling interval for time based changes to options, resource parameters and constraints.",
      "The Cluster is primarily event driven, however the configuration can have elements that change based on time."
      " To ensure these changes take effect, we can optionally poll the cluster's status for changes."
    },
    { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
      "The maximum amount of system resources that should be used by nodes in the cluster",
      "The cluster will slow down its recovery process when the amount of system resources used"
      " (currently CPU) approaches this limit",
    },
    { "node-action-limit", NULL, "integer", NULL, "0", &check_number,
      "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
    { XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer,
      "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
    },
    { XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer,
      "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
    },
    { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer,
      "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
    },
    { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer,
      "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
    },
    { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer,
      "*** Advanced Use Only ***\n"
      "Enabling this option will slow down cluster recovery under all conditions",
      "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
      "Useful if your configuration is sensitive to the order in which ping updates arrive."
    },
    { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
      "How long to wait before we can assume nodes are safely down", NULL
    },
    { "stonith-max-attempts", NULL, "integer", NULL, "10", &check_positive_number,
      "How many times stonith can fail before it will no longer be attempted on a target"
    },
    { "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
};
/* *INDENT-ON* */
/* Expose the CRM daemon's configurable options (crmd_opts) as resource-style
 * metadata via config_metadata().
 */
void
crmd_metadata(void)
{
    config_metadata("CRM Daemon", "1.0",
                    "CRM Daemon Options",
                    "This is a fake resource that details the options that can be configured for the CRM Daemon.",
                    crmd_opts, DIMOF(crmd_opts));
}
/* Validate a table of configured option values against the known crmd_opts */
static void
verify_crmd_options(GHashTable * options)
{
    verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
}
/* Look up option 'name' in 'options' via the crmd_opts table.
 * NOTE(review): callers pass options == NULL to obtain the built-in default
 * (see crm_shutdown) — confirm against get_cluster_pref().
 */
static const char *
crmd_pref(GHashTable * options, const char *name)
{
    return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
}
/* CIB callback for the crm_read_options() query: parse the crm_config
 * section into a hash table, apply each option to the relevant timer or
 * subsystem, unpack alert definitions, and mark the configuration as read.
 */
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    const char *value = NULL;
    GHashTable *config_hash = NULL;
    crm_time_t *now = crm_time_new(NULL);
    xmlNode *crmconfig = NULL;
    xmlNode *alerts = NULL;

    if (rc != pcmk_ok) {
        fsa_data_t *msg_data = NULL;

        crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);

        /* ACL or schema failures are not recoverable by a restart */
        if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
            crm_err("The cluster is mis-configured - shutting down and staying down");
            set_bit(fsa_input_register, R_STAYDOWN);
        }
        goto bail;
    }

    /* The xpath query may return a parent element; descend to crm_config */
    crmconfig = output;
    if ((crmconfig) &&
        (crm_element_name(crmconfig)) &&
        (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
        crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
    }
    if (!crmconfig) {
        fsa_data_t *msg_data = NULL;

        crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        goto bail;
    }

    crm_debug("Call %d : Parsing CIB options", call_id);
    config_hash = crm_str_table_new();
    unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash,
                               CIB_OPTIONS_FIRST, FALSE, now);

    verify_crmd_options(config_hash);

    value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
    election_trigger->period_ms = crm_get_msec(value);

    value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
    throttle_update_job_max(value);

    value = crmd_pref(config_hash, "load-threshold");
    if(value) {
        throttle_set_load_target(strtof(value, NULL) / 100.0);
    }

    value = crmd_pref(config_hash, "no-quorum-policy");
    if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
        no_quorum_suicide_escalation = TRUE;
    }

    value = crmd_pref(config_hash,"stonith-max-attempts");
    update_stonith_max_attempts(value);

    value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
    shutdown_escalation_timer->period_ms = crm_get_msec(value);
    crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);

    /* How long to declare an election over - even if not everyone voted */
    value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
    election_timeout_set_period(fsa_election, crm_get_msec(value));

    value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
    recheck_timer->period_ms = crm_get_msec(value);
    crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms);

    value = crmd_pref(config_hash, "crmd-transition-delay");
    transition_timer->period_ms = crm_get_msec(value);

    value = crmd_pref(config_hash, "crmd-integration-timeout");
    integration_timer->period_ms = crm_get_msec(value);

    value = crmd_pref(config_hash, "crmd-finalization-timeout");
    finalization_timer->period_ms = crm_get_msec(value);

    free(fsa_cluster_name);
    fsa_cluster_name = NULL;

    value = g_hash_table_lookup(config_hash, "cluster-name");
    if (value) {
        fsa_cluster_name = strdup(value);
    }

    alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
    crmd_unpack_alerts(alerts);

    set_bit(fsa_input_register, R_READ_CONFIG);
    crm_trace("Triggering FSA: %s", __FUNCTION__);
    mainloop_set_trigger(fsa_source);

    g_hash_table_destroy(config_hash);
  bail:
    crm_time_free(now);
}
/* Mainloop trigger: asynchronously query crm_config and alerts from the
 * local CIB; results are handled by config_query_callback().
 */
gboolean
crm_read_options(gpointer user_data)
{
    int call_id;

    call_id = fsa_cib_conn->cmds->query(fsa_cib_conn,
                                        "//" XML_CIB_TAG_CRMCONFIG
                                        " | //" XML_CIB_TAG_ALERTS,
                                        NULL, cib_xpath | cib_scope_local);
    fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
    crm_trace("Querying the CIB... call %d", call_id);
    return TRUE;
}
/* A_READCONFIG */

/* FSA action handler: initialize throttling and schedule the CIB
 * configuration query (see crm_read_options) on the mainloop.
 */
void
do_read_config(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    throttle_init();
    mainloop_set_trigger(config_read);
}
/* SIGTERM handler: request a graceful shutdown through the FSA, escalating
 * to an error-path shutdown on a second signal, and exiting directly when
 * no mainloop is running yet.
 */
void
crm_shutdown(int nsig)
{
    if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) {
        if (is_set(fsa_input_register, R_SHUTDOWN)) {
            /* Second request: shutdown is already underway, escalate */
            crm_err("Escalating the shutdown");
            register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);

        } else {
            set_bit(fsa_input_register, R_SHUTDOWN);
            register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);

            /* If the escalation timer was never configured from the CIB,
             * fall back to the built-in default */
            if (shutdown_escalation_timer->period_ms < 1) {
                const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
                int msec = crm_get_msec(value);

                crm_debug("Using default shutdown escalation: %dms", msec);
                shutdown_escalation_timer->period_ms = msec;
            }

            /* can't rely on this... */
            crm_notice("Shutting down cluster resource manager " CRM_XS
                       " limit=%dms", shutdown_escalation_timer->period_ms);
            crm_timer_start(shutdown_escalation_timer);
        }

    } else {
        crm_info("exit from shutdown");
        crmd_exit(CRM_EX_OK);
    }
}
diff --git a/crmd/lrm.c b/crmd/lrm.c
index 7ff7b12080..d88a769939 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -1,2671 +1,2671 @@
/*
* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crmd.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <regex.h>
#include <crm/pengine/rules.h>
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
#define s_if_plural(i) (((i) == 1)? "" : "s")
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
const char *user_name);
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
const char *rsc_id, const char *operation);
static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
xmlNode * msg, xmlNode * request);
void send_direct_ack(const char *to_host, const char *to_sys,
lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
static void
lrm_connection_destroy(void)
{
if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("LRM Connection failed");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
} else {
crm_info("LRM Connection disconnected");
}
}
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
/* GHashTable foreach helper: copy everything EXCEPT CRM meta-attribute
 * entries (keys containing "CRM_meta_") into the destination table.
 */
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
    const char *key_s = key;

    if (strstr(key_s, CRM_META "_") == NULL) {
        g_hash_table_replace(user_data, strdup(key_s),
                             strdup((const char *)value));
    }
}
/* GHashTable foreach helper: copy ONLY CRM meta-attribute entries (keys
 * containing "CRM_meta_") into the destination table.
 */
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
    const char *key_s = key;

    if (strstr(key_s, CRM_META "_") != NULL) {
        g_hash_table_replace(user_data, strdup(key_s),
                             strdup((const char *)value));
    }
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
    GList *iter = NULL;

    for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
        lrmd_event_data_t *candidate = iter->data;

        /* Match on interval, resource ID, and operation type */
        if ((op->interval_ms != candidate->interval_ms)
            || (crm_str_eq(op->rsc_id, candidate->rsc_id, TRUE) == FALSE)
            || (safe_str_eq(op->op_type, candidate->op_type) == FALSE)) {
            continue;
        }
        history->recurring_op_list = g_list_delete_link(history->recurring_op_list,
                                                        iter);
        lrmd_free_event(candidate);
        return TRUE;
    }
    return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
    GList *iter = history->recurring_op_list;

    /* Free each event, then release the list itself */
    while (iter != NULL) {
        lrmd_free_event(iter->data);
        iter = iter->next;
    }
    g_list_free(history->recurring_op_list);
    history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] history Resource history to free
*/
void
history_free(gpointer data)
{
    rsc_history_t *history = data;

    if (history->stop_params != NULL) {
        g_hash_table_destroy(history->stop_params);
    }

    /* history->rsc.id aliases history->id, so only the latter is freed */
    free(history->rsc.type);
    free(history->rsc.standard);
    free(history->rsc.provider);

    lrmd_free_event(history->failed);
    lrmd_free_event(history->last);
    free(history->id);
    history_free_recurring_ops(history);
    free(history);
}
/*!
 * \internal
 * \brief Update the in-memory resource history after an operation result
 *
 * Maintains lrm_state->resource_history: creates an entry on first sight of
 * a resource, records last operation and last failure, tracks recurring
 * operations, and caches the parameters a later stop should reuse.
 *
 * \param[in,out] lrm_state  Executor state whose history cache to update
 * \param[in]     rsc        Resource information (may be NULL if unknown)
 * \param[in]     op         Completed (or cancelled) operation event
 */
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
    int target_rc = 0;
    rsc_history_t *entry = NULL;

    if (op->rsc_deleted) {
        /* Resource was removed from the executor; purge its status
         * section from the CIB as well */
        crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
        delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
        return;
    }

    // Notify actions are not recorded in history
    if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
        return;
    }
    crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);

    entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
    if (entry == NULL && rsc) {
        /* First result for this resource: create a cache entry keyed by
         * resource ID (entry->id doubles as entry->rsc.id; see history_free) */
        entry = calloc(1, sizeof(rsc_history_t));
        entry->id = strdup(op->rsc_id);
        g_hash_table_insert(lrm_state->resource_history, entry->id, entry);

        entry->rsc.id = entry->id;
        entry->rsc.type = strdup(rsc->type);
        entry->rsc.standard = strdup(rsc->standard);
        if (rsc->provider) {
            entry->rsc.provider = strdup(rsc->provider);
        } else {
            entry->rsc.provider = NULL;
        }

    } else if (entry == NULL) {
        crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
        return;
    }

    entry->last_callid = op->call_id;
    target_rc = rsc_op_expected_rc(op);
    if (op->op_status == PCMK_LRM_OP_CANCELLED) {
        if (op->interval_ms > 0) {
            crm_trace("Removing cancelled recurring op: " CRM_OP_FMT,
                      op->rsc_id, op->op_type, op->interval_ms);
            history_remove_recurring_op(entry, op);
            return;
        } else {
            // Cancelled one-shot ops are not otherwise recorded
            crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d",
                      op->rsc_id, op->op_type, op->interval_ms, op->rc,
                      op->op_status);
        }

    } else if (did_rsc_op_fail(op, target_rc)) {
        /* Store failed monitors here, otherwise the block below will cause them
         * to be forgotten when a stop happens.
         */
        if (entry->failed) {
            lrmd_free_event(entry->failed);
        }
        entry->failed = lrmd_copy_event(op);

    } else if (op->interval_ms == 0) {
        // Successful one-shot op becomes the resource's "last" operation
        if (entry->last) {
            lrmd_free_event(entry->last);
        }
        entry->last = lrmd_copy_event(op);

        if (op->params &&
            (safe_str_eq(CRMD_ACTION_START, op->op_type) ||
             safe_str_eq("reload", op->op_type) ||
             safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {

            /* Cache the instance parameters so a later stop can be executed
             * with the same values the resource was started with */
            if (entry->stop_params) {
                g_hash_table_destroy(entry->stop_params);
            }
            entry->stop_params = crm_str_table_new();

            g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
        }
    }

    if (op->interval_ms > 0) {
        /* Ensure there are no duplicates */
        history_remove_recurring_op(entry, op);

        crm_trace("Adding recurring op: " CRM_OP_FMT,
                  op->rsc_id, op->op_type, op->interval_ms);
        entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));

    } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
        /* A one-shot non-monitor result (e.g. stop) invalidates any
         * recurring ops still cached for this resource */
        crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT,
                  g_list_length(entry->recurring_op_list), op->rsc_id,
                  op->op_type, op->interval_ms);
        history_free_recurring_ops(entry);
    }
}
/*!
 * \internal
 * \brief Send a direct OK ack for a resource task
 *
 * Builds a synthetic successful operation event for \p task and sends it
 * directly to the requester, then frees the event.
 *
 * \param[in] lrm_state  LRM connection
 * \param[in] input      Input message being ack'ed
 * \param[in] rsc_id     ID of affected resource
 * \param[in] rsc        Affected resource (if available)
 * \param[in] task       Operation task being ack'ed
 * \param[in] ack_host   Name of host to send ack to
 * \param[in] ack_sys    IPC system name to ack
 */
static void
send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input,
                 const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task,
                 const char *ack_host, const char *ack_sys)
{
    lrmd_event_data_t *ack_op = NULL;

    ack_op = construct_op(lrm_state, input->xml, rsc_id, task);
    ack_op->rc = PCMK_OCF_OK;
    ack_op->op_status = PCMK_LRM_OP_DONE;

    send_direct_ack(ack_host, ack_sys, rsc, ack_op, rsc_id);
    lrmd_free_event(ack_op);
}
/*!
 * \brief Callback for events from the local or a remote executor
 *
 * Execution results are dispatched to process_lrm_event(); a disconnect of
 * the local executor IPC tears down the crmd's LRM connection state. All
 * other event types are ignored.
 *
 * \param[in] op  Executor event
 */
void
lrm_op_callback(lrmd_event_data_t * op)
{
    const char *nodename = NULL;
    lrm_state_t *lrm_state = NULL;

    CRM_CHECK(op != NULL, return);

    /* Remote executor events carry the remote node's name; otherwise the
     * event concerns the local node */
    nodename = (op->remote_nodename == NULL)? fsa_our_uname : op->remote_nodename;

    if ((op->type == lrmd_event_disconnect)
        && safe_str_eq(nodename, fsa_our_uname)) {
        /* if this is the local lrmd ipc connection, set the right bits in the
         * crmd when the connection goes down */
        lrm_connection_destroy();
        return;
    }

    if (op->type != lrmd_event_exec_complete) {
        /* we only need to process execution results */
        return;
    }

    lrm_state = lrm_state_find(nodename);
    CRM_ASSERT(lrm_state != NULL);
    process_lrm_event(lrm_state, op, NULL);
}
/* A_LRM_CONNECT */
/*!
 * \brief FSA action handler for connecting to / disconnecting from the
 *        local executor (lrmd)
 *
 * \param[in] action         Action(s) to perform (A_LRM_CONNECT and/or
 *                           A_LRM_DISCONNECT bits)
 * \param[in] cause          Unused here
 * \param[in] cur_state      Current FSA state (passed to stop verification)
 * \param[in] current_input  Unused here
 * \param[in] msg_data       Unused here
 */
void
do_lrm_control(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* This only pertains to local lrmd connections. Remote connections are handled as
     * resources within the pengine. Connecting and disconnecting from remote lrmd instances
     * handled differently than the local. */

    lrm_state_t *lrm_state = NULL;

    if(fsa_our_uname == NULL) {
        return; /* Nothing to do */
    }
    lrm_state = lrm_state_find_or_create(fsa_our_uname);
    if (lrm_state == NULL) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    if (action & A_LRM_DISCONNECT) {
        if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
            /* Only stall for a pure disconnect; when combined with other
             * action bits, proceed regardless of active resources */
            if (action == A_LRM_DISCONNECT) {
                crmd_fsa_stall(FALSE);
                return;
            }
        }

        clear_bit(fsa_input_register, R_LRM_CONNECTED);
        crm_info("Disconnecting from the LRM");
        lrm_state_disconnect(lrm_state);
        lrm_state_reset_tables(lrm_state, FALSE);
        crm_notice("Disconnected from the LRM");
    }

    if (action & A_LRM_CONNECT) {
        int ret = pcmk_ok;

        crm_debug("Connecting to the LRM");
        ret = lrm_state_ipc_connect(lrm_state);
        if (ret != pcmk_ok) {
            if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
                /* Under the retry limit: wait and stall so the FSA retries
                 * this action later */
                crm_warn("Failed to connect to the LRM %d time%s (%d max)",
                         lrm_state->num_lrm_register_fails,
                         s_if_plural(lrm_state->num_lrm_register_fails),
                         MAX_LRM_REG_FAILS);

                crm_timer_start(wait_timer);
                crmd_fsa_stall(FALSE);
                return;
            }
        }
        if (ret != pcmk_ok) {
            // Retries exhausted: treat as an internal FSA error
            crm_err("Failed to connect to the LRM the max allowed %d time%s",
                    lrm_state->num_lrm_register_fails,
                    s_if_plural(lrm_state->num_lrm_register_fails));
            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
            return;
        }

        set_bit(fsa_input_register, R_LRM_CONNECTED);
        crm_info("LRM connection established");
    }

    if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
        crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
    }
}
/*!
 * \internal
 * \brief Check whether all resources and operations are stopped before exit
 *
 * Stops recurring actions, counts pending one-shot operations, and logs any
 * resources still recorded as active in the history cache.
 *
 * \param[in] lrm_state  Executor state to check
 * \param[in] cur_state  Current FSA state (S_TERMINATE forces LOG_ERR)
 * \param[in] log_level  Log level for pending-operation messages
 *
 * \return FALSE if the caller should wait for pending operations to finish,
 *         TRUE otherwise (note: active resources alone do not return FALSE;
 *         they are only logged)
 */
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
    int counter = 0;
    gboolean rc = TRUE;
    const char *when = "lrm disconnect";

    GHashTableIter gIter;
    const char *key = NULL;
    rsc_history_t *entry = NULL;
    struct recurring_op_s *pending = NULL;

    crm_debug("Checking for active resources before exit");

    if (cur_state == S_TERMINATE) {
        log_level = LOG_ERR;
        when = "shutdown";

    } else if (is_set(fsa_input_register, R_SHUTDOWN)) {
        when = "shutdown... waiting";
    }

    // First, cancel any still-running recurring actions
    if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
        guint removed = g_hash_table_foreach_remove(
            lrm_state->pending_ops, stop_recurring_actions, lrm_state);
        guint nremaining = g_hash_table_size(lrm_state->pending_ops);

        if (removed || nremaining) {
            crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
                       removed, s_if_plural(removed), when, nremaining);
        }
    }

    // Count pending one-shot operations (these must complete before exit)
    if (lrm_state->pending_ops) {
        g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
        while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
            /* Ignore recurring actions in the shutdown calculations */
            if (pending->interval_ms == 0) {
                counter++;
            }
        }
    }

    if (counter > 0) {
        do_crm_log(log_level, "%d pending LRM operation%s at %s",
                   counter, s_if_plural(counter), when);

        if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
            // Terminating (or stops not yet requested): just log the details
            g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
            while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
                do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
            }

        } else {
            // Stops were requested; tell the caller to wait for them
            rc = FALSE;
        }
        return rc;
    }

    if (lrm_state->resource_history == NULL) {
        return rc;
    }

    if (is_set(fsa_input_register, R_SHUTDOWN)) {
        /* At this point we're not waiting, we're just shutting down */
        when = "shutdown";
    }

    // Second pass: log resources the history cache still considers active
    counter = 0;
    g_hash_table_iter_init(&gIter, lrm_state->resource_history);
    while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
        if (is_rsc_active(lrm_state, entry->id) == FALSE) {
            continue;
        }

        counter++;
        if (log_level == LOG_ERR) {
            crm_info("Found %s active at %s", entry->id, when);
        } else {
            crm_trace("Found %s active at %s", entry->id, when);
        }
        // Also log any pending operations belonging to this active resource
        if (lrm_state->pending_ops) {
            GHashTableIter hIter;

            g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
            while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
                if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
                    crm_notice("%sction %s (%s) incomplete at %s",
                               pending->interval_ms == 0 ? "A" : "Recurring a",
                               key, pending->op_key, when);
                }
            }
        }
    }

    if (counter) {
        crm_err("%d resource%s active at %s",
                counter, (counter == 1)? " was" : "s were", when);
    }

    return rc;
}
/*!
 * \internal
 * \brief Build a space-separated list of resource parameters of a given type
 *
 * Walks the agent metadata's parameter list and collects the names of all
 * parameters flagged with \p param_type into a newly allocated string of the
 * form " name1  name2 " (each name surrounded by spaces to simplify parsing).
 * Optionally, matching (or non-matching, if \p invert_for_xml) parameter
 * values from the operation are copied into \p result as XML attributes.
 *
 * \param[in]  op              Operation whose parameter values may be copied
 * \param[in]  metadata        Parsed resource agent metadata
 * \param[out] result          If not NULL, XML node to add parameter values to
 * \param[in]  param_type      Parameter flag to select on
 * \param[in]  invert_for_xml  If TRUE, add non-matching (rather than matching)
 *                             parameters to \p result
 *
 * \return Newly allocated string of selected parameter names (caller must
 *         free), or NULL if none matched
 */
static char *
build_parameter_list(const lrmd_event_data_t *op,
                     const struct ra_metadata_s *metadata,
                     xmlNode *result, enum ra_param_flags_e param_type,
                     bool invert_for_xml)
{
    int len = 0;
    int max = 0;
    char *list = NULL;
    GList *iter = NULL;

    /* Newer resource agents support the "private" parameter attribute to
     * indicate sensitive parameters. For backward compatibility with older
     * agents, this list is used if the agent doesn't specify any as "private".
     */
    const char *secure_terms[] = {
        "password",
        "passwd",
        "user",
    };

    if (is_not_set(metadata->ra_flags, ra_uses_private)
        && (param_type == ra_param_private)) {

        // Fall back to matching well-known sensitive parameter names
        max = DIMOF(secure_terms);
    }

    for (iter = metadata->ra_params; iter != NULL; iter = iter->next) {
        struct ra_param_s *param = (struct ra_param_s *) iter->data;
        bool accept = FALSE;

        if (is_set(param->rap_flags, param_type)) {
            accept = TRUE;

        } else if (max) {
            for (int lpc = 0; lpc < max; lpc++) {
                if (safe_str_eq(secure_terms[lpc], param->rap_name)) {
                    accept = TRUE;
                    break;
                }
            }
        }

        if (accept) {
            int start = len;

            crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));

            // Grow the buffer and append " <name> " at the previous end
            len += strlen(param->rap_name) + 2; // include spaces around
            list = realloc_safe(list, len + 1); // include null terminator

            // spaces before and after make parsing simpler
            sprintf(list + start, " %s ", param->rap_name);

        } else {
            crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
        }

        if (result && (invert_for_xml? !accept : accept)) {
            const char *v = g_hash_table_lookup(op->params, param->rap_name);

            if (v != NULL) {
                crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
                crm_xml_add(result, param->rap_name, v);
            }
        }
    }

    return list;
}
/*!
 * \internal
 * \brief Add reload-related attributes to an operation's CIB update
 *
 * If the agent supports reload, add "op-force-restart" (parameters with
 * unique="1") and "op-restart-digest" to \p update. Recurring monitors and
 * agents without reload support get no attributes.
 *
 * \param[in]     op        Operation being recorded
 * \param[in]     metadata  Parsed agent metadata
 * \param[in,out] update    XML node to add attributes to
 * \param[in]     version   CRM feature set to use for digest calculation
 */
static void
append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
                    xmlNode *update, const char *version)
{
    char *list = NULL;
    char *digest = NULL;
    xmlNode *restart = NULL;

    CRM_LOG_ASSERT(op->params != NULL);

    if (op->interval_ms > 0) {
        /* monitors are not reloadable */
        return;
    }

    if (is_set(metadata->ra_flags, ra_supports_reload) == FALSE) {
        /* Resource does not support reloads */
        return;
    }

    restart = create_xml_node(NULL, XML_TAG_PARAMS);

    /* Add any parameters with unique="1" to the "op-force-restart" list.
     *
     * (Currently, we abuse "unique=0" to indicate reloadability. This is
     * nonstandard and should eventually be replaced once the OCF standard
     * is updated with something better.)
     */
    list = build_parameter_list(op, metadata, restart, ra_param_unique,
                                FALSE);

    digest = calculate_operation_digest(restart, version);

    /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
     * no matter if it actually supports any parameters with unique="1"). */
    crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: "");
    crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);

    crm_trace("%s: %s, %s", op->rsc_id, digest, list);
    crm_log_xml_trace(restart, "restart digest source");

    free_xml(restart);
    free(digest);
    free(list);
}
/*!
 * \internal
 * \brief Add secure-parameter attributes to an operation's CIB update
 *
 * Adds "op-secure-params" (the names of sensitive parameters) and
 * "op-secure-digest" (a digest computed over the *non*-sensitive parameters)
 * to \p update, when the agent has any sensitive parameters.
 *
 * \param[in]     op        Operation being recorded
 * \param[in]     metadata  Parsed agent metadata
 * \param[in,out] update    XML node to add attributes to
 * \param[in]     version   CRM feature set to use for digest calculation
 */
static void
append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
                   xmlNode *update, const char *version)
{
    char *list = NULL;
    char *digest = NULL;
    xmlNode *secure = NULL;

    CRM_LOG_ASSERT(op->params != NULL);

    /*
     * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
     * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
     * the insecure ones
     */
    secure = create_xml_node(NULL, XML_TAG_PARAMS);
    list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE);

    if (list == NULL) {
        crm_trace("%s: no secure parameters", op->rsc_id);
    } else {
        digest = calculate_operation_digest(secure, version);
        crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
        crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);

        crm_trace("%s: %s, %s", op->rsc_id, digest, list);
        crm_log_xml_trace(secure, "secure digest source");
    }

    free_xml(secure);
    free(digest);
    free(list);
}
/*!
 * \internal
 * \brief Create a CIB update entry for an operation result
 *
 * Adds an lrm_rsc_op entry under \p parent for \p op, including restart and
 * secure parameter digests when agent metadata is available (fetching and
 * caching the metadata synchronously if necessary).
 *
 * \param[in,out] parent     XML node to create the operation entry under
 * \param[in]     rsc        Resource information (may be NULL)
 * \param[in]     op         Operation result (may be NULL)
 * \param[in]     node_name  Node the operation was executed on
 * \param[in]     src        Caller name, for logging
 *
 * \return FALSE only if \p op is NULL, otherwise TRUE (even on partial
 *         failures such as missing metadata)
 */
static gboolean
build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
                       const char *node_name, const char *src)
{
    int target_rc = 0;
    xmlNode *xml_op = NULL;
    struct ra_metadata_s *metadata = NULL;
    const char *caller_version = NULL;
    lrm_state_t *lrm_state = NULL;

    if (op == NULL) {
        return FALSE;
    }

    target_rc = rsc_op_expected_rc(op);

    /* there is a small risk in formerly mixed clusters that it will
     * be sub-optimal.
     *
     * however with our upgrade policy, the update we send should
     * still be completely supported anyway
     */
    caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
    CRM_LOG_ASSERT(caller_version != NULL);

    if(caller_version == NULL) {
        caller_version = CRM_FEATURE_SET;
    }

    crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
    xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
    if (xml_op == NULL) {
        return TRUE;
    }

    // Digests require resource info, parameters, and a metadata-using action
    if ((rsc == NULL) || (op->params == NULL)
        || !crm_op_needs_metadata(rsc->standard, op->op_type)) {

        crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
                  op->op_type, op->rsc_id, op->params, rsc);
        return TRUE;
    }

    lrm_state = lrm_state_find(node_name);
    if (lrm_state == NULL) {
        crm_warn("Cannot calculate digests for operation " CRM_OP_FMT
                 " because we have no LRM connection to %s",
                 op->rsc_id, op->op_type, op->interval_ms, node_name);
        return TRUE;
    }

    metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
    if (metadata == NULL) {
        /* For now, we always collect resource agent meta-data via a local,
         * synchronous, direct execution of the agent. This has multiple issues:
         * the lrmd should execute agents, not the crmd; meta-data for
         * Pacemaker Remote nodes should be collected on those nodes, not
         * locally; and the meta-data call shouldn't eat into the timeout of the
         * real action being performed.
         *
         * These issues are planned to be addressed by having the PE schedule
         * a meta-data cache check at the beginning of each transition. Once
         * that is working, this block will only be a fallback in case the
         * initial collection fails.
         */
        char *metadata_str = NULL;

        int rc = lrm_state_get_metadata(lrm_state, rsc->standard,
                                        rsc->provider, rsc->type,
                                        &metadata_str, 0);

        if (rc != pcmk_ok) {
            crm_warn("Failed to get metadata for %s (%s:%s:%s)",
                     rsc->id, rsc->standard, rsc->provider, rsc->type);
            return TRUE;
        }

        metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
                                         metadata_str);
        free(metadata_str);
        if (metadata == NULL) {
            crm_warn("Failed to update metadata for %s (%s:%s:%s)",
                     rsc->id, rsc->standard, rsc->provider, rsc->type);
            return TRUE;
        }
    }

#if ENABLE_VERSIONED_ATTRS
    crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version);
#endif

    crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type);
    append_restart_list(op, metadata, xml_op, caller_version);
    append_secure_list(op, metadata, xml_op, caller_version);

    return TRUE;
}
/*!
 * \internal
 * \brief Check whether a resource should be considered active, per history
 *
 * A resource is considered inactive if its last recorded operation was a
 * successful stop or migration, reported "not running", or was a one-shot
 * action reporting "not configured".
 *
 * \param[in] lrm_state  Executor state holding the resource history
 * \param[in] rsc_id     ID of resource to check
 *
 * \return TRUE if the resource appears active, FALSE otherwise
 */
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
    rsc_history_t *entry = NULL;

    entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
    if (entry == NULL || entry->last == NULL) {
        return FALSE;
    }

    /* interval_ms is unsigned (guint), so use %u — passing it to %d is a
     * mismatched printf conversion */
    crm_trace("Processing %s: %s.%u=%d", rsc_id, entry->last->op_type,
              entry->last->interval_ms, entry->last->rc);
    if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
        return FALSE;

    } else if (entry->last->rc == PCMK_OCF_OK
               && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
        /* a stricter check is too complex...
         * leave that to the PE
         */
        return FALSE;

    } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
        return FALSE;

    } else if ((entry->last->interval_ms == 0)
               && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
        /* Badly configured resources can't be reliably stopped */
        return FALSE;
    }

    return TRUE;
}
/*!
 * \internal
 * \brief Add entries for every known resource to an lrm_resources XML node
 *
 * For each resource in the history cache, adds a resource entry with its
 * failed, last, and recurring operation results.
 *
 * \param[in]     lrm_state  Executor state holding the resource history
 * \param[in,out] rsc_list   XML node to add resource entries to
 *
 * \return FALSE (no resource was skipped)
 */
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
    GHashTableIter hist_iter;
    rsc_history_t *entry = NULL;

    g_hash_table_iter_init(&hist_iter, lrm_state->resource_history);
    while (g_hash_table_iter_next(&hist_iter, NULL, (void **)&entry)) {
        GList *op_iter = NULL;
        const char *container = NULL;
        xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);

        crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
        crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
        crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
        crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);

        // Record the container, if the last operation's parameters name one
        if (entry->last && entry->last->params) {
            container = g_hash_table_lookup(entry->last->params,
                                            CRM_META"_"XML_RSC_ATTR_CONTAINER);
        }
        if (container) {
            crm_trace("Resource %s is a part of container resource %s",
                      entry->id, container);
            crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
        }

        build_operation_update(xml_rsc, &(entry->rsc), entry->failed,
                               lrm_state->node_name, __FUNCTION__);
        build_operation_update(xml_rsc, &(entry->rsc), entry->last,
                               lrm_state->node_name, __FUNCTION__);
        for (op_iter = entry->recurring_op_list; op_iter != NULL;
             op_iter = op_iter->next) {
            build_operation_update(xml_rsc, &(entry->rsc), op_iter->data,
                                   lrm_state->node_name, __FUNCTION__);
        }
    }

    return FALSE;
}
/*!
 * \internal
 * \brief Build a node_state XML update describing the node's LRM state
 *
 * \param[in] lrm_state     Executor state to report
 * \param[in] update_flags  node_update_* flags for the state entry
 *
 * \return Newly created node_state XML (caller must free), or NULL on error
 */
static xmlNode *
do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
{
    xmlNode *xml_state = NULL;
    xmlNode *lrm_node = NULL;
    xmlNode *rsc_list = NULL;
    crm_node_t *peer = NULL;

    peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
    CRM_CHECK(peer != NULL, return NULL);

    xml_state = create_node_state_update(peer, update_flags, NULL,
                                         __FUNCTION__);
    if (xml_state == NULL) {
        return NULL;
    }

    lrm_node = create_xml_node(xml_state, XML_CIB_TAG_LRM);
    crm_xml_add(lrm_node, XML_ATTR_ID, peer->uuid);
    rsc_list = create_xml_node(lrm_node, XML_LRM_TAG_RESOURCES);

    /* Build a list of active (not always running) resources */
    build_active_RAs(lrm_state, rsc_list);

    crm_log_xml_trace(xml_state, "Current state of the LRM");

    return xml_state;
}
/*!
 * \brief Build a node_state update for a named node's LRM state
 *
 * \param[in] is_replace  Unused
 * \param[in] node_name   Node whose executor state should be reported
 *
 * \return Newly created node_state XML (caller must free), or NULL if the
 *         node has no known executor state
 */
xmlNode *
do_lrm_query(gboolean is_replace, const char *node_name)
{
    lrm_state_t *lrm_state = lrm_state_find(node_name);

    if (lrm_state == NULL) {
        crm_err("Could not query lrm state for lrmd node %s", node_name);
        return NULL;
    }
    return do_lrm_query_internal(lrm_state,
                                 node_update_cluster|node_update_peer);
}
/*!
 * \internal
 * \brief Notify a requester of the result of a resource deletion
 *
 * Sends a direct ack with a synthetic delete operation result, and — if the
 * request did not come from the transitioner — bumps last-lrm-refresh to
 * trigger a new transition.
 *
 * \param[in] lrm_state  Executor state the deletion applied to
 * \param[in] input      Original request being answered
 * \param[in] rsc_id     ID of the (un)deleted resource
 * \param[in] rc         pcmk_ok if the deletion succeeded, error code otherwise
 */
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
    lrmd_event_data_t *delete_op = NULL;
    const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
    const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);

    crm_info("Notifying %s on %s that %s was%s deleted",
             from_sys, (from_host? from_host : "localhost"), rsc_id,
             ((rc == pcmk_ok)? "" : " not"));

    delete_op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);

    if (rc == pcmk_ok) {
        delete_op->op_status = PCMK_LRM_OP_DONE;
        delete_op->rc = PCMK_OCF_OK;
    } else {
        delete_op->op_status = PCMK_LRM_OP_ERROR;
        delete_op->rc = PCMK_OCF_UNKNOWN_ERROR;
    }

    send_direct_ack(from_host, from_sys, NULL, delete_op, rsc_id);
    lrmd_free_event(delete_op);

    if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
        /* this isn't expected - trigger a new transition */
        time_t now = time(NULL);
        char *now_s = crm_itoa(now);

        crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id);
        update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                             "last-lrm-refresh", now_s, FALSE, NULL, NULL);
        free(now_s);
    }
}
/*!
 * \internal
 * \brief g_hash_table_foreach_remove() callback matching pending deletions
 *
 * Notifies the requester of a matching pending deletion, then asks the
 * table to drop the entry.
 *
 * \param[in] key        Hash table key (unused)
 * \param[in] value      Pending deletion entry (struct pending_deletion_op_s *)
 * \param[in] user_data  Deletion event (struct delete_event_s *)
 *
 * \return TRUE (remove entry) if the resource IDs match, FALSE otherwise
 */
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
    struct delete_event_s *event = user_data;
    struct pending_deletion_op_s *op = value;

    if (crm_str_eq(event->rsc, op->rsc, TRUE) == FALSE) {
        return FALSE; // keep entry
    }
    notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
    return TRUE;
}
/*!
 * \internal
 * \brief g_hash_table_foreach_remove() callback dropping a resource's ops
 *
 * \param[in] key        Hash table key (unused)
 * \param[in] value      Pending operation (struct recurring_op_s *)
 * \param[in] user_data  ID (const char *) of the deleted resource
 *
 * \return TRUE (remove entry) if the op belongs to the resource, else FALSE
 */
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
    const char *rsc = user_data;
    struct recurring_op_s *pending = value;

    if (crm_str_eq(rsc, pending->rsc_id, TRUE) == FALSE) {
        return FALSE; // keep entry
    }
    crm_info("Removing op %s:%d for deleted resource %s",
             pending->op_key, pending->call_id, rsc);
    return TRUE;
}
/*
* Remove the rsc from the CIB
*
* Avoids refreshing the entire LRM section of this host
*/
#define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
/*!
 * \internal
 * \brief Delete a resource's history from the CIB status section
 *
 * Targets just the one resource's entry on this node via XPath, rather than
 * refreshing the node's entire LRM section.
 *
 * \param[in] lrm_state     Executor state naming the node
 * \param[in] rsc_id        ID of resource whose status should be erased
 * \param[in] call_options  CIB call options (cib_xpath is added)
 * \param[in] user_name     User to perform the operation as (may be NULL)
 *
 * \return Result of the CIB delete operation (-ENXIO if rsc_id is NULL)
 */
static int
delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                  const char *user_name)
{
    int rc = pcmk_ok;
    char *xpath = NULL;

    CRM_CHECK(rsc_id != NULL, return -ENXIO);

    xpath = crm_strdup_printf(rsc_template, lrm_state->node_name, rsc_id);
    rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, xpath,
                         NULL, NULL, call_options | cib_xpath, user_name);
    free(xpath);
    return rc;
}
/*!
 * \internal
 * \brief Remove a resource from history, CIB, and pending-operation tables
 *
 * On success (\p rc == pcmk_ok), removes the resource from the in-memory
 * history (via the supplied iterator when called mid-iteration), erases its
 * CIB status, and drops its pending operations. Always notifies the original
 * requester (if any) and resolves matching queued deletion requests.
 *
 * \param[in,out] lrm_state  Executor state to remove the resource from
 * \param[in]     input      Request to answer with the result (may be NULL)
 * \param[in]     rsc_id     ID of resource being deleted
 * \param[in,out] rsc_gIter  If not NULL, history iterator currently positioned
 *                           at this resource (safe removal during iteration)
 * \param[in]     rc         Result of the deletion attempt
 * \param[in]     user_name  User the deletion was performed as (may be NULL)
 */
static void
delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
                 GHashTableIter * rsc_gIter, int rc, const char *user_name)
{
    struct delete_event_s event;

    CRM_CHECK(rsc_id != NULL, return);

    if (rc == pcmk_ok) {
        /* Copy the ID first: removing the history entry frees the original
         * string (it is the hash table key) */
        char *rsc_id_copy = strdup(rsc_id);

        if (rsc_gIter)
            // Caller is iterating the table; use iterator-safe removal
            g_hash_table_iter_remove(rsc_gIter);
        else
            g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
        crm_debug("sync: Sending delete op for %s", rsc_id_copy);
        delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);

        g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
        free(rsc_id_copy);
    }

    if (input) {
        notify_deleted(lrm_state, input, rsc_id, rc);
    }

    // Resolve any queued deletion requests for this resource
    event.rc = rc;
    event.rsc = rsc_id;
    event.lrm_state = lrm_state;
    g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
/*!
 * \internal
 * \brief Erase an LRM history entry from the CIB, given the operation data
 *
 * \param[in] lrm_state  LRM state of the desired node
 * \param[in] op         Operation whose history should be deleted
 */
static void
erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op)
{
    xmlNode *entry = NULL;

    CRM_CHECK(op != NULL, return);

    // Build a template lrm_rsc_op node to match against
    entry = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
    crm_xml_add_int(entry, XML_LRM_ATTR_CALLID, op->call_id);
    crm_xml_add(entry, XML_ATTR_TRANSITION_KEY, op->user_data);

    if (op->interval_ms > 0) {
        char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);

        /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
        crm_xml_add(entry, XML_ATTR_ID, op_id);
        free(op_id);
    }

    crm_debug("Erasing LRM resource history for " CRM_OP_FMT " (call=%d)",
              op->rsc_id, op->op_type, op->interval_ms, op->call_id);

    fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, entry,
                               cib_quorum_override);

    crm_log_xml_trace(entry, "op:cancel");
    free_xml(entry);
}
/* Define xpath to find LRM resource history entry by node and resource */
#define XPATH_HISTORY \
"/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
"/" XML_LRM_TAG_RSC_OP
/* ... and also by operation key */
#define XPATH_HISTORY_ID XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s']"
/* ... and also by operation key and operation call ID */
#define XPATH_HISTORY_CALL XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
/* ... and also by operation key and original operation key */
#define XPATH_HISTORY_ORIG XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
/*!
 * \internal
 * \brief Erase an LRM history entry from the CIB, given operation identifiers
 *
 * \param[in] lrm_state  LRM state of the node to clear history for
 * \param[in] rsc_id     Name of resource to clear history for
 * \param[in] key        Operation key of operation to clear history for
 * \param[in] orig_op    If specified, delete only if it has this original op
 * \param[in] call_id    If specified, delete entry only if it has this call ID
 */
static void
erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id,
                        const char *key, const char *orig_op, int call_id)
{
    char *xpath = NULL;

    CRM_CHECK((rsc_id != NULL) && (key != NULL), return);

    // Pick the most specific XPath template the arguments allow
    if (call_id > 0) {
        xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name,
                                  rsc_id, key, call_id);
    } else if (orig_op != NULL) {
        xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name,
                                  rsc_id, key, orig_op);
    } else {
        xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name,
                                  rsc_id, key);
    }

    crm_debug("Erasing LRM resource history for %s on %s (call=%d)",
              key, rsc_id, call_id);
    fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL,
                               cib_quorum_override | cib_xpath);
    free(xpath);
}
/*!
 * \internal
 * \brief Check whether a history entry's last failure matches an operation
 *
 * \param[in] entry        Resource history entry (may be NULL)
 * \param[in] op           Operation name to match (NULL matches any)
 * \param[in] interval_ms  Interval to match when \p op is specified
 *
 * \return TRUE if the entry's last failure matches, FALSE otherwise
 */
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
    if (entry == NULL) {
        return FALSE;
    }
    if (entry->failed == NULL) {
        /* No last failure recorded, so nothing can match. This guard also
         * prevents a NULL dereference below when an operation name is given
         * for a resource that has never failed (or was already cleared). */
        return FALSE;
    }
    if (op == NULL) {
        return TRUE;
    }
    return (safe_str_eq(op, entry->failed->op_type)
            && (interval_ms == entry->failed->interval_ms));
}
/*!
 * \internal
 * \brief Clear a resource's last failure
 *
 * Erase a resource's last failure on a particular node from both the
 * LRM resource history in the CIB, and the resource history remembered
 * for the LRM state.
 *
 * \param[in] rsc_id       Resource name
 * \param[in] node_name    Node name
 * \param[in] operation    If specified, only clear if matching this operation
 * \param[in] interval_ms  If operation is specified, it has this interval
 */
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
                       const char *operation, guint interval_ms)
{
    char *failure_key = NULL;
    char *source_key = NULL;
    lrm_state_t *lrm_state = lrm_state_find(node_name);

    if (lrm_state == NULL) {
        return;
    }

    /* Erase from CIB */
    failure_key = generate_op_key(rsc_id, "last_failure", 0);
    if (operation != NULL) {
        source_key = generate_op_key(rsc_id, operation, interval_ms);
    }
    erase_lrm_history_by_id(lrm_state, rsc_id, failure_key, source_key, 0);
    free(failure_key);
    free(source_key);

    /* Remove from memory */
    if (lrm_state->resource_history != NULL) {
        rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
                                                   rsc_id);

        if (last_failed_matches_op(entry, operation, interval_ms)) {
            lrmd_free_event(entry->failed);
            entry->failed = NULL;
        }
    }
}
/* Returns: gboolean - cancellation is in progress */
/*!
 * \internal
 * \brief Ask the executor to cancel a pending operation
 *
 * Marks the matching pending-operation entry as cancelled (and optionally
 * flags it for CIB removal) before asking the executor to cancel it.
 *
 * \param[in]     lrm_state  Executor state the operation belongs to
 * \param[in]     rsc_id     ID of resource the operation acts on
 * \param[in]     key        Pending-ops table key (if NULL, derived from
 *                           \p rsc_id and \p op)
 * \param[in]     op         Executor call ID of the operation (must be nonzero)
 * \param[in]     remove     If TRUE, also remove the op from the CIB when done
 *
 * \return TRUE if a cancellation was initiated, FALSE otherwise (no such
 *         pending op, already cancelled, or nothing for the executor to do —
 *         in the last case the caller must remove the pending_ops entry
 *         itself, typically by returning TRUE from a
 *         g_hash_table_foreach_remove() worker)
 */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
    int rc = pcmk_ok;
    char *local_key = NULL;
    struct recurring_op_s *pending = NULL;

    CRM_CHECK(op != 0, return FALSE);
    CRM_CHECK(rsc_id != NULL, return FALSE);
    if (key == NULL) {
        // Caller gave no key; build one from the resource ID and call ID
        local_key = make_stop_id(rsc_id, op);
        key = local_key;
    }
    pending = g_hash_table_lookup(lrm_state->pending_ops, key);

    if (pending) {
        if (remove && pending->remove == FALSE) {
            // Flag the entry so its CIB record is erased once cancelled
            pending->remove = TRUE;
            crm_debug("Scheduling %s for removal", key);
        }

        if (pending->cancelled) {
            // A previous cancellation is already in flight; don't repeat it
            crm_debug("Operation %s already cancelled", key);
            free(local_key);
            return FALSE;
        }

        pending->cancelled = TRUE;

    } else {
        crm_info("No pending op found for %s", key);
        free(local_key);
        return FALSE;
    }

    crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
    rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
                          pending->interval_ms);
    if (rc == pcmk_ok) {
        crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
        free(local_key);
        return TRUE;
    }

    crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
    /* The caller needs to make sure the entry is
     * removed from the pending_ops list
     *
     * Usually by returning TRUE inside the worker function
     * supplied to g_hash_table_foreach_remove()
     *
     * Not removing the entry from pending_ops will block
     * the node from shutting down
     */
    free(local_key);
    return FALSE;
}
/* Bundled arguments for cancel_action_by_key() when iterating pending ops */
struct cancel_data {
    gboolean done;           // Output: set TRUE when a matching op was found
    gboolean remove;         // Whether to also erase the op from the CIB
    const char *key;         // Operation key to search for
    lrmd_rsc_info_t *rsc;    // Resource the operation belongs to
    lrm_state_t *lrm_state;  // Executor state holding the pending ops
};
/*!
 * \internal
 * \brief g_hash_table_foreach_remove() callback cancelling a matching op
 *
 * \param[in] key        Pending-ops table key
 * \param[in] value      Pending operation (struct recurring_op_s *)
 * \param[in] user_data  Search parameters (struct cancel_data *)
 *
 * \return TRUE (remove entry) if no cancellation ended up in progress,
 *         FALSE (keep entry) otherwise
 */
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
    struct cancel_data *data = user_data;
    struct recurring_op_s *pending = (struct recurring_op_s *)value;

    if (crm_str_eq(pending->op_key, data->key, TRUE) == FALSE) {
        return FALSE; // not a match; keep the entry
    }

    data->done = TRUE;
    // Drop the entry unless a cancellation is now in flight
    return !cancel_op(data->lrm_state, data->rsc->id, key,
                      pending->call_id, data->remove);
}
/*!
 * \internal
 * \brief Cancel all pending operations matching an operation key
 *
 * \param[in] lrm_state  Executor state holding the pending ops
 * \param[in] rsc        Resource the operations belong to
 * \param[in] key        Operation key to cancel
 * \param[in] remove     If TRUE, also erase the ops from the CIB when done
 *
 * \return TRUE if at least one matching operation was found, else FALSE
 */
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
    guint removed = 0;
    struct cancel_data data = {
        .done = FALSE,
        .remove = remove,
        .key = key,
        .rsc = rsc,
        .lrm_state = lrm_state,
    };

    CRM_CHECK(rsc != NULL, return FALSE);
    CRM_CHECK(key != NULL, return FALSE);

    removed = g_hash_table_foreach_remove(lrm_state->pending_ops,
                                          cancel_action_by_key, &data);
    crm_trace("Removed %u op cache entries, new size: %u",
              removed, g_hash_table_size(lrm_state->pending_ops));
    return data.done;
}
/*!
 * \internal
 * \brief Retrieve resource information from LRM
 *
 * \param[in]  lrm_state  LRM connection to use
 * \param[in]  rsc_xml    XML containing resource configuration
 * \param[in]  do_create  If true, register resource with LRM if not already
 * \param[out] rsc_info   Where to store resource information obtained from LRM
 *
 * \retval pcmk_ok   Success (and rsc_info holds newly allocated result)
 * \retval -EINVAL   Required information is missing from arguments
 * \retval -ENOTCONN No active connection to LRM
 * \retval -ENODEV   Resource not found
 * \retval -errno    Error communicating with lrmd when registering resource
 *
 * \note Caller is responsible for freeing result on success.
 */
static int
get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create,
                 lrmd_rsc_info_t **rsc_info)
{
    const char *id = ID(rsc_xml);

    CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
    CRM_CHECK(id, return -EINVAL);

    if (lrm_state_is_connected(lrm_state) == FALSE) {
        return -ENOTCONN;
    }

    crm_trace("Retrieving resource information for %s from the LRM", id);
    *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);

    // If resource isn't known by ID, try clone name, if provided
    if (!*rsc_info) {
        const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);

        if (long_id) {
            *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
        }
    }

    if ((*rsc_info == NULL) && do_create) {
        // Unknown to the executor: register it, then look it up again
        const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
        const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
        const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
        int rc;

        crm_trace("Registering resource %s with LRM", id);
        rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
                                    lrmd_opt_drop_recurring);
        if (rc != pcmk_ok) {
            fsa_data_t *msg_data = NULL;

            crm_err("Could not register resource %s with LRM on %s: %s "
                    CRM_XS " rc=%d",
                    id, lrm_state->node_name, pcmk_strerror(rc), rc);

            /* Register this as an internal error if this involves the local
             * lrmd. Otherwise, we're likely dealing with an unresponsive remote
             * node, which is not an FSA failure.
             */
            if (lrm_state_is_local(lrm_state) == TRUE) {
                register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
            }
            return rc;
        }

        *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
    }
    return *rsc_info? pcmk_ok : -ENODEV;
}
/*!
 * \internal
 * \brief Delete a resource from the executor and clean up its records
 *
 * Optionally unregisters the resource with the executor. On success or
 * failure, removes its history/CIB records and notifies the requester; when
 * the executor reports the deletion is still in progress, queues the request
 * so the requester can be notified once it completes.
 *
 * \param[in,out] lrm_state   Executor state the resource belongs to
 * \param[in]     id          ID of resource to delete
 * \param[in]     rsc         Resource information (may be NULL)
 * \param[in,out] gIter       History iterator positioned at the resource, if
 *                            called mid-iteration (may be NULL)
 * \param[in]     sys         Requesting subsystem, for logging/notification
 * \param[in]     host        Requesting host, for logging
 * \param[in]     user        Requesting user (NULL means internal)
 * \param[in]     request     Original request message (may be NULL)
 * \param[in]     unregister  If TRUE (and \p rsc is known), unregister the
 *                            resource with the executor
 */
static void
delete_resource(lrm_state_t * lrm_state,
                const char *id,
                lrmd_rsc_info_t * rsc,
                GHashTableIter * gIter,
                const char *sys,
                const char *host,
                const char *user,
                ha_msg_input_t * request,
                gboolean unregister)
{
    int rc = pcmk_ok;

    crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);

    if (rsc && unregister) {
        rc = lrm_state_unregister_rsc(lrm_state, id, 0);
    }

    if (rc == pcmk_ok) {
        crm_trace("Resource '%s' deleted", id);
    } else if (rc == -EINPROGRESS) {
        /* Deletion is asynchronous; remember the request (keyed by its
         * reference) so the requester is notified when it completes */
        crm_info("Deletion of resource '%s' pending", id);
        if (request) {
            struct pending_deletion_op_s *op = NULL;
            char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);

            op = calloc(1, sizeof(struct pending_deletion_op_s));
            op->rsc = strdup(rsc->id);
            op->input = copy_ha_msg_input(request);
            g_hash_table_insert(lrm_state->deletion_ops, ref, op);
        }
        return;
    } else {
        crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
                 id, sys, user ? user : "internal", host, rc);
    }

    delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
}
/* Choose a synthetic executor call ID for a fabricated operation result.
 *
 * The ID must sort after the resource's last recorded call, otherwise the
 * faked failure could appear to predate a successful start and not trigger
 * recovery of the resource.
 */
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
    rsc_history_t *entry = NULL;
    int fake_id = 999999999; /* fallback when no history is available */

    if (lrm_state != NULL) {
        entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
    }
    if (entry != NULL) {
        fake_id = entry->last_callid + 1;
    }
    if (fake_id < 0) { /* guard against overflow of last_callid + 1 */
        fake_id = 1;
    }
    return fake_id;
}
/* Mark an executor event as a locally fabricated result, giving it the
 * requested status and exit code, the current time, and a call ID greater
 * than the resource's last recorded one.
 */
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
               enum ocf_exitcode op_exitcode)
{
    time_t now = time(NULL);

    op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
    op->t_run = now;
    op->t_rcchange = now;
    op->op_status = op_status;
    op->rc = op_exitcode;
}
/*!
 * \internal
 * \brief Clear all resource history for a node, forcing state re-detection
 *
 * Recurses into Pacemaker Remote nodes first, so a remote node's state is
 * cleared before its connection resource's history is.
 *
 * \param[in] lrm_state       Executor state for the node being reprobed
 * \param[in] from_sys        IPC name of requesting subsystem (for logging)
 * \param[in] from_host       Name of requesting host (for logging)
 * \param[in] user_name       Requesting user (for ACLs), or NULL
 * \param[in] is_remote_node  Whether the target is a Pacemaker Remote node
 */
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
              const char *from_host, const char *user_name,
              gboolean is_remote_node)
{
    GHashTableIter gIter;
    rsc_history_t *entry = NULL;

    crm_info("Clearing resource history on node %s", lrm_state->node_name);
    g_hash_table_iter_init(&gIter, lrm_state->resource_history);
    while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
        /* only unregister the resource during a reprobe if it is not a remote connection
         * resource. otherwise unregistering the connection will terminate remote-node
         * membership */
        gboolean unregister = TRUE;

        if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
            lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);

            if (remote_lrm_state) {
                /* when forcing a reprobe, make sure to clear remote node before
                 * clearing the remote node's connection resource */
                force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE);
            }
            unregister = FALSE;
        }
        delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
                        user_name, NULL, unregister);
    }

    /* Now delete the copy in the CIB */
    erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);

    /* And finally, _delete_ the value in pacemaker-attrd
     * Setting it to FALSE results in the PE sending us back here again
     */
    update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
}
/*!
 * \internal
 * \brief Fabricate a failure result for an action, as if the executor reported it
 *
 * Used when an action cannot actually be attempted (for example, no connection
 * to the target node), so the rest of the cluster still sees a failure.
 *
 * \param[in] lrm_state  Executor state for the target node (may be NULL)
 * \param[in] action     XML of the action to fake a result for
 * \param[in] rc         OCF exit code the fake result should carry
 */
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc)
{
    lrmd_event_data_t *op = NULL;
    lrmd_rsc_info_t *rsc_info = NULL;
    const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
    const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
    xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);

    // Without a resource ID or an operation name, nothing useful can be faked
    if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
        /* @TODO Should we do something else, like direct ack? */
        crm_info("Can't fake %s failure (%d) on %s without resource configuration",
                 crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
                 target_node);
        return;

    } else if(operation == NULL) {
        /* This probably came from crm_resource -C, nothing to do */
        crm_info("Can't fake %s failure (%d) on %s without operation",
                 ID(xml_rsc), rc, target_node);
        return;
    }

    op = construct_op(lrm_state, action, ID(xml_rsc), operation);

    if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail
        fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK);
    } else {
        fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc);
    }

    crm_info("Faking " CRM_OP_FMT " result (%d) on %s",
             op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);

    /* Process the result as if it came from the LRM, if possible
     * (i.e. resource info can be obtained from the lrm_state).
     */
    if (lrm_state) {
        rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
    }
    if (rsc_info) {
        process_lrm_event(lrm_state, op, NULL);

    } else {
        /* If we can't process the result normally, at least write it to the CIB
         * if possible, so the PE can act on it.
         */
        const char *standard = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS);
        const char *provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER);
        const char *type = crm_element_value(xml_rsc, XML_ATTR_TYPE);

        if (standard && type) {
            // Build just enough resource info for the CIB update (provider may be NULL)
            rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
            do_update_resource(target_node, rsc_info, op);
            lrmd_free_rsc_info(rsc_info);
        } else {
            // @TODO Should we direct ack?
            crm_info("Can't fake %s failure (%d) on %s without resource standard and type",
                     crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
                     target_node);
        }
    }
    lrmd_free_event(op);
}
/*!
 * \internal
 * \brief Get target of an LRM operation
 *
 * \param[in] xml  LRM operation data XML
 *
 * \return LRM operation target node name (local node or Pacemaker Remote node)
 */
static const char *
lrm_op_target(xmlNode *xml)
{
    const char *target = (xml == NULL)? NULL
                         : crm_element_value(xml, XML_LRM_ATTR_TARGET);

    /* An absent target means the operation is for the local node */
    return (target == NULL)? fsa_our_uname : target;
}
/*!
 * \internal
 * \brief Fail a resource on request (CRM_OP_LRM_FAIL), by faking a monitor failure
 *
 * \param[in] xml        Request XML containing the resource to fail
 * \param[in] lrm_state  Executor state for the resource's node
 * \param[in] user_name  Requesting user (checked against ACLs if enabled)
 * \param[in] from_host  Host to send the ack to
 * \param[in] from_sys   Subsystem to send the ack to
 */
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
                  const char *from_host, const char *from_sys)
{
    lrmd_event_data_t *op = NULL;
    lrmd_rsc_info_t *rsc = NULL;
    xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);

    CRM_CHECK(xml_rsc != NULL, return);

    /* The lrmd simply executes operations and reports the results, without any
     * concept of success or failure, so to fail a resource, we must fake what a
     * failure looks like.
     *
     * To do this, we create a fake lrmd operation event for the resource, and
     * pass that event to the lrmd client callback so it will be processed as if
     * it came from the lrmd.
     */
    op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
    fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);

    // The fake event is not tied to any transition, so drop the transition key
    free((char*) op->user_data);
    op->user_data = NULL;
    op->interval_ms = 0;

#if ENABLE_ACL
    // Only privileged users may fail resources
    if (user_name && is_privileged(user_name) == FALSE) {
        crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
        send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
        lrmd_free_event(op);
        return;
    }
#endif

    if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
        crm_info("Failing resource %s...", rsc->id);
        process_lrm_event(lrm_state, op, NULL);
        // Ack below reports success even though the faked event was a failure
        op->op_status = PCMK_LRM_OP_DONE;
        op->rc = PCMK_OCF_OK;
        lrmd_free_rsc_info(rsc);

    } else {
        crm_info("Cannot find/create resource in order to fail it...");
        crm_log_xml_warn(xml, "bad input");
    }

    send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
    lrmd_free_event(op);
}
/*!
 * \internal
 * \brief Handle a CRM_OP_LRM_REFRESH request: rewrite full LRM state to the CIB
 *
 * \param[in] lrm_state  Executor state to dump
 * \param[in] user_name  Requesting user (passed through to the CIB update)
 * \param[in] from_host  Host to ack, when the request came from another subsystem
 * \param[in] from_sys   Requesting subsystem's IPC name
 */
static void
handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
                  const char *from_host, const char *from_sys)
{
    int rc = pcmk_ok;
    xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);

    /* NOTE(review): fsa_cib_update appears to be a macro that stores the CIB
     * call ID in rc (hence the "call=%d" log below) — confirm in its definition
     */
    fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
    crm_info("Forced a local LRM refresh: call=%d", rc);

    // Ack everyone but ourselves (the crmd does not expect a reply)
    if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) {
        xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
                                        from_sys, CRM_SYSTEM_LRMD,
                                        fsa_our_uuid);

        crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
        if (relay_message(reply, TRUE) == FALSE) {
            crm_log_xml_err(reply, "Unable to route reply");
        }
        free_xml(reply);
    }

    free_xml(fragment);
}
/* Answer a CRM_OP_LRM_QUERY request with a full dump of known resource state. */
static void
handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
{
    xmlNode *state = do_lrm_query_internal(lrm_state, node_update_all);
    xmlNode *ack = create_reply(msg, state);

    if (relay_message(ack, TRUE) == FALSE) {
        crm_err("Unable to route reply");
        crm_log_xml_err(ack, "reply");
    }
    free_xml(ack);
    free_xml(state);
}
/* Handle a reprobe request: force re-detection of all resource states, then
 * ack the requester — unless it is the policy or transition engine, which do
 * not expect a reply.
 */
static void
handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
                  const char *from_host, const char *user_name,
                  gboolean is_remote_node)
{
    xmlNode *ack = NULL;

    crm_notice("Forcing the status of all resources to be redetected");
    force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);

    if (safe_str_eq(CRM_SYSTEM_PENGINE, from_sys)
        || safe_str_eq(CRM_SYSTEM_TENGINE, from_sys)) {
        return; // PE/TE do not wait for an ack
    }

    ack = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
                         from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
    crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
    if (relay_message(ack, TRUE) == FALSE) {
        crm_log_xml_err(ack, "Unable to route reply");
    }
    free_xml(ack);
}
/*!
 * \internal
 * \brief Handle a cancellation request for a resource operation
 *
 * \param[in] input      Request (operation parameters are read from input->xml)
 * \param[in] lrm_state  Executor state the operation belongs to
 * \param[in] rsc        Resource whose operation should be cancelled
 * \param[in] from_host  Host to ack, where an ack is needed
 * \param[in] from_sys   Requesting subsystem's IPC name
 *
 * \return FALSE if the request was malformed (missing parameters), else TRUE
 */
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
              lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
    char *op_key = NULL;
    char *meta_key = NULL;
    int call = 0;
    const char *call_id = NULL;
    const char *op_task = NULL;
    const char *interval_ms_s = NULL;
    gboolean in_progress = FALSE;
    xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);

    CRM_CHECK(params != NULL, return FALSE);

    // Interval and task of the operation to cancel come as meta-attributes
    meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
    interval_ms_s = crm_element_value(params, meta_key);
    free(meta_key);
    CRM_CHECK(interval_ms_s != NULL, return FALSE);

    meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
    op_task = crm_element_value(params, meta_key);
    free(meta_key);
    CRM_CHECK(op_task != NULL, return FALSE);

    // Call ID is optional (may be absent or 0)
    meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
    call_id = crm_element_value(params, meta_key);
    free(meta_key);

    op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s));

    crm_debug("PE requested op %s (call=%s) be cancelled",
              op_key, (call_id? call_id : "NA"));
    call = crm_parse_int(call_id, "0");
    if (call == 0) {
        /* the normal case when the PE cancels a recurring op */
        in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
    } else {
        /* the normal case when the PE cancels an orphan op */
        in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
    }

    // Acknowledge cancellation operation if for a remote connection resource
    if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
        char *op_id = make_stop_id(rsc->id, call);

        if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
            crm_info("Nothing known about operation %d for %s", call, op_key);
        }
        erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call);
        send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
                         from_host, from_sys);

        /* needed at least for cancellation of a remote operation */
        g_hash_table_remove(lrm_state->pending_ops, op_id);
        free(op_id);

    } else {
        /* No ack is needed since abcdaa8, but peers with older versions
         * in a rolling upgrade need one. We didn't bump the feature set
         * at that commit, so we can only compare against the previous
         * CRM version (3.0.8). If any peers have feature set 3.0.9 but
         * not abcdaa8, they will time out waiting for the ack (no
         * released versions of Pacemaker are affected).
         */
        const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);

        if (compare_version(peer_version, "3.0.8") <= 0) {
            crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
                     op_key, from_host, peer_version);
            send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
                             from_host, from_sys);
        }
    }

    free(op_key);
    return TRUE;
}
/*!
 * \internal
 * \brief Handle a request to delete a resource's status and (usually) unregister it
 *
 * \param[in] input           Request message
 * \param[in] lrm_state       Executor state the resource belongs to
 * \param[in] rsc             Resource to delete
 * \param[in] from_sys        Requesting subsystem's IPC name
 * \param[in] from_host       Host to ack
 * \param[in] crm_rsc_delete  TRUE when the request is CRM_OP_LRM_DELETE
 *                            (i.e. came from crm_resource)
 * \param[in] user_name       Requesting user (for ACL checks)
 */
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
              lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
              bool crm_rsc_delete, const char *user_name)
{
    gboolean unregister = TRUE;

#if ENABLE_ACL
    /* Dry-run the CIB status deletion first, so ACL violations are caught
     * before the resource is actually unregistered from the executor
     */
    int cib_rc = delete_rsc_status(lrm_state, rsc->id,
                                   cib_dryrun|cib_sync_call, user_name);

    if (cib_rc != pcmk_ok) {
        lrmd_event_data_t *op = NULL;

        crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s"
                CRM_XS " rc=%d",
                rsc->id, from_sys, (user_name? user_name : "unknown"),
                from_host, pcmk_strerror(cib_rc), cib_rc);

        // Report the failure back via a direct ack with a suitable OCF code
        op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
        op->op_status = PCMK_LRM_OP_ERROR;

        if (cib_rc == -EACCES) {
            op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
        } else {
            op->rc = PCMK_OCF_UNKNOWN_ERROR;
        }
        send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
        lrmd_free_event(op);
        return;
    }
#endif

    /* Never unregister a remote connection resource on crm_resource cleanup,
     * since that would terminate remote-node membership
     */
    if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
        unregister = FALSE;
    }

    delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host,
                    user_name, input, unregister);
}
/* A_LRM_INVOKE */
/*!
 * \internal
 * \brief FSA action: dispatch an incoming LRM request
 *
 * Routes refresh/query/probe/reprobe/fail/delete/cancel requests and resource
 * operations to the appropriate handler, after resolving the target node
 * (local or Pacemaker Remote) and, where needed, the resource itself.
 */
void
do_lrm_invoke(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    lrm_state_t *lrm_state = NULL;
    const char *crm_op = NULL;
    const char *from_sys = NULL;
    const char *from_host = NULL;
    const char *operation = NULL;
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
    const char *user_name = NULL;
    const char *target_node = NULL;
    gboolean is_remote_node = FALSE;
    bool crm_rsc_delete = FALSE;

    // Resolve the node the request targets (defaults to the local node)
    target_node = lrm_op_target(input->xml);
    is_remote_node = safe_str_neq(target_node, fsa_our_uname);

    lrm_state = lrm_state_find(target_node);
    if ((lrm_state == NULL) && is_remote_node) {
        // No executor connection to the remote node: fake a connection failure
        crm_err("Failing action because local node has never had connection to remote node %s",
                target_node);
        synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
        return;
    }
    CRM_ASSERT(lrm_state != NULL);

#if ENABLE_ACL
    user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
    crm_trace("LRM command from user '%s'", user_name);
#endif

    crm_op = crm_element_value(input->msg, F_CRM_TASK);
    from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
    // The TE doesn't expect replies, so don't record a reply host for it
    if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
        from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
    }
    crm_trace("LRM %s command from %s", crm_op, from_sys);

    if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
        crm_rsc_delete = TRUE; // Only crm_resource uses this op
        operation = CRMD_ACTION_DELETE;

    } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
        fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
                          from_sys);
        return;

    } else if (input->xml != NULL) {
        operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
    }

    if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
        handle_refresh_op(lrm_state, user_name, from_host, from_sys);

    } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
        handle_query_op(input->msg, lrm_state);

    } else if (safe_str_eq(operation, CRM_OP_PROBED)) {
        // Record that probing is complete for this node
        update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
                     user_name, is_remote_node);

    } else if (safe_str_eq(operation, CRM_OP_REPROBE)
               || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
        handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
                          is_remote_node);

    } else if (operation != NULL) {
        // A resource operation (start/stop/monitor/cancel/delete/...)
        lrmd_rsc_info_t *rsc = NULL;
        xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
        // Deletes must not re-register an unknown resource
        gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE);
        int rc;

        // We can't return anything meaningful without a resource ID
        CRM_CHECK(xml_rsc && ID(xml_rsc), return);

        rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
        if (rc == -ENOTCONN) {
            synthesize_lrmd_failure(lrm_state, input->xml,
                                    PCMK_OCF_CONNECTION_DIED);
            return;

        } else if ((rc < 0) && !create_rsc) {
            /* Delete of malformed or nonexistent resource
             * (deleting something that does not exist is a success)
             */
            crm_notice("Not registering resource '%s' for a %s event "
                       CRM_XS " get-rc=%d (%s) transition-key=%s",
                       ID(xml_rsc), operation,
                       rc, pcmk_strerror(rc), ID(input->xml));
            delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
                             user_name);
            send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation,
                             from_host, from_sys);
            return;

        } else if (rc == -EINVAL) {
            // Resource operation on malformed resource
            crm_err("Invalid resource definition for %s", ID(xml_rsc));
            crm_log_xml_warn(input->msg, "invalid resource");
            synthesize_lrmd_failure(lrm_state, input->xml,
                                    PCMK_OCF_NOT_CONFIGURED); // fatal error
            return;

        } else if (rc < 0) {
            // Error communicating with lrmd
            crm_err("Could not register resource '%s' with lrmd: %s " CRM_XS " rc=%d",
                    ID(xml_rsc), pcmk_strerror(rc), rc);
            crm_log_xml_warn(input->msg, "failed registration");
            synthesize_lrmd_failure(lrm_state, input->xml,
                                    PCMK_OCF_INVALID_PARAM); // hard error
            return;
        }

        if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
            if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
                crm_log_xml_warn(input->xml, "Bad command");
            }

        } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
            do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
                          crm_rsc_delete, user_name);

        } else {
            do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
        }

        lrmd_free_rsc_info(rsc);

    } else {
        crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op));
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
}
/*!
 * \internal
 * \brief Build an executor event for an operation, from its request XML
 *
 * \param[in] lrm_state  Executor state (may be NULL; then history and metadata
 *                       lookups are skipped)
 * \param[in] rsc_op     Operation request XML (NULL for the stop-all case)
 * \param[in] rsc_id     ID of the resource the operation applies to
 * \param[in] operation  Name of the operation
 *
 * \return Newly allocated event (caller must free with lrmd_free_event())
 */
static lrmd_event_data_t *
construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
{
    lrmd_event_data_t *op = NULL;
    const char *op_delay = NULL;
    const char *op_timeout = NULL;
    const char *interval_ms_s = NULL;
    GHashTable *params = NULL;
    const char *transition = NULL;

    CRM_ASSERT(rsc_id && operation);

    op = calloc(1, sizeof(lrmd_event_data_t));
    CRM_ASSERT(op != NULL);

    op->type = lrmd_event_exec_complete;
    op->op_type = strdup(operation);
    op->op_status = PCMK_LRM_OP_PENDING;
    op->rc = -1;
    op->rsc_id = strdup(rsc_id);
    op->interval_ms = 0;
    op->timeout = 0;
    op->start_delay = 0;

    if (rsc_op == NULL) {
        // Only stops are constructed without request XML
        CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
        op->user_data = NULL;
        /* the stop_all_resources() case
         * by definition there is no DC (or they'd be shutting
         * us down).
         * So we should put our version here.
         */
        op->params = crm_str_table_new();
        g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
        crm_trace("Constructed %s op for %s", operation, rsc_id);
        return op;
    }

    params = xml2list(rsc_op);
    // The target rc is for the TE's benefit, not the resource agent's
    g_hash_table_remove(params, CRM_META "_op_target_rc");

    op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
    op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
    interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS);

    op->interval_ms = crm_parse_ms(interval_ms_s);
    op->timeout = crm_parse_int(op_timeout, "0");
    op->start_delay = crm_parse_int(op_delay, "0");

#if ENABLE_VERSIONED_ATTRS
    // Resolve any versioned parameters
    if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA)
        && safe_str_neq(op->op_type, CRMD_ACTION_DELETE)
        && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
        // Resource info *should* already be cached, so we don't get lrmd call
        lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0);
        struct ra_metadata_s *metadata;

        metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
        if (metadata) {
            xmlNode *versioned_attrs = NULL;
            GHashTable *hash = NULL;
            char *key = NULL;
            char *value = NULL;
            GHashTableIter iter;

            // Versioned instance attributes: steal entries into params
            versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS);
            hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
            g_hash_table_iter_init(&iter, hash);
            while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
                g_hash_table_iter_steal(&iter);
                g_hash_table_replace(params, key, value);
            }
            g_hash_table_destroy(hash);

            // Versioned meta-attributes: copy (with meta prefix) and apply
            // timeout/start-delay overrides
            versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META);
            hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
            g_hash_table_iter_init(&iter, hash);
            while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
                g_hash_table_replace(params, crm_meta_name(key), strdup(value));

                if (safe_str_eq(key, XML_ATTR_TIMEOUT)) {
                    op->timeout = crm_parse_int(value, "0");
                } else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) {
                    op->start_delay = crm_parse_int(value, "0");
                }
            }
            g_hash_table_destroy(hash);

            // Versioned resource attributes: steal entries into params
            versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS);
            hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
            g_hash_table_iter_init(&iter, hash);
            while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
                g_hash_table_iter_steal(&iter);
                g_hash_table_replace(params, key, value);
            }
            g_hash_table_destroy(hash);
        }
        lrmd_free_rsc_info(rsc);
    }
#endif

    if (safe_str_neq(operation, RSC_STOP)) {
        op->params = params;

    } else {
        rsc_history_t *entry = NULL;

        if (lrm_state) {
            entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
        }

        /* If we do not have stop parameters cached, use
         * whatever we are given */
        if (!entry || !entry->stop_params) {
            op->params = params;
        } else {
            /* Copy the cached parameter list so that we stop the resource
             * with the old attributes, not the new ones */
            op->params = crm_str_table_new();

            g_hash_table_foreach(params, copy_meta_keys, op->params);
            g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
            g_hash_table_destroy(params);
            params = NULL;
        }
    }

    /* sanity */
    if (op->timeout <= 0) {
        op->timeout = op->interval_ms;
    }
    if (op->start_delay < 0) {
        op->start_delay = 0;
    }

    transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
    CRM_CHECK(transition != NULL, return op);

    op->user_data = strdup(transition);

    if (op->interval_ms != 0) {
        if (safe_str_eq(operation, CRMD_ACTION_START)
            || safe_str_eq(operation, CRMD_ACTION_STOP)) {
            crm_err("Start and Stop actions cannot have an interval: %u",
                    op->interval_ms);
            op->interval_ms = 0;
        }
    }

    crm_trace("Constructed %s op for %s: interval=%u",
              operation, rsc_id, op->interval_ms);
    return op;
}
/*!
 * \internal
 * \brief Ack an operation directly (bypassing the CIB), with a synthesized
 *        node-state update describing the result
 *
 * \param[in] to_host  Destination host (NULL for broadcast routing)
 * \param[in] to_sys   Destination subsystem (defaults to the transition engine)
 * \param[in] rsc      Resource info for the operation update (may be NULL)
 * \param[in] op       Operation result being acknowledged
 * \param[in] rsc_id   Resource ID to use if op has none set yet
 */
void
send_direct_ack(const char *to_host, const char *to_sys,
                lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
{
    xmlNode *reply = NULL;
    xmlNode *update, *iter;
    crm_node_t *peer = NULL;

    CRM_CHECK(op != NULL, return);
    if (op->rsc_id == NULL) {
        CRM_ASSERT(rsc_id != NULL);
        op->rsc_id = strdup(rsc_id);
    }
    if (to_sys == NULL) {
        to_sys = CRM_SYSTEM_TENGINE;
    }

    // Build a <node_state>/<lrm>/<lrm_resources>/<lrm_resource> skeleton for
    // the local node, then attach the operation result to it
    peer = crm_get_peer(0, fsa_our_uname);
    update = create_node_state_update(peer, node_update_none, NULL,
                                      __FUNCTION__);

    iter = create_xml_node(update, XML_CIB_TAG_LRM);
    crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
    iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
    iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);

    crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);

    build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__);
    reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);

    crm_log_xml_trace(update, "ACK Update");

    crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s",
              op->rsc_id, op->op_type, op->interval_ms, op->user_data,
              crm_element_value(reply, XML_ATTR_REFERENCE));

    if (relay_message(reply, TRUE) == FALSE) {
        crm_log_xml_err(reply, "Unable to route reply");
    }

    free_xml(update);
    free_xml(reply);
}
/* Check whether every known executor connection has stopped all resources.
 * All connections are checked even after a failure, so each one gets logged.
 * Also records that resource stops have been requested (R_SENT_RSC_STOP).
 */
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
    gboolean all_stopped = TRUE;
    GList *lrm_state_list = lrm_state_get_list();

    for (GList *iter = lrm_state_list; iter != NULL; iter = iter->next) {
        lrm_state_t *connection = iter->data;

        if (!lrm_state_verify_stopped(connection, cur_state, log_level)) {
            all_stopped = FALSE; /* keep iterating so every state is checked */
        }
    }

    set_bit(fsa_input_register, R_SENT_RSC_STOP);
    g_list_free(lrm_state_list);
    return all_stopped;
}
// User data for stop_recurring_action_by_rsc()
struct stop_recurring_action_s {
    lrmd_rsc_info_t *rsc;       // Resource whose recurring actions should be cancelled
    lrm_state_t *lrm_state;     // Executor connection the pending actions belong to
};
/* g_hash_table_foreach_remove() callback: cancel a pending recurring
 * operation when it belongs to the resource named in user_data.
 * Returning TRUE removes the table entry (i.e. the cancel completed
 * immediately rather than being left in progress).
 */
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
    struct stop_recurring_action_s *event = user_data;
    struct recurring_op_s *pending = value;
    gboolean remove_entry = FALSE;

    if ((pending->interval_ms != 0)
        && crm_str_eq(pending->rsc_id, event->rsc->id, TRUE)) {

        crm_debug("Cancelling op %d for %s (%s)",
                  pending->call_id, pending->rsc_id, (char*)key);
        remove_entry = !cancel_op(event->lrm_state, event->rsc->id, key,
                                  pending->call_id, FALSE);
    }
    return remove_entry;
}
/* g_hash_table_foreach_remove() callback: cancel every pending recurring
 * operation on the given executor connection. Returning TRUE removes the
 * table entry (the cancel completed immediately).
 */
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
    lrm_state_t *connection = user_data;
    struct recurring_op_s *pending = value;
    gboolean remove_entry = FALSE;

    if (pending->interval_ms != 0) {
        crm_info("Cancelling op %d for %s (%s)",
                 pending->call_id, pending->rsc_id, (const char *) key);
        remove_entry = !cancel_op(connection, pending->rsc_id, key,
                                  pending->call_id, FALSE);
    }
    return remove_entry;
}
/*!
 * \internal
 * \brief Record an operation as "pending" in the CIB before it executes
 *
 * Cancels and deletes never get pending entries; others do unless the
 * operation's record-pending meta-attribute is explicitly false.
 *
 * \param[in] node_name  Node the operation will run on
 * \param[in] rsc        Resource the operation applies to
 * \param[in] op         Operation about to be executed (status fields are
 *                       overwritten here to mark it pending)
 */
static void
record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op)
{
    const char *record_pending = NULL;

    CRM_CHECK(node_name != NULL, return);
    CRM_CHECK(rsc != NULL, return);
    CRM_CHECK(op != NULL, return);

    // Never record certain operation types as pending
    if ((op->op_type == NULL) || (op->params == NULL)
        || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL)
        || safe_str_eq(op->op_type, CRMD_ACTION_DELETE)) {
        return;
    }

    // defaults to true
    record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
    if (record_pending && !crm_is_true(record_pending)) {
        return;
    }

    op->call_id = -1; // the real call ID is not known yet
    op->op_status = PCMK_LRM_OP_PENDING;
    op->rc = PCMK_OCF_UNKNOWN;

    op->t_run = time(NULL);
    op->t_rcchange = op->t_run;

    /* write a "pending" entry to the CIB, inhibit notification */
    crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB",
              op->rsc_id, op->op_type, op->interval_ms, node_name);
    do_update_resource(node_name, rsc, op);
}
/*!
 * \internal
 * \brief Execute a resource operation via the executor
 *
 * Stops conflicting recurring monitors, NACKs operations that arrive in an
 * inappropriate FSA state, records a pending CIB entry, then submits the
 * operation and tracks it in lrm_state->pending_ops.
 *
 * \param[in] lrm_state  Executor connection to run the operation on
 * \param[in] rsc        Resource the operation applies to
 * \param[in] operation  Operation name
 * \param[in] msg        Operation request XML (may be NULL)
 * \param[in] request    Original request message (unused here)
 */
static void
do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
              xmlNode * request)
{
    int call_id = 0;
    char *op_id = NULL;
    lrmd_event_data_t *op = NULL;
    lrmd_key_value_t *params = NULL;
    fsa_data_t *msg_data = NULL;
    const char *transition = NULL;
    gboolean stop_recurring = FALSE;
    bool send_nack = FALSE;

    CRM_CHECK(rsc != NULL, return);
    CRM_CHECK(operation != NULL, return);

    if (msg != NULL) {
        transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
        if (transition == NULL) {
            crm_log_xml_err(msg, "Missing transition number");
        }
    }

    op = construct_op(lrm_state, msg, rsc->id, operation);
    CRM_CHECK(op != NULL, return);

    if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
        && (op->interval_ms == 0)
        && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {

        /* pcmk remote connections are a special use case.
         * We never ever want to stop monitoring a connection resource until
         * the entire migration has completed. If the connection is unexpectedly
         * severed, even during a migration, this is an event we must detect.*/
        stop_recurring = FALSE;

    } else if ((op->interval_ms == 0)
        && strcmp(operation, CRMD_ACTION_STATUS) != 0
        && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {

        /* stop any previous monitor operations before changing the resource state */
        stop_recurring = TRUE;
    }

    if (stop_recurring == TRUE) {
        guint removed = 0;
        struct stop_recurring_action_s data;

        data.rsc = rsc;
        data.lrm_state = lrm_state;
        removed = g_hash_table_foreach_remove(
            lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);

        if (removed) {
            crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT,
                      removed, s_if_plural(removed),
                      rsc->id, operation, op->interval_ms);
        }
    }

    /* now do the op */
    crm_info("Performing key=%s op=" CRM_OP_FMT,
             transition, rsc->id, operation, op->interval_ms);

    // Refuse new work while shutting down, or anything but stops/recalculation
    // states when we are not (or not yet) DC-coordinated
    if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) {
        register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
        send_nack = TRUE;

    } else if (fsa_state != S_NOT_DC
               && fsa_state != S_POLICY_ENGINE /* Recalculating */
               && fsa_state != S_TRANSITION_ENGINE
               && safe_str_neq(operation, CRMD_ACTION_STOP)) {
        send_nack = TRUE;
    }

    if(send_nack) {
        crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)",
                   operation, rsc->id, fsa_state2string(fsa_state),
                   is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");

        // CRM_DIRECT_NACK_RC tells the TE this was refused, not failed
        op->rc = CRM_DIRECT_NACK_RC;
        op->op_status = PCMK_LRM_OP_ERROR;
        send_direct_ack(NULL, NULL, rsc, op, rsc->id);
        lrmd_free_event(op);
        free(op_id);
        return;
    }

    record_pending_op(lrm_state->node_name, rsc, op);

    op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms);

    if (op->interval_ms > 0) {
        /* cancel it so we can then restart it without conflict */
        cancel_op_key(lrm_state, rsc, op_id, FALSE);
    }

    // Flatten the parameter table into the executor's key/value list
    if (op->params) {
        char *key = NULL;
        char *value = NULL;
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, op->params);
        while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
            params = lrmd_key_value_add(params, key, value);
        }
    }

    call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data,
                             op->interval_ms, op->timeout, op->start_delay,
                             params);

    if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
        // Local executor failure is an internal (FSA) error
        crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);

    } else if (call_id <= 0) {
        // Remote executor failure: report it as an operation failure instead
        crm_err("Operation %s on resource %s failed to execute on remote node %s: %d",
                operation, rsc->id, lrm_state->node_name, call_id);
        fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
        process_lrm_event(lrm_state, op, NULL);

    } else {
        /* record all operations so we can wait
         * for them to complete during shutdown
         */
        char *call_id_s = make_stop_id(rsc->id, call_id);
        struct recurring_op_s *pending = NULL;

        pending = calloc(1, sizeof(struct recurring_op_s));
        crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);

        pending->call_id = call_id;
        pending->interval_ms = op->interval_ms;
        pending->op_type = strdup(operation);
        pending->op_key = strdup(op_id);
        pending->rsc_id = strdup(rsc->id);
        pending->start_time = time(NULL);
        pending->user_data = strdup(op->user_data);
        g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);

        if ((op->interval_ms > 0)
            && (op->start_delay > START_DELAY_THRESHOLD)) {
            char *uuid = NULL;
            int dummy = 0, target_rc = 0;

            // Long-delayed recurring ops are acked immediately with their
            // expected result, so the transition is not held up waiting
            crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
            decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
            free(uuid);

            op->rc = target_rc;
            op->op_status = PCMK_LRM_OP_DONE;
            send_direct_ack(NULL, NULL, rsc, op, rsc->id);
        }

        // Hand parameter ownership to the pending-op record
        pending->params = op->params;
        op->params = NULL;
    }

    free(op_id);
    lrmd_free_event(op);
    return;
}
// CIB call ID of the most recent resource state update (0 = none outstanding)
int last_resource_update = 0;

/*!
 * \internal
 * \brief CIB completion callback for resource state updates
 *
 * Logs the result and, when the completed call is the most recent update,
 * clears the outstanding-update marker and kicks the FSA.
 */
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    switch (rc) {
        case pcmk_ok:
        case -pcmk_err_diff_failed:
        case -pcmk_err_diff_resync:
            // Diff failures are resolved by a resync, so not worth a warning
            crm_trace("Resource update %d complete: rc=%d", call_id, rc);
            break;
        default:
            crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
    }

    if (call_id == last_resource_update) {
        last_resource_update = 0;
        trigger_fsa(fsa_source);
    }
}
/*!
 * \internal
 * \brief Asynchronously write an operation result to the CIB status section
 *
 * \param[in] node_name  Node the operation ran on (local or remote)
 * \param[in] rsc        Resource info (NULL triggers a direct ack instead of a
 *                       CIB update, since the resource no longer exists)
 * \param[in] op         Operation result to record
 *
 * \return CIB call ID of the submitted update (or a negative error code)
 */
static int
do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
/*
  <status>
  <nodes_status id=uname>
  <lrm>
  <lrm_resources>
  <lrm_resource id=...>
  </...>
*/
    int rc = pcmk_ok;
    xmlNode *update, *iter = NULL;
    int call_opt = crmd_cib_smart_opt();
    const char *uuid = NULL;

    CRM_CHECK(op != NULL, return 0);

    // Build the <status>/<node_state>/<lrm>/... skeleton for the update
    iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
    update = iter;
    iter = create_xml_node(iter, XML_CIB_TAG_STATE);

    if (safe_str_eq(node_name, fsa_our_uname)) {
        uuid = fsa_our_uuid;

    } else {
        /* remote nodes uuid and uname are equal */
        uuid = node_name;
        crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
    }

    CRM_LOG_ASSERT(uuid != NULL);
    if(uuid == NULL) {
        rc = -EINVAL;
        goto done;
    }

    crm_xml_add(iter, XML_ATTR_UUID,  uuid);
    crm_xml_add(iter, XML_ATTR_UNAME, node_name);
    crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);

    iter = create_xml_node(iter, XML_CIB_TAG_LRM);
    crm_xml_add(iter, XML_ATTR_ID, uuid);

    iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
    iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
    crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);

    build_operation_update(iter, rsc, op, node_name, __FUNCTION__);

    if (rsc) {
        const char *container = NULL;

        // Record agent identity so status can be interpreted later
        crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
        crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard);
        crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);

        if (op->params) {
            container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
        }
        if (container) {
            crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
            crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
        }

    } else {
        crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id);
        send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
        goto cleanup; // skips CIB update and callback registration
    }

    crm_log_xml_trace(update, __FUNCTION__);

    /* make it an asynchronous call and be done with it
     *
     * Best case:
     * the resource state will be discovered during
     * the next signup or election.
     *
     * Bad case:
     * we are shutting down and there is no DC at the time,
     * but then why were we shutting down then anyway?
     * (probably because of an internal error)
     *
     * Worst case:
     * we get shot for having resources "running" that really weren't
     *
     * the alternative however means blocking here for too long, which
     * isn't acceptable
     */
    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);

    if (rc > 0) {
        last_resource_update = rc;
    }
  done:
    /* the return code is a call number, not an error code */
    /* NOTE(review): on the uuid==NULL path above, rc is -EINVAL here and is
     * still passed to fsa_register_cib_callback — confirm that is intended
     */
    crm_trace("Sent resource state update message: %d for %s=%u on %s",
              rc, op->op_type, op->interval_ms, op->rsc_id);
    fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);

  cleanup:
    free_xml(update);
    return rc;
}
/* A_LRM_EVENT: placeholder FSA action that must never actually be invoked;
 * executor events are handled via process_lrm_event() instead.
 */
void
do_lrm_event(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input,
             fsa_data_t * msg_data)
{
    CRM_CHECK(FALSE, return); // reaching here indicates a dispatch bug
}
/* Replace escaped newline sequences ("\\n") in a string with real newlines.
 *
 * Each two-character "\\n" escape is overwritten in place by a newline
 * followed by a space, so the result has the same length as the input and
 * the NUL terminator is unaffected.
 *
 * string: string to unescape (may be NULL)
 *
 * Returns a newly allocated unescaped copy of string, or NULL if string is
 * NULL or memory allocation fails. The caller is responsible for freeing
 * the result.
 */
static char *
unescape_newlines(const char *string)
{
    static const char *escaped_newline = "\\n";
    char *ret = NULL;
    char *pch = NULL;
    size_t len = 0;

    if (string == NULL) {
        return NULL;
    }

    /* Equivalent of strdup(), with an explicit allocation check: the
     * original code passed an unchecked strdup() result to strstr(),
     * which is undefined behavior on allocation failure.
     */
    len = strlen(string);
    ret = malloc(len + 1);
    if (ret == NULL) {
        return NULL; // out of memory
    }
    memcpy(ret, string, len + 1);

    for (pch = strstr(ret, escaped_newline); pch != NULL;
         pch = strstr(pch, escaped_newline)) {
        /* 2 chars replace 2 chars, so length and termination are unchanged */
        memcpy(pch, "\n ", 2);
    }
    return ret;
}
/* Process one executor (LRM) operation result/event.
 *
 * Normalizes the operation status, records the result in the CIB (or sends a
 * direct ack where a CIB update is not wanted), reconciles the pending-op
 * table, logs the outcome, triggers alerts or metadata-cache updates, and
 * updates the local operation history cache.
 *
 * lrm_state: executor state for the node the operation ran on
 * op:        operation result to process (must be non-NULL with rsc_id set)
 * pending:   pending-op table entry for this op, or NULL to have this
 *            function look it up (and remove it) itself
 *
 * Returns TRUE on success, FALSE if op or op->rsc_id is NULL.
 */
gboolean
process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending)
{
char *op_id = NULL;
char *op_key = NULL;
int update_id = 0;
gboolean remove = FALSE;
gboolean removed = FALSE;
lrmd_rsc_info_t *rsc = NULL;
CRM_CHECK(op != NULL, return FALSE);
CRM_CHECK(op->rsc_id != NULL, return FALSE);
/* op_id keys the pending_ops hash table; op_key is a human-readable
 * identifier used in log messages below
 */
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
/* If the caller did not supply the pending entry, look it up here and take
 * responsibility for removing it from the table (remove = TRUE)
 */
if(pending == NULL) {
remove = TRUE;
pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
}
/* Certain agent exit codes are not errors from the controller's point of
 * view; downgrade the status so the policy engine can interpret the rc
 */
if (op->op_status == PCMK_LRM_OP_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
/* Leave it up to the TE/PE to decide if this is an error */
op->op_status = PCMK_LRM_OP_DONE;
break;
default:
/* Nothing to do */
break;
}
}
/* Decide how to report the result: CIB update, direct ack, or nothing */
if (op->op_status != PCMK_LRM_OP_CANCELLED) {
if (safe_str_eq(op->op_type, RSC_NOTIFY) || safe_str_eq(op->op_type, RSC_METADATA)) {
/* Keep notify and meta-data ops out of the CIB */
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
} else {
update_id = do_update_resource(lrm_state->node_name, rsc, op);
}
} else if (op->interval_ms == 0) {
/* A cancelled non-recurring op: this will occur when "crm resource
 * cleanup" is called while actions are in-flight */
crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
} else if (pending == NULL) {
/* We don't need to do anything for cancelled ops
* that are not in our pending op list. There are no
* transition actions waiting on these operations. */
} else if (op->user_data == NULL) {
/* At this point we have a pending entry, but no transition
* key present in the user_data field. report this */
crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
} else if (pending->remove) {
/* The tengine canceled this op, we have been waiting for the cancel to finish. */
erase_lrm_history_by_op(lrm_state, op);
} else if (op->rsc_deleted) {
/* The tengine initiated this op, but it was cancelled outside of the
* tengine's control during a resource cleanup/re-probe request. The tengine
* must be alerted that this operation completed, otherwise the tengine
* will continue waiting for this update to occur until it is timed out.
* We don't want this update going to the cib though, so use a direct ack. */
crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id);
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
} else {
/* Recurring op cancelled before a stop; no delete event or ack needed */
crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
}
/* Reconcile the pending_ops table; "removed" is tracked only so the
 * logging below stays consistent with what actually happened
 */
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if ((op->interval_ms == 0)
&& g_hash_table_remove(lrm_state->pending_ops, op_id)) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops));
} else if ((op->interval_ms != 0)
&& (op->op_status == PCMK_LRM_OP_CANCELLED)) {
removed = TRUE;
g_hash_table_remove(lrm_state->pending_ops, op_id);
}
/* Log the final outcome at a severity appropriate to the status */
switch (op->op_status) {
case PCMK_LRM_OP_CANCELLED:
crm_info("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, (removed? "true" : "false"));
break;
case PCMK_LRM_OP_DONE:
/* Recurring results are routine (info); one-shot results are notable */
do_crm_log((op->interval_ms? LOG_INFO : LOG_NOTICE),
"Result of %s operation for %s on %s: %d (%s) "
CRM_XS " call=%d key=%s confirmed=%s cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
op->rc, services_ocf_exitcode_str(op->rc),
op->call_id, op_key, (removed? "true" : "false"),
update_id);
break;
case PCMK_LRM_OP_TIMEOUT:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s timeout=%dms",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, op->timeout);
break;
default:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
services_lrm_status_str(op->op_status), op->call_id, op_key,
(removed? "true" : "false"), op->op_status, update_id);
}
/* Log any agent output, more prominently when the agent reported failure */
if (op->output) {
char *prefix =
crm_strdup_printf("%s-" CRM_OP_FMT ":%d", lrm_state->node_name,
op->rsc_id, op->op_type, op->interval_ms,
op->call_id);
if (op->rc) {
crm_log_output(LOG_NOTICE, prefix, op->output);
} else {
crm_log_output(LOG_DEBUG, prefix, op->output);
}
free(prefix);
}
/* Fire resource-op alerts for everything except metadata actions; a
 * successful metadata action instead refreshes the metadata cache
 */
if (safe_str_neq(op->op_type, RSC_METADATA)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (op->rc == PCMK_OCF_OK) {
char *metadata = unescape_newlines(op->output);
metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
mainloop_set_trigger(fsa_source);
update_history_cache(lrm_state, rsc, op);
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
return TRUE;
}
diff --git a/cts/CM_corosync.py b/cts/CM_corosync.py
index 959b7932e3..0f1a394c2f 100644
--- a/cts/CM_corosync.py
+++ b/cts/CM_corosync.py
@@ -1,70 +1,70 @@
-''' Corosync-specific class for Pacemaker's Cluster Test Suite (CTS)
-'''
+""" Corosync-specific class for Pacemaker's Cluster Test Suite (CTS)
+"""
# Pacemaker targets compatibility with Python 2.7 and 3.2+
from __future__ import print_function, unicode_literals, absolute_import, division
__copyright__ = "Copyright 2007-2018 Andrew Beekhof <andrew@beekhof.net>"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
from cts.CTSvars import *
from cts.CM_common import crm_common
from cts.CTS import Process
from cts.patterns import PatternSelector
class crm_corosync(crm_common):
'''
Corosync version 2 cluster manager class
'''
def __init__(self, Environment, randseed=None, name=None):
if not name: name="crm-corosync"
crm_common.__init__(self, Environment, randseed=randseed, name=name)
self.fullcomplist = {}
self.templates = PatternSelector(self.name)
def Components(self):
complist = []
if not len(list(self.fullcomplist.keys())):
- for c in ["cib", "lrmd", "crmd", "attrd" ]:
+ for c in ["cib", "lrmd", "crmd", "pacemaker-attrd" ]:
self.fullcomplist[c] = Process(
self, c,
pats = self.templates.get_component(self.name, c),
badnews_ignore = self.templates.get_component(self.name, "%s-ignore" % c),
common_ignore = self.templates.get_component(self.name, "common-ignore"))
# pengine uses dc_pats instead of pats
self.fullcomplist["pengine"] = Process(
self, "pengine",
dc_pats = self.templates.get_component(self.name, "pengine"),
badnews_ignore = self.templates.get_component(self.name, "pengine-ignore"),
common_ignore = self.templates.get_component(self.name, "common-ignore"))
# stonith-ng's process name is different from its component name
self.fullcomplist["stonith-ng"] = Process(
self, "stonith-ng", process="stonithd",
pats = self.templates.get_component(self.name, "stonith"),
badnews_ignore = self.templates.get_component(self.name, "stonith-ignore"),
common_ignore = self.templates.get_component(self.name, "common-ignore"))
# add (or replace) extra components
self.fullcomplist["corosync"] = Process(
self, "corosync",
pats = self.templates.get_component(self.name, "corosync"),
badnews_ignore = self.templates.get_component(self.name, "corosync-ignore"),
common_ignore = self.templates.get_component(self.name, "common-ignore")
)
# Processes running under valgrind can't be shot with "killall -9 processname",
# so don't include them in the returned list
vgrind = self.Env["valgrind-procs"].split()
for key in list(self.fullcomplist.keys()):
if self.Env["valgrind-tests"]:
if key in vgrind:
self.log("Filtering %s from the component list as it is being profiled by valgrind" % key)
continue
if key == "stonith-ng" and not self.Env["DoFencing"]:
continue
complist.append(self.fullcomplist[key])
return complist
diff --git a/cts/README.md b/cts/README.md
index c6eba68f3f..886e8dd078 100644
--- a/cts/README.md
+++ b/cts/README.md
@@ -1,284 +1,284 @@
# Pacemaker Cluster Test Suite (CTS)
## Purpose
Pacemaker's CTS is primarily for developers and packagers of the Pacemaker
source code, but it can be useful for users who wish to see how their cluster
will react to various situations.
CTS consists of two main parts: a set of regression tests for verifying the
functionality of particular Pacemaker components, and a cluster exerciser for
intensively testing the behavior of an entire working cluster.
The primary regression test front end is cts-regression in this directory. Run
it with the --help option to see its usage. The regression tests can be run on
any single cluster node. The cluster should be stopped on that node when
running the tests.
The rest of this document focuses on the cluster exerciser. The cluster
exerciser runs a randomized series of predefined tests on the cluster. CTS can
be run against a pre-existing cluster configuration or overwrite the existing
configuration with a test configuration.
## Requirements
* Three or more machines (one test exerciser and two or more test cluster
machines).
* The test cluster machines should be on the same subnet and have journalling
filesystems (ext3, ext4, xfs, etc.) for all of their filesystems other than
/boot. You also need a number of free IP addresses on that subnet if you
intend to test mutual IP address takeover.
* The test exerciser machine doesn't need to be on the same subnet as the test
cluster machines. Minimal demands are made on the exerciser machine - it
just has to stay up during the tests.
* It helps a lot in tracking problems if all machines' clocks are closely
synchronized. NTP does this automatically, but you can do it by hand if you
want.
* The exerciser needs to be able to ssh over to the cluster nodes as root
without a password challenge. Configure ssh accordingly (see the Mini-HOWTO
at the end of this document for more details).
* The exerciser needs to be able to resolve the machine names of the
test cluster - either by DNS or by /etc/hosts.
* CTS is not guaranteed to run on all platforms that pacemaker itself does.
It calls commands such as service that may not be provided by all OSes.
## Preparation
Install Pacemaker (including CTS) on all machines. These scripts are
coordinated with particular versions of Pacemaker, so you need the same version
of CTS as the rest of Pacemaker, and you need the same version of
pacemaker and CTS on both the test exerciser and the test cluster machines.
You can install CTS from source, although many distributions provide
packages that include it (e.g. pacemaker-cts or pacemaker-dev).
Typically, packages will install CTS as /usr/share/pacemaker/tests/cts.
Configure cluster communications (Corosync) on the
cluster machines and verify everything works.
NOTE: Do not run the cluster on the test exerciser machine.
NOTE: Wherever machine names are mentioned in these configuration files,
they must match the machines' `uname -n` name. This may or may not match
the machines' FQDN (fully qualified domain name) - it depends on how
you (and your OS) have named the machines.
## Run CTS
Now assuming you did all this, what you need to do is run CTSlab.py:
python ./CTSlab.py [options] number-of-tests-to-run
You must specify which nodes are part of the cluster with --nodes, e.g.:
--node "pcmk-1 pcmk-2 pcmk-3"
Most people will want to save the output with --outputfile, e.g.:
--outputfile ~/cts.log
Unless you want to test your pre-existing cluster configuration, you also want:
--clobber-cib
--populate-resources
--test-ip-base $IP # e.g. --test-ip-base 192.168.9.100
and configure some sort of fencing:
--stonith $TYPE # e.g. "--stonith xvm" to use fence_xvm or "--stonith ssh" to use external/ssh
A complete command line might look like:
python ./CTSlab.py --nodes "pcmk-1 pcmk-2 pcmk-3" --outputfile ~/cts.log \
--clobber-cib --populate-resources --test-ip-base 192.168.9.100 \
--stonith xvm 50
For more options, use the --help option.
NOTE: A perhaps more convenient way to compose a command line like the one
above is to use the cluster_test script, which (at least in the source
repository) sits in the same directory as this very file.
To extract the result of a particular test, run:
crm_report -T $test
## Optional/advanced testing
### Memory testing
Pacemaker and CTS have various options for testing memory management. On the
cluster nodes, pacemaker components will use various environment variables to
control these options. How these variables are set varies by OS, but usually
they are set in the /etc/sysconfig/pacemaker or /etc/default/pacemaker file.
Valgrind is a program for detecting memory management problems (such as
use-after-free errors). If you have valgrind installed, you can enable it by
setting the following environment variables on all cluster nodes:
- PCMK_valgrind_enabled=attrd,cib,crmd,lrmd,pengine,stonith-ng
+ PCMK_valgrind_enabled=pacemaker-attrd,cib,crmd,lrmd,pengine,stonith-ng
VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25
--log-file=/var/lib/pacemaker/valgrind-%p
--suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions
--gen-suppressions=all"
and running CTS with these options:
- --valgrind-tests --valgrind-procs="attrd cib crmd lrmd pengine stonith-ng"
+ --valgrind-tests --valgrind-procs="pacemaker-attrd cib crmd lrmd pengine stonith-ng"
These options should only be set while specifically testing memory management,
because they may slow down the cluster significantly, and they will disable
writes to the CIB. If desired, you can enable valgrind on a subset of pacemaker
components rather than all of them as listed above.
Valgrind will put a text file for each process in the location specified by
valgrind's --log-file option. For explanations of the messages valgrind
generates, see http://valgrind.org/docs/manual/mc-manual.html
Separately, if you are using the GNU C library, the G_SLICE, MALLOC_PERTURB_,
and MALLOC_CHECK_ environment variables can be set to affect the library's
memory management functions.
When using valgrind, G_SLICE should be set to "always-malloc", which helps
valgrind track memory by always using the malloc() and free() routines
directly. When not using valgrind, G_SLICE can be left unset, or set to
"debug-blocks", which enables the C library to catch many memory errors
but may impact performance.
If the MALLOC_PERTURB_ environment variable is set to an 8-bit integer, the C
library will initialize all newly allocated bytes of memory to the integer
value, and will set all newly freed bytes of memory to the bitwise inverse of
the integer value. This helps catch uses of uninitialized or freed memory
blocks that might otherwise go unnoticed. Example:
MALLOC_PERTURB_=221
If the MALLOC_CHECK_ environment variable is set, the C library will check for
certain heap corruption errors. The most useful value in testing is 3, which
will cause the library to print a message to stderr and abort execution.
Example:
MALLOC_CHECK_=3
Valgrind should be enabled for either all nodes or none, but the C library
variables may be set differently on different nodes.
### Remote node testing
If the pacemaker_remoted daemon is installed on all cluster nodes, CTS will
enable remote node tests.
The remote node tests choose a random node, stop the cluster on it, start
pacemaker_remote on it, and add an ocf:pacemaker:remote resource to turn it
into a remote node. When the test is done, CTS will turn the node back into
a cluster node.
To avoid conflicts, CTS will rename the node, prefixing the original node name
with "remote-". For example, "pcmk-1" will become "remote-pcmk-1".
The name change may require special stonith configuration, if the fence agent
expects the node name to be the same as its hostname. A common approach is to
specify the "remote-" names in pcmk_host_list. If you use pcmk_host_list=all,
CTS will expand that to all cluster nodes and their "remote-" names.
You may additionally need a pcmk_host_map argument to map the "remote-" names
to the hostnames. Example:
--stonith xvm --stonith-args \
pcmk_arg_map=domain:uname,pcmk_host_list=all,pcmk_host_map=remote-pcmk-1:pcmk-1;remote-pcmk-2:pcmk-2
### Remote node testing with valgrind
When running the remote node tests, the pacemaker components on the cluster
nodes can be run under valgrind as described in the "Memory testing" section.
However, pacemaker_remote cannot be run under valgrind that way, because it is
started by the OS's regular boot system and not by pacemaker.
Details vary by system, but the goal is to set the VALGRIND_OPTS environment
variable and then start pacemaker_remoted by prefixing it with the path to
valgrind.
The init script and systemd service file provided with pacemaker_remote will
load the pacemaker environment variables from the same location used by other
pacemaker components, so VALGRIND_OPTS will be set correctly if using one of
those.
For an OS using systemd, you can override the ExecStart parameter to run
valgrind. For example:
mkdir /etc/systemd/system/pacemaker_remote.service.d
cat >/etc/systemd/system/pacemaker_remote.service.d/valgrind.conf <<EOF
[Service]
ExecStart=
ExecStart=/usr/bin/valgrind /usr/sbin/pacemaker_remoted
EOF
### Container testing
If the --container-tests option is given to CTS, it will enable
testing of LXC resources (currently only the RemoteLXC test,
which starts a remote node using an LXC container).
The container tests have additional package dependencies (see the toplevel
README). Also, SELinux must be enabled (in either permissive or enforcing mode),
libvirtd must be enabled and running, and root must be able to ssh without a
password between all cluster nodes (not just from the test machine). Before
running the tests, you can verify your environment with:
/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v
LXC tests will create two containers with hardcoded parameters: a NAT'ed bridge
named virbr0 using the IP network 192.168.123.0/24 will be created on the
cluster node hosting the containers; the host will be assigned
52:54:00:A8:12:35 as the MAC address and 192.168.123.1 as the IP address.
Each container will be assigned a random MAC address starting with 52:54:,
the IP address 192.168.123.11 or 192.168.123.12, the hostname lxc1 or lxc2
(which will be added to the host's /etc/hosts file), and 196MB RAM.
The test will revert all of the configuration when it is done.
## Mini-HOWTO: Allow passwordless remote SSH connections
The CTS scripts run "ssh -l root" so you don't have to do any of your testing
logged in as root on the test machine. Here is how to allow such connections
without requiring a password to be entered each time:
* On your test exerciser, create an SSH key if you do not already have one.
Most commonly, SSH keys will be in your ~/.ssh directory, with the
private key file not having an extension, and the public key file
named the same with the extension ".pub" (for example, ~/.ssh/id_rsa.pub).
If you don't already have a key, you can create one with:
ssh-keygen -t rsa
* From your test exerciser, authorize your SSH public key for root on all test
machines (both the exerciser and the cluster test machines):
ssh-copy-id -i ~/.ssh/id_rsa.pub root@$MACHINE
You will probably have to provide your password, and possibly say
"yes" to some questions about accepting the identity of the test machines.
The above assumes you have a RSA SSH key in the specified location;
if you have some other type of key (DSA, ECDSA, etc.), use its file name
in the -i option above.
* To test, try this command from the exerciser machine for each
of your cluster machines, and for the exerciser machine itself.
ssh -l root $MACHINE
If this works without prompting for a password, you're in business.
If not, look at the documentation for your version of ssh.
diff --git a/cts/environment.py b/cts/environment.py
index 6bcf097af8..dff6fc7a31 100644
--- a/cts/environment.py
+++ b/cts/environment.py
@@ -1,651 +1,635 @@
-'''
-Classes related to producing and searching logs
-'''
-from __future__ import print_function
-
-__copyright__='''
-Copyright (C) 2014 Andrew Beekhof <andrew@beekhof.net>
-Licensed under the GNU GPL.
-'''
-
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+""" Test environment classes for Pacemaker's Cluster Test Suite (CTS)
+"""
+
+# Pacemaker targets compatibility with Python 2.7 and 3.2+
+from __future__ import print_function, unicode_literals, absolute_import, division
+
+__copyright__ = "Copyright 2014-2018 Andrew Beekhof <andrew@beekhof.net>"
+__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import sys, time, os, socket, random
from cts.remote import *
from cts.CTSvars import *
class Environment(object):
def __init__(self, args):
self.data = {}
self.Nodes = []
self["DeadTime"] = 300
self["StartTime"] = 300
self["StableTime"] = 30
self["tests"] = []
self["IPagent"] = "IPaddr2"
self["DoStandby"] = 1
self["DoFencing"] = 1
self["XmitLoss"] = "0.0"
self["RecvLoss"] = "0.0"
self["ClobberCIB"] = 0
self["CIBfilename"] = None
self["CIBResource"] = 0
self["DoBSC"] = 0
self["oprofile"] = []
self["warn-inactive"] = 0
self["ListTests"] = 0
self["benchmark"] = 0
self["LogWatcher"] = "any"
self["SyslogFacility"] = "daemon"
self["LogFileName"] = "/var/log/messages"
self["Schema"] = "pacemaker-2.0"
self["Stack"] = "corosync"
self["stonith-type"] = "external/ssh"
self["stonith-params"] = "hostlist=all,livedangerously=yes"
self["notification-agent"] = "/var/lib/pacemaker/notify.sh"
self["notification-recipient"] = "/var/lib/pacemaker/notify.log"
self["loop-minutes"] = 60
self["valgrind-prefix"] = None
- self["valgrind-procs"] = "attrd cib crmd lrmd pengine stonith-ng"
+ self["valgrind-procs"] = "pacemaker-attrd cib crmd lrmd pengine stonith-ng"
self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
self["experimental-tests"] = 0
self["container-tests"] = 0
self["valgrind-tests"] = 0
self["unsafe-tests"] = 1
self["loop-tests"] = 1
self["scenario"] = "random"
self["stats"] = 0
self["docker"] = 0
self["continue"] = 0
self.RandomGen = random.Random()
self.logger = LogFactory()
self.SeedRandom()
self.rsh = RemoteFactory().getInstance()
self.target = "localhost"
self.parse_args(args)
self.discover()
self.validate()
def SeedRandom(self, seed=None):
if not seed:
seed = int(time.time())
self["RandSeed"] = seed
self.RandomGen.seed(str(seed))
def dump(self):
keys = []
for key in list(self.data.keys()):
keys.append(key)
keys.sort()
for key in keys:
self.logger.debug("Environment["+key+"]:\t"+str(self[key]))
def keys(self):
return list(self.data.keys())
def has_key(self, key):
if key == "nodes":
return True
return key in self.data
def __getitem__(self, key):
if str(key) == "0":
raise ValueError("Bad call to 'foo in X', should reference 'foo in X.keys()' instead")
if key == "nodes":
return self.Nodes
elif key == "Name":
return self.get_stack_short()
elif key in self.data:
return self.data[key]
else:
return None
def __setitem__(self, key, value):
if key == "Stack":
self.set_stack(value)
elif key == "node-limit":
self.data[key] = value
self.filter_nodes()
elif key == "nodes":
self.Nodes = []
for node in value:
# I don't think I need the IP address, etc. but this validates
# the node name against /etc/hosts and/or DNS, so it's a
# GoodThing(tm).
try:
n = node.strip()
if self.data["docker"] == 0:
socket.gethostbyname_ex(n)
self.Nodes.append(n)
except:
self.logger.log(node+" not found in DNS... aborting")
raise
self.filter_nodes()
else:
self.data[key] = value
def RandomNode(self):
'''Choose a random node from the cluster'''
return self.RandomGen.choice(self["nodes"])
def set_stack(self, name):
# Normalize stack names
if name == "corosync" or name == "cs" or name == "mcp":
self.data["Stack"] = "corosync 2.x"
else:
raise ValueError("Unknown stack: "+name)
def get_stack_short(self):
# Create the Cluster Manager object
if not "Stack" in self.data:
return "unknown"
elif self.data["Stack"] == "corosync 2.x":
if self["docker"]:
return "crm-corosync-docker"
else:
return "crm-corosync"
else:
LogFactory().log("Unknown stack: "+self["stack"])
raise ValueError("Unknown stack: "+self["stack"])
def detect_syslog(self):
# Detect syslog variant
if not "syslogd" in self.data:
if self["have_systemd"]:
# Systemd
self["syslogd"] = self.rsh(self.target, "systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", stdout=1).strip()
else:
# SYS-V
self["syslogd"] = self.rsh(self.target, "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1", stdout=1).strip()
if not "syslogd" in self.data or not self["syslogd"]:
# default
self["syslogd"] = "rsyslog"
def detect_at_boot(self):
# Detect if the cluster starts at boot
if not "at-boot" in self.data:
atboot = 0
if self["have_systemd"]:
# Systemd
atboot = atboot or not self.rsh(self.target, "systemctl is-enabled corosync.service")
atboot = atboot or not self.rsh(self.target, "systemctl is-enabled pacemaker.service")
else:
# SYS-V
atboot = atboot or not self.rsh(self.target, "chkconfig --list | grep -e corosync.*on -e pacemaker.*on")
self["at-boot"] = atboot
def detect_ip_offset(self):
# Try to determine an offset for IPaddr resources
if self["CIBResource"] and not "IPBase" in self.data:
network=self.rsh(self.target, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip()
self["IPBase"] = self.rsh(self.target, "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network, stdout=1).strip()
if not self["IPBase"]:
self["IPBase"] = " fe80::1234:56:7890:1000"
self.logger.log("Could not determine an offset for IPaddr resources. Perhaps nmap is not installed on the nodes.")
self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
elif int(self["IPBase"].split('.')[3]) >= 240:
self.logger.log("Could not determine an offset for IPaddr resources. Upper bound is too high: %s %s"
% (self["IPBase"], self["IPBase"].split('.')[3]))
self["IPBase"] = " fe80::1234:56:7890:1000"
self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
def filter_nodes(self):
if self['node-limit'] is not None and self["node-limit"] > 0:
if len(self["nodes"]) > self["node-limit"]:
self.logger.log("Limiting the number of nodes configured=%d (max=%d)"
%(len(self["nodes"]), self["node-limit"]))
while len(self["nodes"]) > self["node-limit"]:
self["nodes"].pop(len(self["nodes"])-1)
def validate(self):
if len(self["nodes"]) < 1:
print("No nodes specified!")
sys.exit(1)
def discover(self):
self.target = random.Random().choice(self["nodes"])
master = socket.gethostname()
# Use the IP where possible to avoid name lookup failures
for ip in socket.gethostbyname_ex(master)[2]:
if ip != "127.0.0.1":
master = ip
break;
self["cts-master"] = master
if not "have_systemd" in self.data:
self["have_systemd"] = not self.rsh(self.target,
"systemctl list-units",
silent=True)
self.detect_syslog()
self.detect_at_boot()
self.detect_ip_offset()
self.validate()
def parse_args(self, args):
    """ Parse command-line options into environment settings.

        args -- list of argument strings; defaults to sys.argv[1:].
                A bare integer argument sets the iteration count;
                anything unrecognized triggers usage() and exit.
    """
    skipthis=None  # set when the current option consumed the following argument

    if not args:
        args=sys.argv[1:]

    for i in range(0, len(args)):

        if skipthis:
            skipthis=None
            continue

        elif args[i] == "-l" or args[i] == "--limit-nodes":
            skipthis=1
            self["node-limit"] = int(args[i+1])

        elif args[i] == "-r" or args[i] == "--populate-resources":
            self["CIBResource"] = 1
            self["ClobberCIB"] = 1

        elif args[i] == "--outputfile":
            skipthis=1
            self["OutputFile"] = args[i+1]
            LogFactory().add_file(self["OutputFile"])

        elif args[i] == "-L" or args[i] == "--logfile":
            skipthis=1
            self["LogWatcher"] = "remote"
            self["LogAuditDisabled"] = 1
            self["LogFileName"] = args[i+1]

        elif args[i] == "--ip" or args[i] == "--test-ip-base":
            skipthis=1
            self["IPBase"] = args[i+1]
            self["CIBResource"] = 1
            self["ClobberCIB"] = 1

        elif args[i] == "--oprofile":
            skipthis=1
            self["oprofile"] = args[i+1].split(' ')

        elif args[i] == "--trunc":
            self["TruncateLog"]=1

        elif args[i] == "--list-tests" or args[i] == "--list" :
            self["ListTests"]=1

        elif args[i] == "--benchmark":
            self["benchmark"]=1

        elif args[i] == "--bsc":
            self["DoBSC"] = 1
            self["scenario"] = "basic-sanity"

        elif args[i] == "--qarsh":
            RemoteFactory().enable_qarsh()

        elif args[i] == "--docker":
            self["docker"] = 1
            RemoteFactory().enable_docker()

        elif args[i] == "--yes" or args[i] == "-y":
            self["continue"] = 1

        # Fencing: accepts a boolean, or an alias naming a known fencing setup
        elif args[i] == "--stonith" or args[i] == "--fencing":
            skipthis=1
            if args[i+1] == "1" or args[i+1] == "yes":
                self["DoFencing"]=1
            elif args[i+1] == "0" or args[i+1] == "no":
                self["DoFencing"]=0
            elif args[i+1] == "phd":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_phd_kvm"
            elif args[i+1] == "rhcs" or args[i+1] == "xvm" or args[i+1] == "virt":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_xvm"
            elif args[i+1] == "docker":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_docker_cts"
            elif args[i+1] == "scsi":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_scsi"
            elif args[i+1] == "ssh" or args[i+1] == "lha":
                self["DoStonith"]=1
                self["stonith-type"] = "external/ssh"
                self["stonith-params"] = "hostlist=all,livedangerously=yes"
            # north/south/east/west: lab-specific APC power-switch configurations
            elif args[i+1] == "north":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_apc"
                self["stonith-params"] = "ipaddr=north-apc,login=apc,passwd=apc,pcmk_host_map=north-01:2;north-02:3;north-03:4;north-04:5;north-05:6;north-06:7;north-07:9;north-08:10;north-09:11;north-10:12;north-11:13;north-12:14;north-13:15;north-14:18;north-15:17;north-16:19;"
            elif args[i+1] == "south":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_apc"
                self["stonith-params"] = "ipaddr=south-apc,login=apc,passwd=apc,pcmk_host_map=south-01:2;south-02:3;south-03:4;south-04:5;south-05:6;south-06:7;south-07:9;south-08:10;south-09:11;south-10:12;south-11:13;south-12:14;south-13:15;south-14:18;south-15:17;south-16:19;"
            elif args[i+1] == "east":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_apc"
                self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
            elif args[i+1] == "west":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_apc"
                self["stonith-params"] = "ipaddr=west-apc,login=apc,passwd=apc,pcmk_host_map=west-01:2;west-02:3;west-03:4;west-04:5;west-05:6;west-06:7;west-07:9;west-08:10;west-09:11;west-10:12;west-11:13;west-12:14;west-13:15;west-14:18;west-15:17;west-16:19;"
            elif args[i+1] == "openstack":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_openstack"

                print("Obtaining OpenStack credentials from the current environment")
                # Raises KeyError if the OS_* variables are not exported
                self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % (
                    os.environ['OS_REGION_NAME'],
                    os.environ['OS_TENANT_NAME'],
                    os.environ['OS_AUTH_URL'],
                    os.environ['OS_USERNAME'],
                    os.environ['OS_PASSWORD']
                    )

            elif args[i+1] == "rhevm":
                self["DoStonith"]=1
                self["stonith-type"] = "fence_rhevm"

                print("Obtaining RHEV-M credentials from the current environment")
                # Raises KeyError if the RHEVM_* variables are not exported
                self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
                    os.environ['RHEVM_USERNAME'],
                    os.environ['RHEVM_PASSWORD'],
                    os.environ['RHEVM_SERVER'],
                    os.environ['RHEVM_PORT'],
                    )

            else:
                self.usage(args[i+1])

        elif args[i] == "--stonith-type":
            self["stonith-type"] = args[i+1]
            skipthis=1

        elif args[i] == "--stonith-args":
            self["stonith-params"] = args[i+1]
            skipthis=1

        elif args[i] == "--standby":
            skipthis=1
            if args[i+1] == "1" or args[i+1] == "yes":
                self["DoStandby"] = 1
            elif args[i+1] == "0" or args[i+1] == "no":
                self["DoStandby"] = 0
            else:
                self.usage(args[i+1])

        elif args[i] == "--clobber-cib" or args[i] == "-c":
            self["ClobberCIB"] = 1

        elif args[i] == "--cib-filename":
            skipthis=1
            self["CIBfilename"] = args[i+1]

        elif args[i] == "--xmit-loss":
            try:
                float(args[i+1])  # validate only; the raw string is stored
            except ValueError:
                print("--xmit-loss parameter should be float")
                self.usage(args[i+1])
            skipthis=1
            self["XmitLoss"] = args[i+1]

        elif args[i] == "--recv-loss":
            try:
                float(args[i+1])  # validate only; the raw string is stored
            except ValueError:
                print("--recv-loss parameter should be float")
                self.usage(args[i+1])
            skipthis=1
            self["RecvLoss"] = args[i+1]

        elif args[i] == "--choose":
            skipthis=1
            self["tests"].append(args[i+1])
            self["scenario"] = "sequence"

        elif args[i] == "--nodes":
            skipthis=1
            self["nodes"] = args[i+1].split(' ')

        # Read the node list from a DSH group file (~/.dsh/group/<name>)
        elif args[i] == "-g" or args[i] == "--group" or args[i] == "--dsh-group":
            skipthis=1
            self["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args[i+1])
            LogFactory().add_file(self["OutputFile"], "CTS")

            dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args[i+1])

            # Hacks to make my life easier
            if args[i+1] == "virt1":
                self["Stack"] = "corosync"
                self["DoStonith"]=1
                self["stonith-type"] = "fence_xvm"
                self["stonith-params"] = "delay=0"
                self["IPBase"] = " fe80::1234:56:7890:1000"

            elif args[i+1] == "east16" or args[i+1] == "nsew":
                self["Stack"] = "corosync"
                self["DoStonith"]=1
                self["stonith-type"] = "fence_apc"
                self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
                self["IPBase"] = " fe80::1234:56:7890:2000"

                if args[i+1] == "east16":
                    # Requires newer python than available via nsew
                    self["IPagent"] = "Dummy"

            elif args[i+1] == "corosync8":
                self["Stack"] = "corosync"
                self["DoStonith"]=1
                self["stonith-type"] = "fence_rhevm"

                print("Obtaining RHEV-M credentials from the current environment")
                self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
                    os.environ['RHEVM_USERNAME'],
                    os.environ['RHEVM_PASSWORD'],
                    os.environ['RHEVM_SERVER'],
                    os.environ['RHEVM_PORT'],
                    )
                self["IPBase"] = " fe80::1234:56:7890:3000"

            if os.path.isfile(dsh_file):
                self["nodes"] = []
                f = open(dsh_file, 'r')
                for line in f:
                    l = line.strip().rstrip()
                    # Skip commented-out entries in the group file
                    if not l.startswith('#'):
                        self["nodes"].append(l)
                f.close()

            else:
                print("Unknown DSH group: %s" % args[i+1])

        elif args[i] == "--syslog-facility" or args[i] == "--facility":
            skipthis=1
            self["SyslogFacility"] = args[i+1]

        elif args[i] == "--seed":
            skipthis=1
            self.SeedRandom(args[i+1])

        elif args[i] == "--warn-inactive":
            self["warn-inactive"] = 1

        elif args[i] == "--schema":
            skipthis=1
            self["Schema"] = args[i+1]

        elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot":
            skipthis=1
            if args[i+1] == "1" or args[i+1] == "yes":
                self["at-boot"] = 1
            elif args[i+1] == "0" or args[i+1] == "no":
                self["at-boot"] = 0
            else:
                self.usage(args[i+1])

        elif args[i] == "--stack":
            # Map known distro aliases to the corosync stack
            if args[i+1] == "fedora" or args[i+1] == "fedora-17" or args[i+1] == "fedora-18":
                self["Stack"] = "corosync"
            elif args[i+1] == "rhel-7":
                self["Stack"] = "corosync"
            else:
                self["Stack"] = args[i+1]
            skipthis=1

        elif args[i] == "--once":
            self["scenario"] = "all-once"

        elif args[i] == "--boot":
            self["scenario"] = "boot"

        elif args[i] == "--notification-agent":
            self["notification-agent"] = args[i+1]
            skipthis = 1

        elif args[i] == "--notification-recipient":
            self["notification-recipient"] = args[i+1]
            skipthis = 1

        elif args[i] == "--valgrind-tests":
            self["valgrind-tests"] = 1

        elif args[i] == "--valgrind-procs":
            self["valgrind-procs"] = args[i+1]
            skipthis = 1

        elif args[i] == "--no-loop-tests":
            self["loop-tests"] = 0

        elif args[i] == "--loop-minutes":
            skipthis=1
            try:
                self["loop-minutes"]=int(args[i+1])
            except ValueError:
                self.usage(args[i])

        elif args[i] == "--no-unsafe-tests":
            self["unsafe-tests"] = 0

        elif args[i] == "--experimental-tests":
            self["experimental-tests"] = 1

        elif args[i] == "--container-tests":
            self["container-tests"] = 1

        # Generic escape hatch: --set name=value stores an arbitrary setting
        elif args[i] == "--set":
            skipthis=1
            (name, value) = args[i+1].split('=')
            self[name] = value
            print("Setting %s = %s" % (name, value))

        elif args[i] == "--help":
            self.usage(args[i], 0)

        elif args[i] == "--":
            break

        else:
            # A bare argument must be the iteration count
            try:
                NumIter=int(args[i])
                self["iterations"] = NumIter
            except ValueError:
                self.usage(args[i])
def usage(self, arg, status=1):
    """ Print command-line help and exit.

        arg    -- the offending argument (printed when status is non-zero)
        status -- process exit code (non-zero indicates an argument error)
    """
    if status:
        print("Illegal argument %s" % arg)
    print("usage: " + sys.argv[0] +" [options] number-of-iterations")
    print("\nCommon options: ")
    print("\t [--nodes 'node list']        list of cluster nodes separated by whitespace")
    print("\t [--group | -g 'name']        use the nodes listed in the named DSH group (~/.dsh/groups/$name)")
    print("\t [--limit-nodes max]          only use the first 'max' cluster nodes supplied with --nodes")
    print("\t [--stack corosync]           which cluster stack is installed")
    print("\t [--list-tests]               list the valid tests")
    print("\t [--benchmark]                add the timing information")
    print("\t ")
    print("Options that CTS will usually auto-detect correctly: ")
    print("\t [--logfile path]             where should the test software look for logs from cluster nodes")
    print("\t [--syslog-facility name]     which syslog facility should the test software log to")
    print("\t [--at-boot (1|0)]            does the cluster software start at boot time")
    print("\t [--test-ip-base ip]          offset for generated IP address resources")
    print("\t ")
    print("Options for release testing: ")
    print("\t [--populate-resources | -r]  generate a sample configuration")
    print("\t [--choose name]              run only the named test")
    print("\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]")
    print("\t [--once]                     run all valid tests once")
    print("\t ")
    print("Additional (less common) options: ")
    print("\t [--clobber-cib | -c ]        erase any existing configuration")
    print("\t [--outputfile path]          optional location for the test software to write logs to")
    print("\t [--trunc]                    truncate logfile before starting")
    print("\t [--xmit-loss lost-rate(0.0-1.0)]")
    print("\t [--recv-loss lost-rate(0.0-1.0)]")
    print("\t [--standby (1 | 0 | yes | no)]")
    print("\t [--fencing (1 | 0 | yes | no | rhcs | lha | openstack )]")
    print("\t [--stonith-type type]")
    print("\t [--stonith-args name=value]")
    print("\t [--bsc]")
    print("\t [--notification-agent path]  script to configure for Pacemaker alerts")
    print("\t [--notification-recipient r] recipient to pass to alert script")
    print("\t [--no-loop-tests]            don't run looping/time-based tests")
    print("\t [--no-unsafe-tests]          don't run tests that are unsafe for use with ocfs2/drbd")
    print("\t [--valgrind-tests]           include tests using valgrind")
    print("\t [--experimental-tests]       include experimental tests")
    print("\t [--container-tests]          include pacemaker_remote tests that run in lxc container resources")
    print("\t [--oprofile 'node list']     list of cluster nodes to run oprofile on]")
    print("\t [--qarsh]                    use the QARSH backdoor to access nodes instead of SSH")
    print("\t [--docker]                   Indicates nodes are docker nodes.")
    print("\t [--seed random_seed]")
    print("\t [--set option=value]")
    print("\t [--yes | -y]                 continue to run cts when there is an interaction whether to continue running pacemaker-cts")
    print("\t ")
    print("\t Example: ")
    print("\t    python sys.argv[0] -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500")

    sys.exit(status)
class EnvFactory(object):
    """ Singleton provider for the shared Environment object """

    # Class-level cache; shared by every factory instance
    instance = None

    def __init__(self):
        pass

    def getInstance(self, args=None):
        """ Return the shared Environment, creating it (from args) on first use """
        if not EnvFactory.instance:
            EnvFactory.instance = Environment(args)
        return EnvFactory.instance
diff --git a/cts/patterns.py b/cts/patterns.py
index 57e4af6332..ede6ae3025 100644
--- a/cts/patterns.py
+++ b/cts/patterns.py
@@ -1,403 +1,403 @@
""" Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS)
"""
# Pacemaker targets compatibility with Python 2.7 and 3.2+
from __future__ import print_function, unicode_literals, absolute_import, division
__copyright__ = "Copyright 2008-2018 Andrew Beekhof <andrew@beekhof.net>"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import sys, os
from cts.CTSvars import *
patternvariants = {}
class BasePatterns(object):
    """ Common log patterns and cluster commands shared by all stack variants.
        Instantiating a subclass registers it in the module-wide
        'patternvariants' table under its name.
    """
    def __init__(self, name):
        self.name = name
        # Register this variant so PatternSelector.get_variant() can find it
        patternvariants[name] = self

        # Log lines that BadNews scanning should always ignore
        self.ignore = [
            "avoid confusing Valgrind",
        ]
        self.BadNews = []
        self.components = {}

        # Shell command templates run on cluster nodes (%s placeholders
        # are filled in by the callers)
        self.commands = {
            "StatusCmd"      : "crmadmin -t 60000 -S %s 2>/dev/null",
            "CibQuery"       : "cibadmin -Ql",
            "CibAddXml"      : "cibadmin --modify -c --xml-text %s",
            "CibDelXpath"    : "cibadmin --delete --xpath %s",
            # 300,000 == 5 minutes
            "RscRunning"     : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s",
            "CIBfile"        : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
            "TmpDir"         : "/tmp",

            "BreakCommCmd"   : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
            "FixCommCmd"     : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",

# tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
# tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
# tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
# tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
            "ReduceCommCmd"  : "",
            "RestoreCommCmd" : "tc qdisc del dev lo root",

            "SetCheckInterval"    : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-recheck-interval-setting\" name=\"cluster-recheck-interval\" value=\"%s\"/></cluster_property_set>'",
            "ClearCheckInterval"  : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"",
            "MaintenanceModeOn"   : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'",
            "MaintenanceModeOff"  : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",

            "StandbyCmd"      : "crm_attribute -Vq  -U %s -n standby -l forever -v %s 2>/dev/null",
            "StandbyQueryCmd" : "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null",
        }

        # Regex templates matched against cluster node logs
        self.search = {
            "Pat:DC_IDLE"      : "crmd.*State transition.*-> S_IDLE",

            # This won't work if we have multiple partitions
            "Pat:Local_started" : "%s\W.*The local CRM is operational",
            "Pat:NonDC_started" : r"%s\W.*State transition.*-> S_NOT_DC",
            "Pat:DC_started"    : r"%s\W.*State transition.*-> S_IDLE",
            "Pat:We_stopped"    : "%s\W.*OVERRIDE THIS PATTERN",
            "Pat:They_stopped"  : "%s\W.*LOST:.* %s ",
            "Pat:They_dead"     : "node %s.*: is dead",
            "Pat:TransitionComplete" : "Transition status: Complete: complete",

            "Pat:Fencing_start"   : "(Initiating remote operation|Requesting peer fencing ).* (for|of) %s",
            "Pat:Fencing_ok"      : r"stonith.*:\s*Operation .* of %s by .* for .*@.*: OK",
            "Pat:Fencing_recover" : r"pengine.*: Recover %s",

            "Pat:RscOpOK"       : r"crmd.*:\s+Result of %s operation for %s.*: (0 \()?ok",
            "Pat:RscRemoteOpOK" : r"crmd.*:\s+Result of %s operation for %s on %s: (0 \()?ok",
            "Pat:NodeFenced"    : r"crmd.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK",
            "Pat:FenceOpOK"     : "Operation .* for host '%s' with device .* returned: 0",
        }

    def get_component(self, key):
        # Return the pattern list for a component, or an empty list
        # (with a diagnostic) for unknown component names
        if key in self.components:
            return self.components[key]

        print("Unknown component '%s' for %s" % (key, self.name))
        return []

    def get_patterns(self, key):
        # Dispatch on the pattern-collection name; returns None for unknown keys
        if key == "BadNews":
            return self.BadNews
        elif key == "BadNewsIgnore":
            return self.ignore
        elif key == "Commands":
            return self.commands
        elif key == "Search":
            return self.search
        elif key == "Components":
            return self.components

    def __getitem__(self, key):
        # Template lookup: commands take precedence over search patterns
        if key == "Name":
            return self.name
        elif key in self.commands:
            return self.commands[key]
        elif key in self.search:
            return self.search[key]
        else:
            print("Unknown template '%s' for %s" % (key, self.name))
            return None
class crm_corosync(BasePatterns):
'''
Patterns for Corosync version 2 cluster manager class
'''
def __init__(self, name):
BasePatterns.__init__(self, name)
self.commands.update({
"StartCmd" : "service corosync start && service pacemaker start",
"StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop; service corosync stop",
"EpochCmd" : "crm_node -e",
"QuorumCmd" : "crm_node -q",
"PartitionCmd" : "crm_node -p",
})
self.search.update({
# Close enough ... "Corosync Cluster Engine exiting normally" isn't
# printed reliably.
"Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines",
"Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost",
"Pat:They_dead" : "crmd.*Node %s(\[|\s).*state is now lost",
"Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(",
"Pat:ChildKilled" : r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated with signal 9",
"Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
"Pat:InfraUp" : "%s\W.*corosync.*Initializing transport",
"Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker",
})
self.ignore = self.ignore + [
r"crm_mon:",
r"crmadmin:",
r"update_trace_data",
r"async_notify:.*strange, client not found",
r"Parse error: Ignoring unknown option .*nodename",
r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:",
r"getinfo response error: 1$",
"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE",
r"sbd.* pcmk:\s*error:.*Connection to cib_ro failed",
r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* closed .I/O condition=17",
]
self.BadNews = [
r"error:",
r"crit:",
r"ERROR:",
r"CRIT:",
r"Shutting down...NOW",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r"(pacemakerd|lrmd|crmd):.*, exiting",
r"pengine.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r":global_timer_callback",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
r"\[[0-9]+\] terminated with signal [0-9]+ \(",
r"pengine:.*Recover .*\(.* -\> .*\)",
r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
r"Peer is not part of our cluster",
r"We appear to be in an election loop",
r"Unknown node -> we will not deliver message",
r"(Blackbox dump requested|Problem detected)",
r"pacemakerd.*Could not connect to Cluster Configuration Database API",
r"Receiving messages from a node we think is dead",
r"share the same cluster nodeid",
r"share the same name",
#r"crm_ipc_send:.*Request .* failed",
#r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received",
# Not inherently bad, but worth tracking
#r"No need to invoke the TE",
#r"ping.*: DEBUG: Updated connected = 0",
#r"Digest mis-match:",
r"crmd:.*Transition failed: terminated",
r"Local CIB .* differs from .*:",
r"warn.*:\s*Continuing but .* will NOT be used",
r"warn.*:\s*Cluster configuration file .* is corrupt",
#r"Executing .* fencing operation",
r"Election storm",
r"stalled the FSA with pending inputs",
]
self.components["common-ignore"] = [
"Pending action:",
"error: crm_log_message_adv:",
r"resource( was|s were) active at shutdown",
"pending LRM operations at shutdown",
"Lost connection to the CIB service",
"Connection to the CIB terminated...",
"Sending message to CIB service FAILED",
"apply_xml_diff:.*Diff application failed!",
r"crmd.*:\s*Action A_RECOVER .* not supported",
"unconfirmed_actions:.*Waiting on .* unconfirmed actions",
"cib_native_msgready:.*Message pending on command channel",
r"crmd.*:\s*Performing A_EXIT_1 - forcefully exiting the CRMd",
"verify_stopped:.*Resource .* was active at shutdown. You may ignore this error if it is unmanaged.",
"error: attrd_connection_destroy:.*Lost connection to attrd",
r".*:\s*Executing .* fencing operation \(.*\) on ",
r".*:\s*Requesting fencing \([^)]+\) of node ",
r"(Blackbox dump requested|Problem detected)",
# "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery",
# "error: process_pe_message: Transition .* ERRORs found during PE processing",
]
self.components["corosync-ignore"] = [
r"error:.*Connection to the CPG API failed: Library error",
r"\[[0-9]+\] exited with status [0-9]+ \(",
r"cib.*error:.*Corosync connection lost",
r"stonith-ng.*error:.*Corosync connection terminated",
r"lrmd.*error:.*Connection to stonith-ng.* (failed|closed)",
r"lrmd.*error:.*LRMD lost STONITH connection",
r"crmd.*State transition .* S_RECOVERY",
r"crmd.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state",
r"crmd.*error:.*Could not recover from internal error",
r"error:.*Connection to cib_(shm|rw).* (failed|closed)",
r"error:.*STONITH connection failed",
r"error: Connection to stonith-ng.* (failed|closed)",
r"crit: Fencing daemon connection failed",
]
self.components["corosync"] = [
r"pacemakerd.*error:.*Connection destroyed",
r"attrd.*:\s*(crit|error):.*Lost connection to (Corosync|CIB) service",
r"stonith.*:\s*(Corosync connection terminated|Shutting down)",
r"cib.*:\s*Corosync connection lost!\s+Exiting.",
r"crmd.*:\s*(connection terminated|Disconnected from Corosync)",
r"pengine.*Scheduling Node .* for STONITH",
r"crmd.*:\s*Peer .* was terminated \(.*\) by .* for .*:\s*OK",
]
self.components["cib-ignore"] = [
"lrmd.*Connection to stonith-ng failed",
"lrmd.*Connection to stonith-ng.* closed",
"lrmd.*LRMD lost STONITH connection",
"lrmd.*STONITH connection failed, finalizing .* pending operations",
]
self.components["cib"] = [
"State transition .* S_RECOVERY",
- r"Respawning failed child process: (attrd|crmd)",
+ r"Respawning failed child process: (pacemaker-attrd|crmd)",
"Connection to cib_.* failed",
"Connection to cib_.* closed",
r"crmd.*:.*Connection to the CIB terminated...",
r"attrd.*:.*(Lost connection to CIB service|Connection to the CIB terminated)",
r"crmd\[[0-9]+\] exited with status 1 \(",
r"attrd\[[0-9]+\] exited with status 102 \(",
r"crmd.*: Input I_TERMINATE .*from do_recover",
"crmd.*I_ERROR.*crmd_cib_connection_destroy",
"crmd.*Could not recover from internal error",
]
self.components["lrmd"] = [
"State transition .* S_RECOVERY",
"LRM Connection failed",
r"Respawning failed child process: crmd",
"Connection to lrmd failed",
"Connection to lrmd.* closed",
"crmd.*I_ERROR.*lrm_connection_destroy",
r"crmd\[[0-9]+\] exited with status 1 \(",
r"crmd.*: Input I_TERMINATE .*from do_recover",
"crmd.*Could not recover from internal error",
]
self.components["lrmd-ignore"] = []
self.components["crmd"] = [
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling Node .* for STONITH",
# "Executing .* fencing operation",
# Only if the node wasn't the DC: "State transition S_IDLE",
"State transition .* -> S_IDLE",
]
self.components["crmd-ignore"] = []
- self.components["attrd"] = []
- self.components["attrd-ignore"] = []
+ self.components["pacemaker-attrd"] = []
+ self.components["pacemaker-attrd-ignore"] = []
self.components["pengine"] = [
"State transition .* S_RECOVERY",
r"Respawning failed child process: crmd",
r"crmd\[[0-9]+\] exited with status 1 \(",
"Connection to pengine failed",
"Connection to pengine.* closed",
"Connection to the Policy Engine failed",
"crmd.*I_ERROR.*save_cib_contents",
r"crmd.*: Input I_TERMINATE .*from do_recover",
"crmd.*Could not recover from internal error",
]
self.components["pengine-ignore"] = []
self.components["stonith"] = [
"Connection to stonith-ng failed",
"LRMD lost STONITH connection",
"Connection to stonith-ng.* closed",
"Fencing daemon connection failed",
r"crmd.*:\s*warn.*:\s*Callback already present",
]
self.components["stonith-ignore"] = [
r"pengine.*: Recover Fencing",
r"Updating failcount for Fencing",
r"error:.*Connection to stonith-ng failed",
r"error:.*Connection to stonith-ng.*closed \(I/O condition=17\)",
r"crit:.*Fencing daemon connection failed",
r"error:.*Sign-in failed: triggered a retry",
"STONITH connection failed, finalizing .* pending operations.",
r"crmd.*:\s+Result of .* operation for Fencing.*Error",
]
self.components["stonith-ignore"].extend(self.components["common-ignore"])
class crm_corosync_docker(crm_corosync):
    '''
    Patterns for Corosync version 2 cluster manager class
    '''
    def __init__(self, name):
        crm_corosync.__init__(self, name)

        # Docker-based nodes start/stop the cluster via wrapper scripts
        # instead of the host's service commands
        self.commands.update({
            "StartCmd"       : "pcmk_start",
            "StopCmd"        : "pcmk_stop",
        })
class PatternSelector(object):
    """ Front-end for looking up patterns/commands by variant name """

    def __init__(self, name=None):
        self.name = name
        self.base = BasePatterns("crm-base")

        # Instantiating a variant registers it in 'patternvariants'
        if not name:
            crm_corosync("crm-corosync")
        elif name == "crm-corosync":
            crm_corosync(name)
        elif name == "crm-corosync-docker":
            crm_corosync_docker(name)

    def get_variant(self, variant):
        """ Return the registered variant, falling back to the base patterns """
        try:
            return patternvariants[variant]
        except KeyError:
            print("defaulting to crm-base for %s" % variant)
            return self.base

    def get_patterns(self, variant, kind):
        return self.get_variant(variant).get_patterns(kind)

    def get_template(self, variant, key):
        return self.get_variant(variant)[key]

    def get_component(self, variant, kind):
        return self.get_variant(variant).get_component(kind)

    def __getitem__(self, key):
        return self.get_template(self.name, key)
# python cts/CTSpatt.py -k crm-corosync -t StartCmd
if __name__ == '__main__':
    # Self-test entry point: print one template from one variant
    pdir=os.path.dirname(sys.path[0])
    sys.path.insert(0, pdir) # So that things work from the source directory

    kind=None
    template=None

    skipthis=None
    args=sys.argv[1:]
    for i in range(0, len(args)):
        if skipthis:
            # Previous option consumed this argument
            skipthis=None
            continue

        elif args[i] == "-k" or args[i] == "--kind":
            skipthis=1
            kind = args[i+1]

        elif args[i] == "-t" or args[i] == "--template":
            skipthis=1
            template = args[i+1]

        else:
            print("Illegal argument " + args[i])

    print(PatternSelector(kind)[template])
diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am
index 122ec15c10..899db5acd1 100644
--- a/daemons/attrd/Makefile.am
+++ b/daemons/attrd/Makefile.am
@@ -1,29 +1,29 @@
#
# Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
include $(top_srcdir)/Makefile.common
halibdir = $(CRM_DAEMON_DIR)
-halib_PROGRAMS = attrd
+halib_PROGRAMS = pacemaker-attrd
## SOURCES
noinst_HEADERS = internal.h attrd_common.h
-attrd_CFLAGS = $(CFLAGS_HARDENED_EXE)
-attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+pacemaker_attrd_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
-attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \
- $(top_builddir)/lib/pengine/libpe_rules.la \
- $(top_builddir)/lib/common/libcrmcommon.la \
- $(top_builddir)/lib/cib/libcib.la \
- $(top_builddir)/lib/lrmd/liblrmd.la \
- $(CLUSTERLIBS)
+pacemaker_attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \
+ $(top_builddir)/lib/pengine/libpe_rules.la \
+ $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/lrmd/liblrmd.la \
+ $(CLUSTERLIBS)
-attrd_SOURCES = main.c commands.c attrd_common.c attrd_common_alerts.c
+pacemaker_attrd_SOURCES = main.c commands.c attrd_common.c attrd_common_alerts.c
clean-generic:
rm -f *.log *.debug *.xml *~
diff --git a/daemons/pacemakerd/pacemaker.in b/daemons/pacemakerd/pacemaker.in
index 8f5ca1905b..6d3127d796 100644
--- a/daemons/pacemakerd/pacemaker.in
+++ b/daemons/pacemakerd/pacemaker.in
@@ -1,189 +1,189 @@
#!@BASH_PATH@
# Authors:
# Andrew Beekhof <abeekhof@redhat.com>
# Fabio M. Di Nitto <fdinitto@redhat.com>
#
# License: Revised BSD
# chkconfig: - 99 01
# description: Pacemaker Cluster Manager
# processname: pacemakerd
#
### BEGIN INIT INFO
# Provides: pacemaker
# Required-Start: $network $remote_fs corosync
# Should-Start: $syslog
# Required-Stop: $network $remote_fs corosync
# Default-Start:
# Default-Stop:
# Short-Description: Starts and stops Pacemaker Cluster Manager.
# Description: Starts and stops Pacemaker Cluster Manager.
### END INIT INFO
desc="Pacemaker Cluster Manager"
prog="pacemakerd"
# set secure PATH
PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@"
checkrc() {
    # Show the status marker matching the previous command's exit code
    if [ $? -eq 0 ]; then
        success
    else
        failure
    fi
}
# Print the OK marker and return the cursor to the start of the line
success()
{
    echo -ne "[  OK  ]\r"
}

# Print the FAILED marker and return the cursor to the start of the line
failure()
{
    echo -ne "[FAILED]\r"
}

# Send a message to syslog under the "pacemaker" tag
log()
{
    logger -t pacemaker -p daemon.notice "$*"
}

# Log a message and also echo it (without newline) to the console
notify()
{
    log "$*"
    echo -n "$*"
}
# Report whether the named daemon ($1) is running.
# Returns LSB status codes: 0 = running,
# 1 = not running but pid file exists, 3 = stopped
status()
{
    pid=$(pidof $1 2>/dev/null)
    local rtrn=$?
    if [ $rtrn -ne 0 ]; then
        echo "$1 is stopped"
        if [ -f "@localstatedir@/run/$prog.pid" ]; then
            rtrn=1
        else
            rtrn=3
        fi
    else
        echo "$1 (pid $pid) is running..."
    fi
    return $rtrn
}
# Source distro helper functions plus optional configuration; "set -a"
# exports every variable assigned while the config files are sourced
if [ -d @CONFIGDIR@ ]; then
    [ -f @INITDIR@/functions ] && . @INITDIR@/functions
    set -a
    [ -f @CONFIGDIR@/pacemaker ] && . @CONFIGDIR@/pacemaker
    [ -f @CONFIGDIR@/sbd ] && . @CONFIGDIR@/sbd
    set +a
fi

# Prefer the distro's subsystem lock directory when available
LOCK_DIR="."

if [ -d "@localstatedir@/lock/subsys" ]; then
    LOCK_DIR="@localstatedir@/lock/subsys"
elif [ -d "@localstatedir@/lock" ]; then
    LOCK_DIR="@localstatedir@/lock"
fi

[ -z "$LOCK_FILE" ] && LOCK_FILE="$LOCK_DIR/pacemaker"

# Check if there is a valid watchdog-device configured in sbd config
if [ x != "x$SBD_WATCHDOG_DEV" -a "/dev/null" != "$SBD_WATCHDOG_DEV" -a -c "$SBD_WATCHDOG_DEV" ]; then
    # enhance for unavailable chkconfig - don't touch sbd for now
    if chkconfig --list sbd_helper 2>/dev/null | grep -q ":on"; then
        SBD_SERVICE=sbd_helper
    fi
fi
# Start pacemakerd if not already running; on success create the
# lock file and record the pid, setting rtrn=1 on failure
start()
{
    notify "Starting $desc"

    # most recent distributions use tmpfs for $@localstatedir@/run
    # to avoid to clean it up on every boot.
    # they also assume that init scripts will create
    # required subdirectories for proper operations
    mkdir -p "@localstatedir@/run"

    if status $prog > /dev/null 2>&1; then
        success
    else
        $prog > /dev/null 2>&1 &

        # Time to connect to corosync and fail
        sleep 5

        if status $prog > /dev/null 2>&1; then
            touch "$LOCK_FILE"
            pidof $prog > "@localstatedir@/run/$prog.pid"
            success
        else
            failure
            rtrn=1
        fi
    fi
    echo
}
stop()
{
shutdown_prog=$prog
if ! status $prog > /dev/null 2>&1; then
shutdown_prog="crmd"
fi
if status $shutdown_prog > /dev/null 2>&1; then
notify "Signaling $desc to terminate"
kill -TERM $(pidof $prog) > /dev/null 2>&1
checkrc
echo
notify "Waiting for cluster services to unload"
while status $prog > /dev/null 2>&1; do
sleep 1
echo -n "."
done
else
echo -n "$desc is already stopped"
fi
rm -f "$LOCK_FILE"
rm -f "@localstatedir@/run/$prog.pid"
- killall -q -9 'crmd stonithd attrd cib lrmd pacemakerd'
+ killall -q -9 pacemakerd pacemaker-attrd crmd stonithd cib lrmd
success
echo
}
rtrn=0

# Dispatch on the requested init-script action
case "$1" in
    start)
        start
        ;;
    restart|reload|force-reload)
        stop
        start
        ;;
    condrestart|try-restart)
        # Restart only if the daemon is currently running
        if status $prog > /dev/null 2>&1; then
            stop
            start
        fi
        ;;
    status)
        status $prog
        rtrn=$?
        ;;
    stop)
        stop
        # Also stop sbd if a watchdog service was detected above
        [ "x$SBD_SERVICE" != x ] && service $SBD_SERVICE stop
        ;;
    *)
        echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}"
        rtrn=2
        ;;
esac

exit $rtrn
diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in
index 3c60b8e442..d8fb609251 100644
--- a/daemons/pacemakerd/pacemaker.service.in
+++ b/daemons/pacemakerd/pacemaker.service.in
@@ -1,88 +1,88 @@
[Unit]
Description=Pacemaker High Availability Cluster Manager
Documentation=man:pacemakerd
Documentation=https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html-single/Pacemaker_Explained/index.html
# DefaultDependencies takes care of sysinit.target,
# basic.target, and shutdown.target
# We need networking to bind to a network address. It is recommended not to
# use Wants or Requires with network.target, and not to use
# network-online.target for server daemons.
After=network.target
# Time syncs can make the clock jump backward, which messes with logging
# and failure timestamps, so wait until it's done.
After=time-sync.target
# Managing systemd resources requires DBus.
After=dbus.service
Wants=dbus.service
# Some OCF resources may have dependencies that aren't managed by the cluster;
# these must be started before Pacemaker and stopped after it. The
# resource-agents package provides this target, which lets system administrators
# add drop-ins for those dependencies.
After=resource-agents-deps.target
Wants=resource-agents-deps.target
After=syslog.service
After=rsyslog.service
After=corosync.service
Requires=corosync.service
[Install]
WantedBy=multi-user.target
[Service]
Type=simple
KillMode=process
NotifyAccess=main
EnvironmentFile=-@CONFIGDIR@/pacemaker
EnvironmentFile=-@CONFIGDIR@/sbd
SuccessExitStatus=100
ExecStart=@sbindir@/pacemakerd -f
# Systemd v227 and above can limit the number of processes spawned by a
# service. That is a bad idea for an HA cluster resource manager, so disable it
# by default. The administrator can create a local override if they really want
# a limit. If your systemd version does not support TasksMax, and you want to
# get rid of the resulting log warnings, comment out this option.
TasksMax=infinity
# If pacemakerd doesn't stop, it's probably waiting on a cluster
# resource. Sending -KILL will just get the node fenced
SendSIGKILL=no
-# If we ever hit the StartLimitInterval/StartLimitBurst limit and the
+# If we ever hit the StartLimitInterval/StartLimitBurst limit, and the
# admin wants to stop the cluster while pacemakerd is not running, it
# might be a good idea to enable the ExecStopPost directive below.
#
-# Although the node will likely end up being fenced as a result so it's
-# not on by default
+# However, the node will likely end up being fenced as a result, so it's
+# not enabled by default.
#
-# ExecStopPost=/usr/bin/killall -TERM crmd attrd stonithd cib pengine lrmd
+# ExecStopPost=/usr/bin/killall -TERM pacemaker-attrd crmd stonithd cib pengine lrmd
# If you want Corosync to stop whenever Pacemaker is stopped,
# uncomment the next line too:
#
# ExecStopPost=/bin/sh -c 'pidof crmd || killall -TERM corosync'
# Uncomment this for older versions of systemd that didn't support
# TimeoutStopSec
# TimeoutSec=30min
# Pacemaker can only exit after all managed services have shut down
# A HA database could conceivably take even longer than this
TimeoutStopSec=30min
TimeoutStartSec=60s
# Restart options include: no, on-success, on-failure, on-abort or always
Restart=on-failure
# crm_perror() writes directly to stderr, so ignore it here
# to avoid double-logging with the wrong format
StandardError=null
diff --git a/daemons/pacemakerd/pacemaker.sysconfig b/daemons/pacemakerd/pacemaker.sysconfig
index 1034191b95..c780e46417 100644
--- a/daemons/pacemakerd/pacemaker.sysconfig
+++ b/daemons/pacemakerd/pacemaker.sysconfig
@@ -1,127 +1,127 @@
#==#==# Variables that control logging
# Enable debug logging globally or per-subsystem.
# Multiple subsystems may be listed separated by commas,
# e.g. PCMK_debug=crmd,pengine
-# PCMK_debug=yes|no|crmd|pengine|cib|stonith-ng|attrd|pacemakerd
+# PCMK_debug=yes|no|crmd|pengine|cib|stonith-ng|pacemaker-attrd|pacemakerd
# Send detailed log messages to the specified file. Compared to messages logged
# via syslog, messages in this file may have extended information, and will
# include messages of "info" severity (and, if debug and/or trace logging
# has been enabled, those as well). This log is of more use to developers and
# advanced system administrators, and when reporting problems.
# PCMK_logfile=/var/log/pacemaker/pacemaker.log
# Enable logging via syslog, using the specified syslog facility. Messages sent
# here are of value to all Pacemaker users. This can be disabled using "none",
# but that is not recommended. The default is "daemon".
# PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7
# Unless syslog logging is disabled using PCMK_logfacility=none, messages of
# the specified severity and higher will be sent to syslog. The default value
# of "notice" is appropriate for most installations; "info" is highly verbose
# and "debug" is almost certain to send you blind (which is why there is a
# separate detail log specified by PCMK_logfile).
# PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug
# Log all messages from a comma-separated list of functions.
# PCMK_trace_functions=function1,function2,function3
# Log all messages from a comma-separated list of files (no path).
# Wildcards are supported, e.g. PCMK_trace_files=prefix*.c
# PCMK_trace_files=file.c,other.h
# Log all messages matching comma-separated list of formats.
# PCMK_trace_formats="Sent delete %d"
# Log all messages from a comma-separated list of tags.
# PCMK_trace_tags=tag1,tag2
# Dump the blackbox whenever the message at function and line is emitted,
# e.g. PCMK_trace_blackbox=te_graph_trigger:223,unpack_clone:81
# PCMK_trace_blackbox=fn:line,fn2:line2,...
# Enable blackbox logging globally or per-subsystem. The blackbox contains a
# rolling buffer of all logs (including info, debug, and trace) and is written
# after a crash or assertion failure, and/or when SIGTRAP is received. The
# blackbox recorder can also be enabled for Pacemaker daemons at runtime by
# sending SIGUSR1 (or SIGTRAP), and disabled by sending SIGUSR2. Multiple
# subsystems may be listed separated by commas, e.g. PCMK_blackbox=crmd,pengine
-# PCMK_blackbox=yes|no|crmd|pengine|cib|stonith-ng|attrd|pacemakerd
+# PCMK_blackbox=yes|no|crmd|pengine|cib|stonith-ng|pacemaker-attrd|pacemakerd
#==#==# Advanced use only
# By default, nodes will join the cluster in an online state when they first
# start, unless they were previously put into standby mode. If this variable is
# set to "standby" or "online", it will force this node to join in the
# specified state when starting.
# (only supported for cluster nodes, not Pacemaker Remote nodes)
# PCMK_node_start_state=default
# Specify an alternate location for RNG schemas and XSL transforms.
# (This is of use only to developers.)
# PCMK_schema_directory=/some/path
# Pacemaker consists of a master process with multiple subsidiary daemons. If
# one of the daemons crashes, the master process will normally attempt to
# restart it. If this is set to "true", the master process will instead panic
# the host (see PCMK_panic_action). The default is unset.
# PCMK_fail_fast=no
# Pacemaker will panic its host under certain conditions. If this is set to
# "crash", Pacemaker will trigger a kernel crash (which is useful if you want a
# kernel dump to investigate). For any other value, Pacemaker will trigger a
# host reboot. The default is unset.
# PCMK_panic_action=crash
#==#==# Pacemaker Remote
# Use the contents of this file as the authorization key to use with Pacemaker
# Remote connections. This file must be readable by Pacemaker daemons (that is,
# it must allow read permissions to either the hacluster user or the haclient
# group), and its contents must be identical on all nodes. The default is
# "/etc/pacemaker/authkey".
# PCMK_authkey_location=/etc/pacemaker/authkey
# Use this TCP port number when connecting to a Pacemaker Remote node. This
# value must be the same on all nodes. The default is "3121".
# PCMK_remote_port=3121
#==#==# IPC
# Force use of a particular class of IPC connection.
# PCMK_ipc_type=shared-mem|socket|posix|sysv
# Specify an IPC buffer size in bytes. This is useful when connecting to really
# big clusters that exceed the default 128KB buffer.
# PCMK_ipc_buffer=131072
#==#==# Profiling and memory leak testing (mainly useful to developers)
# Affect the behavior of glib's memory allocator. Setting to "always-malloc"
# when running under valgrind will help valgrind track malloc/free better;
# setting to "debug-blocks" when not running under valgrind will perform
# (somewhat expensive) memory checks.
# G_SLICE=always-malloc
# Uncommenting this will make malloc() initialize newly allocated memory
# and free() wipe it (to help catch uninitialized-memory/use-after-free).
# MALLOC_PERTURB_=221
# Uncommenting this will make malloc() and friends print to stderr and abort
# for some (inexpensive) memory checks.
# MALLOC_CHECK_=3
# Set to yes/no or cib,crmd etc. to run some or all daemons under valgrind.
# PCMK_valgrind_enabled=yes
# PCMK_valgrind_enabled=cib,crmd
# Set to yes/no or cib,crmd etc. to run some or all daemons under valgrind with
# the callgrind tool enabled.
# PCMK_callgrind_enabled=yes
# PCMK_callgrind_enabled=cib,crmd
# Set the options to pass to valgrind, when valgrind is enabled. See
# valgrind(1) man page for details. "--vgdb=no" is specified because lrmd can
# lower privileges when executing commands, which would otherwise leave a bunch
# of unremovable files in /tmp.
VALGRIND_OPTS="--leak-check=full --trace-children=no --vgdb=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all"
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index d5bfa34c77..78cc20a466 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -1,1102 +1,1105 @@
/*
* Copyright 2010-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#include <pwd.h>
#include <grp.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/reboot.h>
#include <crm/msg_xml.h>
#include <crm/common/ipcs.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster.h>
#include <dirent.h>
#include <ctype.h>
gboolean pcmk_quorate = FALSE;
gboolean fatal_error = FALSE;
GMainLoop *mainloop = NULL;
#define PCMK_PROCESS_CHECK_INTERVAL 5
const char *local_name = NULL;
uint32_t local_nodeid = 0;
crm_trigger_t *shutdown_trigger = NULL;
const char *pid_file = "/var/run/pacemaker.pid";
typedef struct pcmk_child_s {
int pid;
long flag;
int start_seq;
int respawn_count;
gboolean respawn;
const char *name;
const char *uid;
const char *command;
gboolean active_before_startup;
} pcmk_child_t;
/* Index into the array below */
#define pcmk_child_crmd 3
/* *INDENT-OFF* */
static pcmk_child_t pcmk_children[] = {
{ 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL },
{ 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" },
{ 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
{ 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
- { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
+ {
+ 0, crm_proc_attrd, 4, 0, TRUE, "pacemaker-attrd",
+ CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-attrd"
+ },
{ 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL },
{ 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
{ 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" },
};
/* *INDENT-ON* */
static gboolean start_child(pcmk_child_t * child);
static gboolean check_active_before_startup_processes(gpointer user_data);
static gboolean update_node_processes(uint32_t id, const char *uname,
uint32_t procs);
void update_process_clients(crm_client_t *client);
/*!
 * \internal
 * \brief Build the bitmask of cluster processes active on the local node
 *
 * \return Cluster-layer process flag OR'ed with the flag of every
 *         subsidiary daemon that currently has a PID recorded
 */
static uint32_t
get_process_list(void)
{
    uint32_t active = crm_get_cluster_proc();

    for (int child = 0; child < SIZEOF(pcmk_children); child++) {
        if (pcmk_children[child].pid) {
            active |= pcmk_children[child].flag;
        }
    }
    return active;
}
/*!
 * \internal
 * \brief Handle the exit of a subsidiary daemon
 *
 * Clears the child's bookkeeping, broadcasts the updated process list, then
 * either continues an in-progress shutdown, panics the host (if PCMK_fail_fast
 * is set), or respawns the child.
 *
 * \param[in] child  Entry in pcmk_children[] for the daemon that exited
 */
static void
pcmk_process_exit(pcmk_child_t * child)
{
    child->pid = 0;
    child->active_before_startup = FALSE;
    /* Broadcast the fact that one of our processes died ASAP
     *
     * Try to get some logging of the cause out first though
     * because we're probably about to get fenced
     *
     * Potentially do this only if respawn_count > N
     * to allow for local recovery
     */
    update_node_processes(local_nodeid, NULL, get_process_list());
    child->respawn_count += 1;
    if (child->respawn_count > MAX_RESPAWN) {
        // Give up on local recovery after too many failures
        crm_err("Child respawn count exceeded by %s", child->name);
        child->respawn = FALSE;
    }
    if (shutdown_trigger) {
        // Shutdown is in progress; poke the shutdown worker instead of respawning
        mainloop_set_trigger(shutdown_trigger);
        update_node_processes(local_nodeid, NULL, get_process_list());
    } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) {
        // Administrator asked for host-level recovery on daemon failure
        crm_err("Rebooting system because of %s", child->name);
        pcmk_panic(__FUNCTION__);
    } else if (child->respawn) {
        crm_notice("Respawning failed child process: %s", child->name);
        start_child(child);
    }
}
/*!
 * \internal
 * \brief Mainloop callback invoked when a tracked child process exits
 *
 * Logs the exit appropriately, and for fatal/panic exit codes disables the
 * child's respawn and initiates cluster shutdown (or host panic), before
 * delegating cleanup to pcmk_process_exit().
 *
 * \param[in] p         Mainloop child tracker for the process
 * \param[in] pid       Process ID of the child
 * \param[in] core      Whether the child dumped core
 * \param[in] signo     Signal that terminated the child (0 if normal exit)
 * \param[in] exitcode  Child's exit status (when signo is 0)
 */
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
    pcmk_child_t *child = mainloop_child_userdata(p);
    const char *name = mainloop_child_name(p);
    if (signo) {
        // SIGKILL is expected during forced shutdown, so log it less severely
        do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
                   "%s[%d] terminated with signal %d (core=%d)",
                   name, pid, signo, core);
    } else {
        switch(exitcode) {
            case CRM_EX_OK:
                crm_info("%s[%d] exited with status %d (%s)",
                         name, pid, exitcode, crm_exit_str(exitcode));
                break;
            case CRM_EX_FATAL:
                // Child reported an unrecoverable condition: take the cluster down
                crm_warn("Shutting cluster down because %s[%d] had fatal failure",
                         name, pid);
                child->respawn = FALSE;
                fatal_error = TRUE;
                pcmk_shutdown(SIGTERM);
                break;
            case CRM_EX_PANIC:
                // Child requested a host reset (e.g. lost quorum safety)
                do_crm_log_always(LOG_EMERG,
                                  "%s[%d] instructed the machine to reset",
                                  name, pid);
                child->respawn = FALSE;
                fatal_error = TRUE;
                pcmk_panic(__FUNCTION__);
                pcmk_shutdown(SIGTERM);
                break;
            default:
                crm_err("%s[%d] exited with status %d (%s)",
                        name, pid, exitcode, crm_exit_str(exitcode));
                break;
        }
    }
    pcmk_process_exit(child);
}
/*!
 * \internal
 * \brief Ask a subsidiary daemon to stop by sending it a signal
 *
 * \param[in] child   Entry in pcmk_children[] to signal
 * \param[in] signal  Signal to send (0 means SIGTERM)
 *
 * \return TRUE (always; a failed kill() is only logged)
 */
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
    int sig = (signal == 0)? SIGTERM : signal;

    // Children without a command are managed elsewhere (or not at all)
    if (child->command == NULL) {
        crm_debug("Nothing to do for child \"%s\"", child->name);
        return TRUE;
    }

    // Nothing to signal if we never started (or already reaped) it
    if (child->pid <= 0) {
        crm_trace("Client %s not running", child->name);
        return TRUE;
    }

    errno = 0;
    if (kill(child->pid, sig) == 0) {
        crm_notice("Stopping %s "CRM_XS" sent signal %d to process %d",
                   child->name, sig, child->pid);
    } else {
        crm_perror(LOG_ERR, "Could not stop %s (process %d) with signal %d",
                   child->name, child->pid, sig);
    }
    return TRUE;
}
// Argument vectors reused by each fork; filled in just before exec
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
/*!
 * \internal
 * \brief Fork and exec a subsidiary daemon
 *
 * In the parent, registers the child with the mainloop and broadcasts the new
 * process list. In the child, optionally wraps the daemon in valgrind or
 * callgrind (per PCMK_valgrind_enabled / PCMK_callgrind_enabled), drops
 * privileges to the configured user/group, redirects stdio to /dev/null, and
 * execs the daemon.
 *
 * \param[in] child  Entry in pcmk_children[] to start
 *
 * \return TRUE on success, FALSE if the configured user could not be found
 */
static gboolean
start_child(pcmk_child_t * child)
{
    int lpc = 0;
    uid_t uid = 0;
    gid_t gid = 0;
    struct rlimit oflimits;
    gboolean use_valgrind = FALSE;
    gboolean use_callgrind = FALSE;
    const char *devnull = "/dev/null";
    const char *env_valgrind = getenv("PCMK_valgrind_enabled");
    const char *env_callgrind = getenv("PCMK_callgrind_enabled");
    child->active_before_startup = FALSE;
    if (child->command == NULL) {
        crm_info("Nothing to do for child \"%s\"", child->name);
        return TRUE;
    }
    /* The env vars may be a boolean ("run everything under valgrind") or a
     * list of daemon names; callgrind implies valgrind */
    if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
        use_callgrind = TRUE;
        use_valgrind = TRUE;
    } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
        use_callgrind = TRUE;
        use_valgrind = TRUE;
    } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
        use_valgrind = TRUE;
    } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
        use_valgrind = TRUE;
    }
    if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
        crm_warn("Cannot enable valgrind for %s:"
                 " The location of the valgrind binary is unknown", child->name);
        use_valgrind = FALSE;
    }
    if (child->uid) {
        if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
            crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
            return FALSE;
        }
        crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
    }
    child->pid = fork();
    CRM_ASSERT(child->pid != -1);
    if (child->pid > 0) {
        /* parent */
        mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
        crm_info("Forked child %d for process %s%s", child->pid, child->name,
                 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
        update_node_processes(local_nodeid, NULL, get_process_list());
        return TRUE;
    } else {
        /* Child: start a new session so signals to pacemakerd don't hit us */
        (void)setsid();
        /* Setup the two alternate arg arrays */
        opts_vgrind[0] = strdup(VALGRIND_BIN);
        if (use_callgrind) {
            opts_vgrind[1] = strdup("--tool=callgrind");
            opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
            opts_vgrind[3] = strdup(child->command);
            opts_vgrind[4] = NULL;
        } else {
            opts_vgrind[1] = strdup(child->command);
            opts_vgrind[2] = NULL;
            opts_vgrind[3] = NULL;
            opts_vgrind[4] = NULL;
        }
        opts_default[0] = strdup(child->command);
        if(gid) {
            // Whether we need root group access to talk to cluster layer
            bool need_root_group = TRUE;
            if (is_corosync_cluster()) {
                /* Corosync clusters can drop root group access, because we set
                 * uidgid.gid.${gid}=1 via CMAP, which allows these processes to
                 * connect to corosync.
                 */
                need_root_group = FALSE;
            }
            // Drop root group access if not needed
            if (!need_root_group && (setgid(gid) < 0)) {
                crm_perror(LOG_ERR, "Could not set group to %d", gid);
            }
            /* Initialize supplementary groups to only those always granted to
             * the user, plus haclient (so we can access IPC).
             */
            if (initgroups(child->uid, gid) < 0) {
                crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
            }
        }
        // Drop user privileges last, after group setup (which requires root)
        if (uid && setuid(uid) < 0) {
            crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
        }
        /* Close all open file descriptors */
        getrlimit(RLIMIT_NOFILE, &oflimits);
        for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
            close(lpc);
        }
        // Reopen fds 0-2 on /dev/null so library code can't clobber real files
        (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
        (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
        (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
        if (use_valgrind) {
            (void)execvp(VALGRIND_BIN, opts_vgrind);
        } else {
            (void)execvp(child->command, opts_default);
        }
        // Only reached if exec failed
        crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
        crm_exit(CRM_EX_FATAL);
    }
    return TRUE; /* never reached */
}
/*!
 * \internal
 * \brief Timer callback: force an unresponsive child to exit
 *
 * \param[in] data  Entry in pcmk_children[] being shut down
 *
 * \return FALSE (always, so the timer does not repeat)
 */
static gboolean
escalate_shutdown(gpointer data)
{
    pcmk_child_t *child = data;

    if (child->pid == 0) {
        return FALSE;
    }
    /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
    crm_err("Child %s not terminating in a timely manner, forcing", child->name);
    stop_child(child, SIGSEGV);
    return FALSE;
}
/*!
 * \internal
 * \brief Mainloop trigger that drives the staged cluster shutdown
 *
 * Stops children in descending start_seq order, one phase at a time. The
 * function is re-entered (via the trigger) each time a child exits; it returns
 * early while any child in the current phase is still running. When all
 * children are stopped, it quits the main loop (after an optional
 * shutdown_delay).
 *
 * \return TRUE so the trigger remains registered
 */
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
    // Current shutdown phase (0 = not started; counts down through start_seq)
    static int phase = 0;
    // Next time we should log a "still waiting" message
    static time_t next_log = 0;
    static int max = SIZEOF(pcmk_children);
    int lpc = 0;
    if (phase == 0) {
        crm_notice("Shutting down Pacemaker");
        phase = max;
        /* Add a second, more frequent, check to speed up shutdown */
        g_timeout_add_seconds(5, check_active_before_startup_processes, NULL);
    }
    for (; phase > 0; phase--) {
        /* Don't stop anything with start_seq < 1 */
        for (lpc = max - 1; lpc >= 0; lpc--) {
            pcmk_child_t *child = &(pcmk_children[lpc]);
            if (phase != child->start_seq) {
                continue;
            }
            if (child->pid) {
                time_t now = time(NULL);
                if (child->respawn) {
                    // First attempt for this child: signal it and stop respawning
                    next_log = now + 30;
                    child->respawn = FALSE;
                    stop_child(child, SIGTERM);
                    if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
                        // Daemons stopped after crmd get a hard deadline
                        g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
                    }
                } else if (now >= next_log) {
                    next_log = now + 30;
                    crm_notice("Still waiting for %s to terminate "
                               CRM_XS " pid=%d seq=%d",
                               child->name, child->pid, child->start_seq);
                }
                // Wait for this child before moving to the next phase
                return TRUE;
            }
            /* cleanup */
            crm_debug("%s confirmed stopped", child->name);
            child->pid = 0;
        }
    }
    /* send_cluster_id(); */
    crm_notice("Shutdown complete");
    {
        const char *delay = daemon_option("shutdown_delay");
        if(delay) {
            // Flush disks and give other nodes time to notice before exiting
            sync();
            sleep(crm_get_msec(delay) / 1000);
        }
    }
    g_main_loop_quit(mainloop);
    if (fatal_error) {
        crm_notice("Shutting down and staying down after fatal error");
        crm_exit(CRM_EX_FATAL);
    }
    return TRUE;
}
// Signal handler that logs and otherwise ignores the signal (e.g. SIGHUP)
static void
pcmk_ignore(int nsig)
{
    crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig);
}
// SIGQUIT handler: panic the local host (reboot or crash per PCMK_panic_action)
static void
pcmk_sigquit(int nsig)
{
    pcmk_panic(__FUNCTION__);
}
/*!
 * \brief Initiate (or continue) an orderly shutdown of all cluster daemons
 *
 * Safe to call repeatedly (including from signal handlers); the actual work
 * happens in pcmk_shutdown_worker() via a mainloop trigger.
 *
 * \param[in] nsig  Signal that prompted the shutdown (unused beyond the call)
 */
void
pcmk_shutdown(int nsig)
{
    if (shutdown_trigger == NULL) {
        shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
    }
    mainloop_set_trigger(shutdown_trigger);
}
/*!
 * \internal
 * \brief libqb callback: accept a new IPC client connection
 *
 * \return 0 on success, -EIO if client state could not be allocated
 */
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    crm_trace("Connection %p", c);
    if (crm_client_new(c, uid, gid) == NULL) {
        return -EIO;
    }
    return 0;
}
// libqb callback after an IPC connection is created (trace logging only)
static void
pcmk_ipc_created(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
}
/*!
 * \internal
 * \brief libqb callback: process an IPC request from a client
 *
 * Handles CRM_OP_QUIT (shut the cluster down), CRM_OP_RM_NODE_CACHE
 * (broadcast a node-cache purge to all peers via CPG), and treats any other
 * request as a query for the current process list.
 *
 * \return 0 (meaning of the return code is unclear in the libqb API --
 *         TODO confirm)
 */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    const char *task = NULL;
    crm_client_t *c = crm_client_get(qbc);
    xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);

    crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
    if (msg == NULL) {
        return 0;
    }
    task = crm_element_value(msg, F_CRM_TASK);
    if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
        /* Time to quit */
        crm_notice("Shutting down in response to ticket %s (%s)",
                   crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
        pcmk_shutdown(15);
    } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
        /* Send to everyone */
        struct iovec *iov;
        /* Renamed from "id": the old name shadowed the IPC message id above,
         * making it easy to confuse the two in future changes.
         */
        int node_id = 0;
        const char *name = NULL;

        crm_element_value_int(msg, XML_ATTR_ID, &node_id);
        name = crm_element_value(msg, XML_ATTR_UNAME);
        crm_notice("Instructing peers to remove references to node %s/%u", name, node_id);
        iov = calloc(1, sizeof(struct iovec));
        CRM_ASSERT(iov); // consistent with update_process_peers(); avoid NULL deref
        iov->iov_base = dump_xml_unformatted(msg);
        iov->iov_len = 1 + strlen(iov->iov_base);
        send_cpg_iov(iov);
    } else {
        // Default: reply with the cluster-wide process list
        update_process_clients(c);
    }
    free_xml(msg);
    return 0;
}
/*!
 * \internal
 * \brief libqb callback: clean up when an IPC connection closes
 *
 * \return 0 (meaning of the return code is unclear in the libqb API --
 *         TODO confirm)
 */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
    crm_client_t *client = crm_client_get(c);
    if (client == NULL) {
        // Connection was never fully accepted, or already cleaned up
        return 0;
    }
    crm_trace("Connection %p", c);
    crm_client_destroy(client);
    return 0;
}
// libqb callback when an IPC connection is destroyed; reuses the close handler
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
    pcmk_ipc_closed(c);
}
// libqb IPC server callback table for the pacemakerd ("mcp") IPC service
struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
    .connection_accept = pcmk_ipc_accept,
    .connection_created = pcmk_ipc_created,
    .msg_process = pcmk_ipc_dispatch,
    .connection_closed = pcmk_ipc_closed,
    .connection_destroyed = pcmk_ipc_destroy
};
/*!
 * \internal
 * \brief Send an XML message with process list of all known peers to client(s)
 *
 * Builds a <nodes> document from the peer cache (including a quorum flag on
 * corosync clusters) and sends it as an IPC server event.
 *
 * \param[in] client Send message to this client, or all clients if NULL
 */
void
update_process_clients(crm_client_t *client)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    xmlNode *update = create_xml_node(NULL, "nodes");
    if (is_corosync_cluster()) {
        crm_xml_add_int(update, "quorate", pcmk_quorate);
    }
    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
        xmlNode *xml = create_xml_node(update, "node");
        crm_xml_add_int(xml, "id", node->id);
        crm_xml_add(xml, "uname", node->uname);
        crm_xml_add(xml, "state", node->state);
        crm_xml_add_int(xml, "processes", node->processes);
    }
    if(client) {
        crm_trace("Sending process list to client %s", client->id);
        crm_ipcs_send(client, 0, update, crm_ipc_server_event);
    } else {
        /* Broadcast: note that the client parameter is reused as the hash
         * table iteration variable here (it is NULL on entry to this branch).
         */
        crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections));
        g_hash_table_iter_init(&iter, client_connections);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) {
            crm_ipcs_send(client, 0, update, crm_ipc_server_event);
        }
    }
    free_xml(update);
}
/*!
 * \internal
 * \brief Send a CPG message with local node's process list to all peers
 */
static void
update_process_peers(void)
{
    /* Do nothing for corosync-2 based clusters */
    struct iovec *payload = calloc(1, sizeof(struct iovec));

    CRM_ASSERT(payload);
    if (local_name == NULL) {
        payload->iov_base = crm_strdup_printf("<node proclist=\"%u\"/>",
                                              get_process_list());
    } else {
        payload->iov_base = crm_strdup_printf("<node uname=\"%s\" proclist=\"%u\"/>",
                                              local_name, get_process_list());
    }
    payload->iov_len = strlen(payload->iov_base) + 1;
    crm_trace("Sending %s", (char*) payload->iov_base);
    send_cpg_iov(payload);
}
/*!
 * \internal
 * \brief Update a node's process list, notifying clients and peers if needed
 *
 * \param[in] id Node ID of affected node
 * \param[in] uname Uname of affected node
 * \param[in] procs Affected node's process list mask
 *
 * \return TRUE if the process list changed, FALSE otherwise
 */
static gboolean
update_node_processes(uint32_t id, const char *uname, uint32_t procs)
{
    gboolean changed = FALSE;
    crm_node_t *node = crm_get_peer(id, uname);
    // A zero mask is treated as "no information", not "no processes"
    if (procs != 0) {
        if (procs != node->processes) {
            crm_debug("Node %s now has process list: %.32x (was %.32x)",
                      node->uname, procs, node->processes);
            node->processes = procs;
            changed = TRUE;
            /* If local node's processes have changed, notify clients/peers */
            if (id == local_nodeid) {
                update_process_clients(NULL);
                update_process_peers();
            }
        } else {
            crm_trace("Node %s still has process list: %.32x", node->uname, procs);
        }
    }
    return changed;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
{"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"},
{"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"},
{"standby", 0, 0, 's', "\tStart node in standby state"},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
/*!
 * \internal
 * \brief Change ownership of a path, logging (but not failing) on error
 *
 * \param[in] path  Filesystem path to change
 * \param[in] uid   New owning user ID
 * \param[in] gid   New owning group ID
 */
static void
mcp_chown(const char *path, uid_t uid, gid_t gid)
{
    if (chown(path, uid, gid) < 0) {
        crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
                 path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
    }
}
/*!
 * \internal
 * \brief Recurring timer: poll daemons that were already running at startup
 *
 * Children found via /proc at startup (active_before_startup) are not tracked
 * by the mainloop SIGCHLD machinery, so this poll detects when they exit and
 * runs the normal exit handling for them.
 *
 * \return TRUE to keep the timer while any such child is alive, else FALSE
 */
static gboolean
check_active_before_startup_processes(gpointer user_data)
{
    int start_seq = 1, lpc = 0;
    static int max = SIZEOF(pcmk_children);
    gboolean keep_tracking = FALSE;
    for (start_seq = 1; start_seq < max; start_seq++) {
        for (lpc = 0; lpc < max; lpc++) {
            if (pcmk_children[lpc].active_before_startup == FALSE) {
                /* we are already tracking it as a child process. */
                continue;
            } else if (start_seq != pcmk_children[lpc].start_seq) {
                continue;
            } else {
                const char *name = pcmk_children[lpc].name;
                // stonith-ng's on-disk process name differs from its entry name
                if (pcmk_children[lpc].flag == crm_proc_stonith_ng) {
                    name = "stonithd";
                }
                if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) {
                    crm_notice("Process %s terminated (pid=%d)",
                               name, pcmk_children[lpc].pid);
                    pcmk_process_exit(&(pcmk_children[lpc]));
                    continue;
                }
            }
            /* at least one of the processes found at startup
             * is still going, so keep this recurring timer around */
            keep_tracking = TRUE;
        }
    }
    return keep_tracking;
}
/*!
 * \internal
 * \brief Scan /proc for cluster daemons already running, and track them
 *
 * Used when pacemakerd restarts while its children survived: any matching
 * daemon process is adopted (pid recorded, active_before_startup set), and a
 * recurring poll is scheduled to notice when it exits. No-op without procfs.
 */
static void
find_and_track_existing_processes(void)
{
#if SUPPORT_PROCFS
    DIR *dp;
    struct dirent *entry;
    bool start_tracker = FALSE;
    char entry_name[64];
    dp = opendir("/proc");
    if (!dp) {
        /* no proc directory to search through */
        crm_notice("Can not read /proc directory to track existing components");
        return;
    }
    while ((entry = readdir(dp)) != NULL) {
        int pid;
        int max = SIZEOF(pcmk_children);
        int i;
        // Skip /proc entries that are not process directories
        if (crm_procfs_process_info(entry, entry_name, &pid) < 0) {
            continue;
        }
        for (i = 0; i < max; i++) {
            const char *name = pcmk_children[i].name;
            if (pcmk_children[i].start_seq == 0) {
                continue;
            }
            // stonith-ng's on-disk process name differs from its entry name
            if (pcmk_children[i].flag == crm_proc_stonith_ng) {
                name = "stonithd";
            }
            if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) {
                crm_notice("Tracking existing %s process (pid=%d)", name, pid);
                pcmk_children[i].pid = pid;
                pcmk_children[i].active_before_startup = TRUE;
                start_tracker = TRUE;
                break;
            }
        }
    }
    if (start_tracker) {
        // Poll adopted processes, since SIGCHLD won't fire for them
        g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
                              NULL);
    }
    closedir(dp);
#else
    crm_notice("No procfs support, so skipping check for existing components");
#endif // SUPPORT_PROCFS
}
/*!
 * \internal
 * \brief Start all subsidiary daemons not already running, in start_seq order
 */
static void
init_children_processes(void)
{
    int start_seq = 1, lpc = 0;
    static int max = SIZEOF(pcmk_children);
    /* start any children that have not been detected */
    for (start_seq = 1; start_seq < max; start_seq++) {
        /* don't start anything with start_seq < 1 */
        for (lpc = 0; lpc < max; lpc++) {
            if (pcmk_children[lpc].pid) {
                /* we are already tracking it */
                continue;
            }
            if (start_seq == pcmk_children[lpc].start_seq) {
                start_child(&(pcmk_children[lpc]));
            }
        }
    }
    /* From this point on, any daemons being started will be due to
     * respawning rather than node start.
     *
     * This may be useful for the daemons to know
     */
    setenv("PCMK_respawned", "true", 1);
}
// CPG connection-loss handler: losing corosync is fatal for pacemakerd
static void
mcp_cpg_destroy(gpointer user_data)
{
    crm_err("Connection destroyed");
    crm_exit(CRM_EX_DISCONNECT);
}
/*!
 * \internal
 * \brief Process a CPG message (process list or manual peer cache removal)
 *
 * \param[in] handle CPG connection (ignored)
 * \param[in] groupName CPG group name (ignored)
 * \param[in] nodeid ID of affected node
 * \param[in] pid Process ID (ignored)
 * \param[in] msg CPG XML message
 * \param[in] msg_len Length of msg in bytes (ignored)
 */
static void
mcp_cpg_deliver(cpg_handle_t handle,
                const struct cpg_name *groupName,
                uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    // May be NULL if the message is not valid XML
    xmlNode *xml = string2xml(msg);
    const char *task = crm_element_value(xml, F_CRM_TASK);
    crm_trace("Received CPG message (%s): %.200s",
              (task? task : "process list"), (char*)msg);
    if (task == NULL) {
        // No task attribute means this is a peer's process-list broadcast
        if (nodeid == local_nodeid) {
            crm_debug("Ignoring message with local node's process list");
        } else {
            uint32_t procs = 0;
            const char *uname = crm_element_value(xml, "uname");
            crm_element_value_int(xml, "proclist", (int *)&procs);
            if (update_node_processes(nodeid, uname, procs)) {
                update_process_clients(NULL);
            }
        }
    } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
        // Peer requested removal of a node from the membership cache
        int id = 0;
        const char *name = NULL;
        crm_element_value_int(xml, XML_ATTR_ID, &id);
        name = crm_element_value(xml, XML_ATTR_UNAME);
        reap_crm_member(id, name);
    }
    if (xml != NULL) {
        free_xml(xml);
    }
}
/*!
 * \internal
 * \brief CPG membership-change callback
 *
 * Updates the peer cache via the standard handler, then rebroadcasts the
 * local process list so new members learn about us.
 */
static void
mcp_cpg_membership(cpg_handle_t handle,
                   const struct cpg_name *groupName,
                   const struct cpg_address *member_list, size_t member_list_entries,
                   const struct cpg_address *left_list, size_t left_list_entries,
                   const struct cpg_address *joined_list, size_t joined_list_entries)
{
    /* Update peer cache if needed */
    pcmk_cpg_membership(handle, groupName, member_list, member_list_entries,
                        left_list, left_list_entries,
                        joined_list, joined_list_entries);
    /* Always broadcast our own presence after any membership change */
    update_process_peers();
}
/*!
 * \internal
 * \brief Quorum-change callback: record the new quorum state
 *
 * \return TRUE to keep the quorum subscription active
 */
static gboolean
mcp_quorum_callback(unsigned long long seq, gboolean quorate)
{
    pcmk_quorate = quorate;
    return TRUE;
}
// Quorum connection-loss handler (log only; CPG loss handles the exit)
static void
mcp_quorum_destroy(gpointer user_data)
{
    crm_info("connection lost");
}
int
main(int argc, char **argv)
{
int rc;
int flag;
int argerr = 0;
int option_index = 0;
gboolean shutdown = FALSE;
uid_t pcmk_uid = 0;
gid_t pcmk_gid = 0;
struct rlimit cores;
crm_ipc_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
static crm_cluster_t cluster;
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
mainloop_add_signal(SIGHUP, pcmk_ignore);
mainloop_add_signal(SIGQUIT, pcmk_sigquit);
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'f':
/* Legacy */
break;
case 'p':
pid_file = optarg;
break;
case 's':
set_daemon_option("node_start_state", "standby");
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
case 'S':
shutdown = TRUE;
break;
case 'F':
printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION,
CRM_FEATURE_SET, CRM_FEATURES);
crm_exit(CRM_EX_OK);
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
setenv("LC_ALL", "C", 1);
set_daemon_option("mcp", "true");
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
crm_ipc_connect(old_instance);
if (shutdown) {
crm_debug("Terminating previous instance");
while (crm_ipc_connected(old_instance)) {
xmlNode *cmd =
create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);
crm_debug(".");
crm_ipc_send(old_instance, cmd, 0, 0, NULL);
free_xml(cmd);
sleep(2);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_exit(CRM_EX_OK);
} else if (crm_ipc_connected(old_instance)) {
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("Pacemaker is already active, aborting startup");
crm_exit(CRM_EX_FATAL);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
if (mcp_read_config() == FALSE) {
crm_notice("Could not obtain corosync config data, exiting");
crm_exit(CRM_EX_UNAVAILABLE);
}
// OCF shell functions and cluster-glue need facility under different name
{
const char *facility = daemon_option("logfacility");
if (facility && safe_str_neq(facility, "none")) {
setenv("HA_LOGFACILITY", facility, 1);
}
}
crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s",
PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_loop_new(NULL, FALSE);
sysrq_init();
rc = getrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
} else {
if (cores.rlim_max == 0 && geteuid() == 0) {
cores.rlim_max = RLIM_INFINITY;
} else {
crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
}
cores.rlim_cur = cores.rlim_max;
rc = setrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR,
"Core file generation will remain disabled."
" Core files are an important diagnostic tool, so"
" please consider enabling them by default.");
}
}
if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
crm_exit(CRM_EX_NOUSER);
}
mkdir(CRM_STATE_DIR, 0750);
mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store core/blackbox/pengine/cib files in */
crm_build_path(CRM_PACEMAKER_DIR, 0750);
mcp_chown(CRM_PACEMAKER_DIR, pcmk_uid, pcmk_gid);
/* Used to store core files in */
crm_build_path(CRM_CORE_DIR, 0750);
mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);
/* Used to store blackbox dumps in */
crm_build_path(CRM_BLACKBOX_DIR, 0750);
mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);
/* Used to store policy engine inputs in */
crm_build_path(PE_STATE_DIR, 0750);
mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store the cluster configuration */
crm_build_path(CRM_CONFIG_DIR, 0750);
mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);
/* Resource agent paths are constructed by the lrmd */
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks);
if (ipcs == NULL) {
crm_err("Couldn't start IPC server");
crm_exit(CRM_EX_OSERR);
}
/* Allows us to block shutdown */
if (cluster_connect_cfg(&local_nodeid) == FALSE) {
crm_err("Couldn't connect to Corosync's CFG service");
crm_exit(CRM_EX_PROTOCOL);
}
if(pcmk_locate_sbd() > 0) {
setenv("PCMK_watchdog", "true", 1);
} else {
setenv("PCMK_watchdog", "false", 1);
}
find_and_track_existing_processes();
cluster.destroy = mcp_cpg_destroy;
cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
cluster.cpg.cpg_confchg_fn = mcp_cpg_membership;
crm_set_autoreap(FALSE);
rc = pcmk_ok;
if (cluster_connect_cpg(&cluster) == FALSE) {
crm_err("Couldn't connect to Corosync's CPG service");
rc = -ENOPROTOOPT;
} else if (cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy)
== FALSE) {
rc = -ENOTCONN;
} else {
local_name = get_local_node_name();
update_node_processes(local_nodeid, local_name, get_process_list());
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
init_children_processes();
crm_info("Starting mainloop");
g_main_loop_run(mainloop);
}
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_destroy(mainloop);
cluster_disconnect_cpg(&cluster);
cluster_disconnect_cfg();
return crm_exit(crm_errno2exit(rc));
}
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Verification.txt b/doc/Clusters_from_Scratch/en-US/Ch-Verification.txt
index 784a3b2723..f9435f008f 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Verification.txt
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Verification.txt
@@ -1,147 +1,147 @@
= Start and Verify Cluster =
== Start the Cluster ==
Now that corosync is configured, it is time to start the cluster.
The command below will start corosync and pacemaker on both nodes
in the cluster. If you are issuing the start command from a different
node than the one you ran the `pcs cluster auth` command on earlier, you
must authenticate on the current node you are logged into before you will
be allowed to start the cluster.
----
[root@pcmk-1 ~]# pcs cluster start --all
pcmk-1: Starting Cluster...
pcmk-2: Starting Cluster...
----
[NOTE]
======
An alternative to using the `pcs cluster start --all` command
is to issue either of the below command sequences on each node in the
cluster separately:
----
# pcs cluster start
Starting Cluster...
----
or
----
# systemctl start corosync.service
# systemctl start pacemaker.service
----
======
[IMPORTANT]
====
In this example, we are not enabling the corosync and pacemaker services
to start at boot. If a cluster node fails or is rebooted, you will need to run
+pcs cluster start pass:[<replaceable>nodename</replaceable>]+ (or `--all`) to start the cluster on it.
While you could enable the services to start at boot, requiring a manual
start of cluster services gives you the opportunity to do a post-mortem investigation
of a node failure before returning it to the cluster.
====
== Verify Corosync Installation ==
First, use `corosync-cfgtool` to check whether cluster communication is happy:
----
[root@pcmk-1 ~]# corosync-cfgtool -s
Printing ring status.
Local node ID 1
RING ID 0
id = 192.168.122.101
status = ring 0 active with no faults
----
We can see here that everything appears normal with our fixed IP
address (not a 127.0.0.x loopback address) listed as the *id*, and *no
faults* for the status.
If you see something different, you might want to start by checking
the node's network, firewall and selinux configurations.
Next, check the membership and quorum APIs:
----
[root@pcmk-1 ~]# corosync-cmapctl | grep members
runtime.totem.pg.mrp.srp.members.1.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1.ip (str) = r(0) ip(192.168.122.101)
runtime.totem.pg.mrp.srp.members.1.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1.status (str) = joined
runtime.totem.pg.mrp.srp.members.2.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.2.ip (str) = r(0) ip(192.168.122.102)
runtime.totem.pg.mrp.srp.members.2.join_count (u32) = 2
runtime.totem.pg.mrp.srp.members.2.status (str) = joined
[root@pcmk-1 ~]# pcs status corosync
Membership information
--------------------------
Nodeid Votes Name
1 1 pcmk-1 (local)
2 1 pcmk-2
----
You should see both nodes have joined the cluster.
== Verify Pacemaker Installation ==
Now that we have confirmed that Corosync is functional, we can check
the rest of the stack. Pacemaker has already been started, so verify
the necessary processes are running:
----
[root@pcmk-1 ~]# ps axf
PID TTY STAT TIME COMMAND
2 ? S 0:00 [kthreadd]
...lots of processes...
1362 ? Ssl 0:35 corosync
1379 ? Ss 0:00 /usr/sbin/pacemakerd -f
1380 ? Ss 0:00 \_ /usr/libexec/pacemaker/cib
1381 ? Ss 0:00 \_ /usr/libexec/pacemaker/stonithd
1382 ? Ss 0:00 \_ /usr/libexec/pacemaker/lrmd
- 1383 ? Ss 0:00 \_ /usr/libexec/pacemaker/attrd
+ 1383 ? Ss 0:00 \_ /usr/libexec/pacemaker/pacemaker-attrd
1384 ? Ss 0:00 \_ /usr/libexec/pacemaker/pengine
1385 ? Ss 0:00 \_ /usr/libexec/pacemaker/crmd
----
If that looks OK, check the `pcs status` output:
----
[root@pcmk-1 ~]# pcs status
Cluster name: mycluster
WARNING: no stonith devices and stonith-enabled is not false
Stack: corosync
Current DC: pcmk-2 (version 1.1.16-12.el7_4.5-94ff4df) - partition with quorum
Last updated: Fri Jan 12 16:15:29 2018
Last change: Fri Jan 12 15:49:47 2018
2 nodes configured
0 resources configured
Online: [ pcmk-1 pcmk-2 ]
No active resources
Daemon Status:
corosync: active/disabled
pacemaker: active/disabled
pcsd: active/enabled
----
Finally, ensure there are no startup errors (aside from messages relating
to not having STONITH configured, which are OK at this point):
----
[root@pcmk-1 ~]# journalctl | grep -i error
----
[NOTE]
======
Other operating systems may report startup errors in other locations,
for example +/var/log/messages+.
======
Repeat these checks on the other node. The results should be the same.
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Status.txt b/doc/Pacemaker_Explained/en-US/Ch-Status.txt
index 3c82074bdd..5139f85c40 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Status.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Status.txt
@@ -1,372 +1,372 @@
= Status -- Here be dragons =
Most users never need to understand the contents of the status section
and can be happy with the output from `crm_mon`.
However for those with a curious inclination, this section attempts to
provide an overview of its contents.
== Node Status ==
indexterm:[Node,Status]
indexterm:[Status of a Node]
In addition to the cluster's configuration, the CIB holds an
up-to-date representation of each cluster node in the +status+ section.
.A bare-bones status entry for a healthy node *cl-virt-1*
======
[source,XML]
-----
<node_state id="1" uname="cl-virt-1" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
<transient_attributes id="1"/>
<lrm id="1"/>
</node_state>
-----
======
Users are highly recommended _not_ to modify any part of a node's
state _directly_. The cluster will periodically regenerate the entire
section from authoritative sources, so any changes should be done
with the tools appropriate to those sources.
.Authoritative Sources for State Information
[width="95%",cols="1m,1<",options="header",align="center"]
|=========================================================
| CIB Object | Authoritative Source
|node_state|crmd
-|transient_attributes|attrd
+|transient_attributes|pacemaker-attrd
|lrm|lrmd
|=========================================================
The fields used in the +node_state+ objects are named as they are
largely for historical reasons and are rooted in Pacemaker's origins
as the resource manager for the older Heartbeat project. They have remained
unchanged to preserve compatibility with older versions.
.Node Status Fields
[width="95%",cols="1m,4<",options="header",align="center"]
|=========================================================
|Field |Description
| id |
indexterm:[id,Node Status]
indexterm:[Node,Status,id]
Unique identifier for the node. Corosync-based clusters use a numeric counter.
| uname |
indexterm:[uname,Node Status]
indexterm:[Node,Status,uname]
The node's name as known by the cluster
| in_ccm |
indexterm:[in_ccm,Node Status]
indexterm:[Node,Status,in_ccm]
Is the node a member at the cluster communication layer? Allowed values:
+true+, +false+.
| crmd |
indexterm:[crmd,Node Status]
indexterm:[Node,Status,crmd]
Is the node a member at the pacemaker layer? Allowed values: +online+,
+offline+.
| crm-debug-origin |
indexterm:[crm-debug-origin,Node Status]
indexterm:[Node,Status,crm-debug-origin]
The name of the source function that made the most recent change (for debugging
purposes).
| join |
indexterm:[join,Node Status]
indexterm:[Node,Status,join]
Does the node participate in hosting resources? Allowed values: +down+,
+pending+, +member+, +banned+.
| expected |
indexterm:[expected,Node Status]
indexterm:[Node,Status,expected]
Expected value for +join+.
|=========================================================
The cluster uses these fields to determine whether, at the node level, the
node is healthy or is in a failed state and needs to be fenced.
== Transient Node Attributes ==
Like regular <<s-node-attributes,node attributes>>, the name/value
pairs listed in the +transient_attributes+ section help to describe the
node. However they are forgotten by the cluster when the node goes offline.
This can be useful, for instance, when you want a node to be in standby mode
(not able to run resources) just until the next reboot.
In addition to any values the administrator sets, the cluster will
also store information about failed resources here.
.A set of transient node attributes for node *cl-virt-1*
======
[source,XML]
-----
<transient_attributes id="cl-virt-1">
<instance_attributes id="status-cl-virt-1">
<nvpair id="status-cl-virt-1-pingd" name="pingd" value="3"/>
<nvpair id="status-cl-virt-1-probe_complete" name="probe_complete" value="true"/>
<nvpair id="status-cl-virt-1-fail-count-pingd:0.monitor_30000" name="fail-count-pingd:0#monitor_30000" value="1"/>
<nvpair id="status-cl-virt-1-last-failure-pingd:0" name="last-failure-pingd:0" value="1239009742"/>
</instance_attributes>
</transient_attributes>
-----
======
In the above example, we can see that a monitor on the +pingd:0+ resource has
failed once, at 09:22:22 UTC 6 April 2009.
footnote:[
You can use the standard `date` command to print a human-readable version of
any seconds-since-epoch value, for example `date -d @1239009742`.
]
We also see that the node is connected to three *pingd* peers and that
all known resources have been checked for on this machine (+probe_complete+).
== Operation History ==
indexterm:[Operation History]
A node's resource history is held in the +lrm_resources+ tag (a child
of the +lrm+ tag). The information stored here includes enough
information for the cluster to stop the resource safely if it is
removed from the +configuration+ section. Specifically, the resource's
+id+, +class+, +type+ and +provider+ are stored.
.A record of the +apcstonith+ resource
======
[source,XML]
<lrm_resource id="apcstonith" type="apcmastersnmp" class="stonith"/>
======
Additionally, we store the last job for every combination of
+resource+, +action+ and +interval+. The concatenation of the values in
this tuple are used to create the id of the +lrm_rsc_op+ object.
.Contents of an +lrm_rsc_op+ job
[width="95%",cols="2m,5<",options="header",align="center"]
|=========================================================
|Field
|Description
| id |
indexterm:[id,Action Status]
indexterm:[Action,Status,id]
Identifier for the job constructed from the resource's +id+,
+operation+ and +interval+.
| call-id |
indexterm:[call-id,Action Status]
indexterm:[Action,Status,call-id]
The job's ticket number. Used as a sort key to determine the order in
which the jobs were executed.
| operation |
indexterm:[operation,Action Status]
indexterm:[Action,Status,operation]
The action the resource agent was invoked with.
| interval |
indexterm:[interval,Action Status]
indexterm:[Action,Status,interval]
The frequency, in milliseconds, at which the operation will be
repeated. A one-off job is indicated by 0.
| op-status |
indexterm:[op-status,Action Status]
indexterm:[Action,Status,op-status]
The job's status. Generally this will be either 0 (done) or -1
(pending). Rarely used in favor of +rc-code+.
| rc-code |
indexterm:[rc-code,Action Status]
indexterm:[Action,Status,rc-code]
The job's result. Refer to the 'Resource Agents' chapter of 'Pacemaker
Administration' for details on what the values here mean and how they are
interpreted.
| last-run |
indexterm:[last-run,Action Status]
indexterm:[Action,Status,last-run]
Machine-local date/time, in seconds since epoch,
at which the job was executed. For diagnostic purposes.
| last-rc-change |
indexterm:[last-rc-change,Action Status]
indexterm:[Action,Status,last-rc-change]
Machine-local date/time, in seconds since epoch,
at which the job first returned the current value of +rc-code+.
For diagnostic purposes.
| exec-time |
indexterm:[exec-time,Action Status]
indexterm:[Action,Status,exec-time]
Time, in milliseconds, that the job was running for.
For diagnostic purposes.
| queue-time |
indexterm:[queue-time,Action Status]
indexterm:[Action,Status,queue-time]
Time, in seconds, that the job was queued for in the LRMd.
For diagnostic purposes.
| crm_feature_set |
indexterm:[crm_feature_set,Action Status]
indexterm:[Action,Status,crm_feature_set]
The version which this job description conforms to. Used when
processing +op-digest+.
| transition-key |
indexterm:[transition-key,Action Status]
indexterm:[Action,Status,transition-key]
A concatenation of the job's graph action number, the graph number,
the expected result and the UUID of the crmd instance that scheduled
it. This is used to construct +transition-magic+ (below).
| transition-magic |
indexterm:[transition-magic,Action Status]
indexterm:[Action,Status,transition-magic]
A concatenation of the job's +op-status+, +rc-code+ and
+transition-key+. Guaranteed to be unique for the life of the cluster
(which ensures it is part of CIB update notifications) and contains
all the information needed for the crmd to correctly analyze and
process the completed job. Most importantly, the decomposed elements
tell the crmd if the job entry was expected and whether it failed.
| op-digest |
indexterm:[op-digest,Action Status]
indexterm:[Action,Status,op-digest]
An MD5 sum representing the parameters passed to the job. Used to
detect changes to the configuration, to restart resources if
necessary.
| crm-debug-origin |
indexterm:[crm-debug-origin,Action Status]
indexterm:[Action,Status,crm-debug-origin]
The origin of the current values.
For diagnostic purposes.
|=========================================================
=== Simple Operation History Example ===
.A monitor operation (determines current state of the +apcstonith+ resource)
======
[source,XML]
-----
<lrm_resource id="apcstonith" type="apcmastersnmp" class="stonith">
<lrm_rsc_op id="apcstonith_monitor_0" operation="monitor" call-id="2"
rc-code="7" op-status="0" interval="0"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
op-digest="2e3da9274d3550dc6526fb24bfcbcba0"
transition-key="22:2:7:2668bbeb-06d5-40f9-936d-24cb7f87006a"
transition-magic="0:7;22:2:7:2668bbeb-06d5-40f9-936d-24cb7f87006a"
last-run="1239008085" last-rc-change="1239008085" exec-time="10" queue-time="0"/>
</lrm_resource>
-----
======
In the above example, the job is a non-recurring monitor operation
often referred to as a "probe" for the +apcstonith+ resource.
The cluster schedules probes for every configured resource on a node when
the node first starts, in order to determine the resource's current state
before it takes any further action.
From the +transition-key+, we can see that this was the 22nd action of
the 2nd graph produced by this instance of the crmd
(2668bbeb-06d5-40f9-936d-24cb7f87006a).
The third field of the +transition-key+ contains a 7, which indicates
that the job expects to find the resource inactive. By looking at the +rc-code+
property, we see that this was the case.
As that is the only job recorded for this node, we can conclude that
the cluster started the resource elsewhere.
=== Complex Operation History Example ===
.Resource history of a +pingd+ clone with multiple jobs
======
[source,XML]
-----
<lrm_resource id="pingd:0" type="pingd" class="ocf" provider="pacemaker">
<lrm_rsc_op id="pingd:0_monitor_30000" operation="monitor" call-id="34"
rc-code="0" op-status="0" interval="30000"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="10:11:0:2668bbeb-06d5-40f9-936d-24cb7f87006a"
...
last-run="1239009741" last-rc-change="1239009741" exec-time="10" queue-time="0"/>
<lrm_rsc_op id="pingd:0_stop_0" operation="stop"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1" call-id="32"
rc-code="0" op-status="0" interval="0"
transition-key="11:11:0:2668bbeb-06d5-40f9-936d-24cb7f87006a"
...
last-run="1239009741" last-rc-change="1239009741" exec-time="10" queue-time="0"/>
<lrm_rsc_op id="pingd:0_start_0" operation="start" call-id="33"
rc-code="0" op-status="0" interval="0"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="31:11:0:2668bbeb-06d5-40f9-936d-24cb7f87006a"
...
last-run="1239009741" last-rc-change="1239009741" exec-time="10" queue-time="0" />
<lrm_rsc_op id="pingd:0_monitor_0" operation="monitor" call-id="3"
rc-code="0" op-status="0" interval="0"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="23:2:7:2668bbeb-06d5-40f9-936d-24cb7f87006a"
...
last-run="1239008085" last-rc-change="1239008085" exec-time="20" queue-time="0"/>
</lrm_resource>
-----
======
When more than one job record exists, it is important to first sort
them by +call-id+ before interpreting them.
Once sorted, the above example can be summarized as:
. A non-recurring monitor operation returning 7 (not running), with a +call-id+ of 3
. A stop operation returning 0 (success), with a +call-id+ of 32
. A start operation returning 0 (success), with a +call-id+ of 33
. A recurring monitor returning 0 (success), with a +call-id+ of 34
The cluster processes each job record to build up a picture of the
resource's state. After the first and second entries, it is
considered stopped, and after the third it is considered active.
Based on the last operation, we can tell that the resource is
currently active.
Additionally, from the presence of a +stop+ operation with a lower
+call-id+ than that of the +start+ operation, we can conclude that the
resource has been restarted. Specifically this occurred as part of
actions 11 and 31 of transition 11 from the crmd instance with the key
+2668bbeb...+. This information can be helpful for locating the
relevant section of the logs when looking for the source of a failure.
diff --git a/doc/shared/en-US/pacemaker-intro.txt b/doc/shared/en-US/pacemaker-intro.txt
index bfa10f5ee5..6ea77bb42c 100644
--- a/doc/shared/en-US/pacemaker-intro.txt
+++ b/doc/shared/en-US/pacemaker-intro.txt
@@ -1,162 +1,177 @@
== What Is 'Pacemaker'? ==
*Pacemaker* is a high-availability 'cluster resource manager' -- software that
runs on a set of hosts (a 'cluster' of 'nodes') in order to minimize downtime of
desired services ('resources').
footnote:[
'Cluster' is sometimes used in other contexts to refer to hosts grouped
together for other purposes, such as high-performance computing (HPC), but
Pacemaker is not intended for those purposes.
]
Pacemaker's key features include:
* Detection of and recovery from node- and service-level failures
* Ability to ensure data integrity by fencing faulty nodes
* Support for one or more nodes per cluster
* Support for multiple resource interface standards (anything that can be
scripted can be clustered)
* Support (but no requirement) for shared storage
* Support for practically any redundancy configuration (active/passive, N+1,
etc.)
* Automatically replicated configuration that can be updated from any node
* Ability to specify cluster-wide relationships between services,
such as ordering, colocation and anti-colocation
* Support for advanced service types, such as 'clones' (services that need to
be active on multiple nodes), 'stateful resources' (clones that can run in
one of two modes), and containerized services
* Unified, scriptable cluster management tools
.Fencing
[NOTE]
====
'Fencing', also known as 'STONITH' (an acronym for Shoot The Other Node In The
Head), is the ability to ensure that it is not possible for a node to be
running a service. This is accomplished via 'fence devices' such as
intelligent power switches that cut power to the target, or intelligent
network switches that cut the target's access to the local network.
Pacemaker represents fence devices as a special class of resource.
A cluster cannot safely recover from certain failure conditions, such as an
unresponsive node, without fencing.
====
== Cluster Architecture ==
At a high level, a cluster can be viewed as having these parts (which together are
often referred to as the 'cluster stack'):
* *Resources:* These are the reason for the cluster's being -- the services
that need to be kept highly available.
* *Resource agents:* These are scripts or operating system components that
start, stop, and monitor resources, given a set of resource parameters.
These provide a uniform interface between Pacemaker and the managed
services.
* *Fence agents:* These are scripts that execute node fencing actions,
given a target and fence device parameters.
* *Cluster membership layer:* This component provides reliable
messaging, membership, and quorum information about the cluster.
Currently, Pacemaker supports http://www.corosync.org/[Corosync]
as this layer.
* *Cluster resource manager:* Pacemaker provides the brain that processes
and reacts to events that occur in the cluster. These events may include
nodes joining or leaving the cluster; resource events caused by failures,
maintenance, or scheduled activities; and other administrative actions.
To achieve the desired availability, Pacemaker may start and stop resources
and fence nodes.
* *Cluster tools:* These provide an interface for users to interact with the
cluster. Various command-line and graphical (GUI) interfaces are available.
Most managed services are not, themselves, cluster-aware. However, many popular
open-source cluster filesystems make use of a common 'Distributed Lock
Manager' (DLM), which makes direct use of Corosync for its messaging and
membership capabilities and Pacemaker for the ability to fence nodes.
.Example Cluster Stack
image::images/pcmk-stack.png["Example cluster stack",width="10cm",height="7.5cm",align="center"]
== Pacemaker Architecture ==
Pacemaker itself is composed of multiple daemons that work together:
- * attrd
+ * pacemakerd
+ * pacemaker-attrd
* cib
* crmd
* lrmd
- * pacemakerd
* pengine
* stonithd
.Internal Components
image::images/pcmk-internals.png["Pacemaker software components",align="center",scaledwidth="65%"]
-The Pacemaker daemon (pacemakerd) is the master process that spawns all the
-other daemons, and respawns them if they unexpectedly exit.
+The Pacemaker master process (pacemakerd) spawns all the other daemons, and
+respawns them if they unexpectedly exit.
The 'Cluster Information Base' (CIB) is an
https://en.wikipedia.org/wiki/XML[XML] representation of the cluster's
configuration and the state of all nodes and resources. The CIB daemon (cib)
keeps the CIB synchronized across the cluster, and handles requests to modify it.
-The 'attribute daemon' (attrd) maintains a database of attributes for all
-nodes, keeps it synchronized across the cluster, and handles requests to modify
-them. These attributes are usually recorded in the CIB.
+The attribute manager (pacemaker-attrd) maintains a database of attributes for
+all nodes, keeps it synchronized across the cluster, and handles requests to
+modify them. These attributes are usually recorded in the CIB.
Given a snapshot of the CIB as input, the 'policy engine' (pengine) determines
what actions are necessary to achieve the desired state of the cluster.
The 'local resource management daemon' (lrmd) handles requests to execute
resource agents on the local node, and returns the result.
The 'STONITH daemon' (stonithd) handles requests to fence nodes. Given a target
node, stonithd decides which cluster node(s) should execute which fencing
device(s), and calls the necessary fencing agents (either directly, or via
requests to stonithd peers on other nodes), and returns the result.
The 'cluster resource management daemon' ('CRMd') is Pacemaker's coordinator,
maintaining a consistent view of the cluster membership and orchestrating all
the other components.
Pacemaker centralizes cluster decision-making by electing one of the CRMd
instances as the 'Designated Controller' ('DC'). Should the elected CRMd
process (or the node it is on) fail, a new one is quickly established.
The DC responds to cluster events by taking a current snapshot of the CIB,
feeding it to the policy engine, then asking the lrmd (either directly on the
local node, or via requests to crmd peers on other nodes) and stonithd to
execute any necessary actions.
+.Old daemon names
+[NOTE]
+====
+The Pacemaker daemons were renamed in version 2.0. You may still find
+references to the old names, especially in documentation targeted to version
+1.1.
+
+[cols="1,2",options="header",align="center"]
+|=========================================================
+| Old name | New name
+| attrd | pacemaker-attrd
+|=========================================================
+
+====
+
== Node Redundancy Designs ==
Pacemaker supports practically any
https://en.wikipedia.org/wiki/High-availability_cluster#Node_configurations[node
redundancy configuration] including 'Active/Active', 'Active/Passive', 'N+1',
'N+M', 'N-to-1' and 'N-to-N'.
Active/passive clusters with two (or more) nodes using Pacemaker and
https://en.wikipedia.org/wiki/Distributed_Replicated_Block_Device[DRBD] are
a cost-effective high-availability solution for many situations. One of the
nodes provides the desired services, and if it fails, the other node takes
over.
.Active/Passive Redundancy
image::images/pcmk-active-passive.png["Active/Passive Redundancy",width="10cm",height="7.5cm",align="center"]
Pacemaker also supports multiple nodes in a shared-failover design,
reducing hardware costs by allowing several active/passive clusters to be
combined and share a common backup node.
.Shared Failover
image::images/pcmk-shared-failover.png["Shared Failover",width="10cm",height="7.5cm",align="center"]
When shared storage is available, every node can potentially be used for
failover. Pacemaker can even run multiple copies of services to spread out the
workload.
.N to N Redundancy
image::images/pcmk-active-active.png["N to N Redundancy",width="10cm",height="7.5cm",align="center"]
diff --git a/extra/ansible/docker/roles/docker-host/files/pcmk_remote_stop b/extra/ansible/docker/roles/docker-host/files/pcmk_remote_stop
index 074cd598aa..62fc8792d8 100644
--- a/extra/ansible/docker/roles/docker-host/files/pcmk_remote_stop
+++ b/extra/ansible/docker/roles/docker-host/files/pcmk_remote_stop
@@ -1,36 +1,37 @@
#!/bin/bash
status()
{
pid=$(pidof $1 2>/dev/null)
rtrn=$?
if [ $rtrn -ne 0 ]; then
echo "$1 is stopped"
else
echo "$1 (pid $pid) is running..."
fi
return $rtrn
}
stop()
{
desc="Pacemaker Remote"
prog=$1
shutdown_prog=$prog
if status $shutdown_prog > /dev/null 2>&1; then
kill -TERM $(pidof $prog) > /dev/null 2>&1
while status $prog > /dev/null 2>&1; do
sleep 1
echo -n "."
done
else
echo -n "$desc is already stopped"
fi
rm -f /var/lock/subsystem/pacemaker
rm -f /var/run/${prog}.pid
- killall -q -9 'crmd stonithd attrd cib lrmd pacemakerd pacemaker_remoted'
+ killall -q -9 pacemakerd pacemaker-attrd crmd stonithd cib lrmd \
+ pacemaker_remoted
}
stop "pacemaker_remoted"
exit 0
diff --git a/extra/ansible/docker/roles/docker-host/files/pcmk_stop b/extra/ansible/docker/roles/docker-host/files/pcmk_stop
index a8f395ad47..e8dced112d 100644
--- a/extra/ansible/docker/roles/docker-host/files/pcmk_stop
+++ b/extra/ansible/docker/roles/docker-host/files/pcmk_stop
@@ -1,45 +1,46 @@
#!/bin/bash
status()
{
pid=$(pidof $1 2>/dev/null)
rtrn=$?
if [ $rtrn -ne 0 ]; then
echo "$1 is stopped"
else
echo "$1 (pid $pid) is running..."
fi
return $rtrn
}
stop()
{
desc="Pacemaker Cluster Manager"
prog=$1
shutdown_prog=$prog
if ! status $prog > /dev/null 2>&1; then
shutdown_prog="crmd"
fi
cname=$(crm_node --name)
crm_attribute -N $cname -n standby -v true -l reboot
if status $shutdown_prog > /dev/null 2>&1; then
kill -TERM $(pidof $prog) > /dev/null 2>&1
while status $prog > /dev/null 2>&1; do
sleep 1
echo -n "."
done
else
echo -n "$desc is already stopped"
fi
rm -f /var/lock/subsystem/pacemaker
rm -f /var/run/${prog}.pid
- killall -q -9 'crmd stonithd attrd cib lrmd pacemakerd pacemaker_remoted'
+ killall -q -9 pacemakerd pacemaker-attrd crmd stonithd cib lrmd \
+ pacemaker_remoted
}
stop "pacemakerd"
/usr/share/corosync/corosync stop > /dev/null 2>&1
killall -q -9 'corosync'
exit 0
diff --git a/extra/cluster-clean b/extra/cluster-clean
index 7294eac753..59602c479f 100755
--- a/extra/cluster-clean
+++ b/extra/cluster-clean
@@ -1,91 +1,97 @@
#!/bin/bash
#
# Copyright 2011-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
# Clean Pacemaker/corosync processes, logs, and state files from a set of
# cluster nodes (addressed individually with -w or as a group with -g),
# using cluster-helper to fan the commands out.

hosts=
group=
kill=0
while true; do
    case "$1" in
        -x) set -x; shift;;
        -w) for h in $2; do
                hosts="$hosts -w $h";
            done
            shift; shift;;
        -g) group=$2; shift; shift;;
        --kill) kill=1; shift;;
        --kill-only) kill=2; shift;;
        "") break;;
        *) echo "unknown option: $1"; exit 1;;
    esac
done

# Default to the CTS group when no explicit target was given
if [ x"$group" = x -a x"$hosts" = x ]; then
    group=$CTS_GROUP
fi

if [ x"$hosts" != x ]; then
    echo `date` ": Cleaning up hosts:"
    target=$hosts
elif [ x"$group" != x ]; then
    echo `date` ": Cleaning up group: $group"
    target="-g $group"
else
    echo "You didn't specify any nodes to clean up"
    exit 1
fi
cluster-helper --list bullet $target

if [ $kill != 0 ]; then
    echo "Cleaning processes"

    # Bah. Force systemd to actually look at the process and realize it's dead
    cluster-helper $target -- "service corosync stop" &> /dev/null &
    cluster-helper $target -- "service pacemaker stop" &> /dev/null &

    cluster-helper $target -- "killall -q -9 corosync pacemakerd pacemaker-attrd pacemaker_remoted stonithd lrmd crmd pengine cib dlm_controld gfs_controld" &> /dev/null
    cluster-helper $target -- 'kill -9 `pidof valgrind`' &> /dev/null

    if [ $kill == 2 ]; then
        exit 0
    fi
fi

#logrotate -f $cluster_rotate
echo "Cleaning files"

log_files=""
log_files="$log_files 'messages*'"
log_files="$log_files 'localmessages*'"
log_files="$log_files 'cluster*.log'"
log_files="$log_files 'corosync.log*'"
log_files="$log_files 'pacemaker.log*'"
log_files="$log_files '*.journal'"
log_files="$log_files '*.journal~'"
log_files="$log_files 'secure-*'"

state_files=""
state_files="$state_files 'cib.xml*'"
state_files="$state_files 'valgrind-*'"
state_files="$state_files 'cib-*'"
state_files="$state_files 'core.*'"
state_files="$state_files 'cts.*'"
state_files="$state_files 'pe*.bz2'"
state_files="$state_files 'fdata-*'"

for f in $log_files; do
    cluster-helper $target -- "find /var/log -name '$f' -exec rm -f \{\} \;"
done

for f in $state_files; do
    cluster-helper $target -- "find /var/lib -name '$f' -exec rm -f \{\} \;"
done

cluster-helper $target -- "find /dev/shm -name 'qb-*' -exec rm -f \{\} \;"
cluster-helper $target -- "find /var/lib/pacemaker/blackbox -name '*-*' -exec rm -f \{\} \;"
cluster-helper $target -- "find /tmp -name '*.valgrind' -exec rm -f \{\} \;"

cluster-helper $target -- 'service rsyslog restart' > /dev/null 2>&1
cluster-helper $target -- 'systemctl restart systemd-journald.socket' > /dev/null 2>&1
cluster-helper $target -- logger -i -p daemon.info __clean_logs__

#touch $cluster_log
echo `date` ": Clean complete"
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index 7fe8984fc1..d9f377f60a 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,370 +1,359 @@
/*
 * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */
#ifndef CRM_CLUSTER_INTERNAL__H
# define CRM_CLUSTER_INTERNAL__H
# include <crm/cluster.h>
typedef struct crm_ais_host_s AIS_Host;
typedef struct crm_ais_msg_s AIS_Message;

// Identity of one endpoint (sender or destination) of a cluster message;
// packed because it is sent on the wire as part of AIS_Message
struct crm_ais_host_s {
    uint32_t id;                    // Cluster-assigned node ID
    uint32_t pid;                   // Process ID of the endpoint
    gboolean local;                 // Whether endpoint is on the local node
    enum crm_ais_msg_types type;    // Which daemon the endpoint is
    uint32_t size;                  // Length of uname actually used
    char uname[MAX_NAME];           // Node name of the endpoint
} __attribute__ ((packed));

// Cluster message envelope; the payload follows in data[]
struct crm_ais_msg_s {
    cs_ipc_header_response_t header __attribute__ ((aligned(8)));
    uint32_t id;
    gboolean is_compressed;         // Whether data[] holds compressed payload
    AIS_Host host;                  // Destination
    AIS_Host sender;                // Origin
    uint32_t size;                  // Uncompressed payload size
    uint32_t compressed_size;       // Payload size when is_compressed
    /* 584 bytes */
    char data[0];                   // Variable-length payload
} __attribute__ ((packed));

/* *INDENT-OFF* */
// Bit flags identifying cluster-related processes (usable as a bitmask)
enum crm_proc_flag {
    crm_proc_none      = 0x00000001,

    // Cluster layers
    crm_proc_cpg       = 0x04000000,

    crm_proc_lrmd      = 0x00000010,
    crm_proc_cib       = 0x00000100,
    crm_proc_crmd      = 0x00000200,
    crm_proc_attrd     = 0x00001000,
    crm_proc_stonithd  = 0x00002000,
    crm_proc_stonith_ng= 0x00100000,
    crm_proc_pe        = 0x00010000,
    crm_proc_te        = 0x00020000,
};
/* *INDENT-ON* */
/*!
* \internal
* \brief Return the process bit corresponding to the current cluster stack
*
* \return Process flag if detectable, otherwise 0
*/
static inline uint32_t
crm_get_cluster_proc()
{
    /* Corosync is the only cluster layer with its own process flag;
     * any other (or undetectable) stack maps to crm_proc_none.
     */
    if (get_cluster_type() == pcmk_cluster_corosync) {
        return crm_proc_cpg;
    }
    return crm_proc_none;
}
/*!
 * \internal
 * \brief Map a process flag to a human-readable name
 *
 * \param[in] proc  Process flag to translate
 *
 * \return Static string naming the process ("unknown" if unrecognized)
 */
static inline const char *
peer2text(enum crm_proc_flag proc)
{
    const char *text = "unknown";

    /* The controller flag combined with the current cluster layer's flag
     * is how a fully joined peer is represented.
     */
    if (proc == (crm_proc_crmd | crm_get_cluster_proc())) {
        return "peer";
    }

    switch (proc) {
        case crm_proc_none:
            text = "none";
            break;
        case crm_proc_cib:
            text = "cib";
            break;
        case crm_proc_crmd:
            text = "crmd";
            break;
        case crm_proc_pe:
            text = "pengine";
            break;
        case crm_proc_te:
            text = "tengine";
            break;
        case crm_proc_lrmd:
            text = "lrmd";
            break;
        case crm_proc_attrd:
            // Fixed: unresolved diff markers here previously broke compilation;
            // the attribute manager is now named pacemaker-attrd
            text = "pacemaker-attrd";
            break;
        case crm_proc_stonithd:
            text = "stonithd";
            break;
        case crm_proc_stonith_ng:
            text = "stonith-ng";
            break;
        case crm_proc_cpg:
            text = "corosync-cpg";
            break;
    }
    return text;
}
static inline enum crm_proc_flag
text2proc(const char *proc)
{
    /* We only care about these two so far */
    if (proc == NULL) {
        return crm_proc_none;
    }
    if (strcmp(proc, "cib") == 0) {
        return crm_proc_cib;
    }
    if (strcmp(proc, "crmd") == 0) {
        return crm_proc_crmd;
    }
    return crm_proc_none;
}
static inline const char *
ais_dest(const AIS_Host *host)
{
    /* Describe a message destination for logging purposes */
    if (host->local) {
        return "local";
    }
    return (host->size > 0)? host->uname : "<all>";
}
# define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size)
/*
typedef enum {
CS_OK = 1,
CS_ERR_LIBRARY = 2,
CS_ERR_VERSION = 3,
CS_ERR_INIT = 4,
CS_ERR_TIMEOUT = 5,
CS_ERR_TRY_AGAIN = 6,
CS_ERR_INVALID_PARAM = 7,
CS_ERR_NO_MEMORY = 8,
CS_ERR_BAD_HANDLE = 9,
CS_ERR_BUSY = 10,
CS_ERR_ACCESS = 11,
CS_ERR_NOT_EXIST = 12,
CS_ERR_NAME_TOO_LONG = 13,
CS_ERR_EXIST = 14,
CS_ERR_NO_SPACE = 15,
CS_ERR_INTERRUPT = 16,
CS_ERR_NAME_NOT_FOUND = 17,
CS_ERR_NO_RESOURCES = 18,
CS_ERR_NOT_SUPPORTED = 19,
CS_ERR_BAD_OPERATION = 20,
CS_ERR_FAILED_OPERATION = 21,
CS_ERR_MESSAGE_ERROR = 22,
CS_ERR_QUEUE_FULL = 23,
CS_ERR_QUEUE_NOT_AVAILABLE = 24,
CS_ERR_BAD_FLAGS = 25,
CS_ERR_TOO_BIG = 26,
CS_ERR_NO_SECTIONS = 27,
CS_ERR_CONTEXT_NOT_FOUND = 28,
CS_ERR_TOO_MANY_GROUPS = 30,
CS_ERR_SECURITY = 100
} cs_error_t;
*/
/*!
 * \internal
 * \brief Map a Corosync CS error code to a human-readable string
 *
 * \param[in] error  cs_error_t value returned by a libcorosync call
 *
 * \return Static description string ("unknown" for unmapped codes, and
 *         always when built without Corosync support)
 */
static inline const char *
ais_error2text(int error)
{
    const char *text = "unknown";

#  if SUPPORT_COROSYNC
    switch (error) {
        case CS_OK:
            text = "OK";
            break;
        case CS_ERR_LIBRARY:
            text = "Library error";
            break;
        case CS_ERR_VERSION:
            text = "Version error";
            break;
        case CS_ERR_INIT:
            text = "Initialization error";
            break;
        case CS_ERR_TIMEOUT:
            text = "Timeout";
            break;
        case CS_ERR_TRY_AGAIN:
            text = "Try again";
            break;
        case CS_ERR_INVALID_PARAM:
            text = "Invalid parameter";
            break;
        case CS_ERR_NO_MEMORY:
            text = "No memory";
            break;
        case CS_ERR_BAD_HANDLE:
            text = "Bad handle";
            break;
        case CS_ERR_BUSY:
            text = "Busy";
            break;
        case CS_ERR_ACCESS:
            text = "Access error";
            break;
        case CS_ERR_NOT_EXIST:
            text = "Doesn't exist";
            break;
        case CS_ERR_NAME_TOO_LONG:
            text = "Name too long";
            break;
        case CS_ERR_EXIST:
            text = "Exists";
            break;
        case CS_ERR_NO_SPACE:
            text = "No space";
            break;
        case CS_ERR_INTERRUPT:
            text = "Interrupt";
            break;
        case CS_ERR_NAME_NOT_FOUND:
            text = "Name not found";
            break;
        case CS_ERR_NO_RESOURCES:
            text = "No resources";
            break;
        case CS_ERR_NOT_SUPPORTED:
            text = "Not supported";
            break;
        case CS_ERR_BAD_OPERATION:
            text = "Bad operation";
            break;
        case CS_ERR_FAILED_OPERATION:
            text = "Failed operation";
            break;
        case CS_ERR_MESSAGE_ERROR:
            text = "Message error";
            break;
        case CS_ERR_QUEUE_FULL:
            text = "Queue full";
            break;
        case CS_ERR_QUEUE_NOT_AVAILABLE:
            text = "Queue not available";
            break;
        case CS_ERR_BAD_FLAGS:
            text = "Bad flags";
            break;
        case CS_ERR_TOO_BIG:
            text = "Too big";
            break;
        case CS_ERR_NO_SECTIONS:
            text = "No sections";
            break;
    }
#  endif
    return text;
}
static inline const char *
msg_type2text(enum crm_ais_msg_types type)
{
const char *text = "unknown";
switch (type) {
case crm_msg_none:
text = "unknown";
break;
case crm_msg_ais:
text = "ais";
break;
case crm_msg_cib:
text = "cib";
break;
case crm_msg_crmd:
text = "crmd";
break;
case crm_msg_pe:
text = "pengine";
break;
case crm_msg_te:
text = "tengine";
break;
case crm_msg_lrmd:
text = "lrmd";
break;
case crm_msg_attrd:
text = "attrd";
break;
case crm_msg_stonithd:
text = "stonithd";
break;
case crm_msg_stonith_ng:
text = "stonith-ng";
break;
}
return text;
}
gboolean check_message_sanity(const AIS_Message * msg, const char *data);
# if SUPPORT_COROSYNC
gboolean send_cpg_iov(struct iovec * iov);
char *get_corosync_uuid(crm_node_t *peer);
char *corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid);
char *corosync_cluster_name(void);
int corosync_cmap_has_config(const char *prefix);
gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent);
gboolean send_cluster_message_cs(xmlNode * msg, gboolean local,
crm_node_t * node, enum crm_ais_msg_types dest);
enum cluster_type_e find_corosync_variant(void);
void terminate_cs_connection(crm_cluster_t * cluster);
gboolean init_cs_connection(crm_cluster_t * cluster);
gboolean init_cs_connection_once(crm_cluster_t * cluster);
# endif
crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer,
uint32_t flag, const char *status);
crm_node_t *crm_update_peer_state(const char *source, crm_node_t * node,
const char *state, int membership);
void crm_update_peer_uname(crm_node_t *node, const char *uname);
void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected);
void crm_reap_unseen_nodes(uint64_t ring_id);
gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
void (*destroy) (gpointer));
gboolean node_name_is_valid(const char *key, const char *name);
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags);
crm_node_t * crm_find_peer(unsigned int id, const char *uname);
#endif
diff --git a/lib/common/attrd_client.c b/lib/common/attrd_client.c
index 5042da64cb..d4c98a3ba6 100644
--- a/lib/common/attrd_client.c
+++ b/lib/common/attrd_client.c
@@ -1,306 +1,306 @@
/*
 * Copyright 2011-2018 Andrew Beekhof <andrew@beekhof.net>
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <crm_internal.h>
#include <stdio.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/attrd.h>
/*!
 * \internal
 * \brief Create a generic pacemaker-attrd operation
 *
 * \param[in] user_name  If not NULL, ACL user to set for operation
 *
 * \return XML of pacemaker-attrd operation
 */
static xmlNode *
create_attrd_op(const char *user_name)
{
    /* Build the skeleton of an attribute manager request, tagged with the
     * message type, the originating subsystem, and (with ACLs) the user.
     */
    xmlNode *op = create_xml_node(NULL, __FUNCTION__);

    crm_xml_add(op, F_TYPE, T_ATTRD);
    crm_xml_add(op, F_ORIG, (crm_system_name? crm_system_name: "unknown"));
#if ENABLE_ACL
    crm_xml_add(op, F_ATTRD_USER, user_name);
#endif

    return op;
}
/*!
 * \internal
 * \brief Send an operation to pacemaker-attrd via IPC
 *
 * \param[in] ipc       Connection to pacemaker-attrd (or NULL to use a local connection)
 * \param[in] attrd_op  XML of pacemaker-attrd operation to send
 *
 * \return pcmk_ok on success, -errno otherwise
 */
/* Implementation note: when no IPC connection is supplied, a process-wide
 * cached connection (local_ipc) is created on first use and retried up to
 * five times; with a caller-supplied connection, exactly one attempt is made.
 */
static int
send_attrd_op(crm_ipc_t *ipc, xmlNode *attrd_op)
{
    int rc = -ENOTCONN;
    int max = 5;    // retry budget (applies only to the cached connection)

    // Cached local connection state shared by all callers in this process
    static gboolean connected = TRUE;
    static crm_ipc_t *local_ipc = NULL;
    static enum crm_ipc_flags flags = crm_ipc_flags_none;

    if (ipc == NULL && local_ipc == NULL) {
        local_ipc = crm_ipc_new(T_ATTRD, 0);
        flags |= crm_ipc_client_response;
        connected = FALSE;
    }

    if (ipc == NULL) {
        ipc = local_ipc;
    }

    while (max > 0) {
        if (connected == FALSE) {
            crm_info("Connecting to cluster... %d retries remaining", max);
            connected = crm_ipc_connect(ipc);
        }

        if (connected) {
            rc = crm_ipc_send(ipc, attrd_op, flags, 0, NULL);
        } else {
            crm_perror(LOG_INFO, "Connection to cluster attribute manager failed");
        }

        if (ipc != local_ipc) {
            // Caller-supplied connection: never retry
            break;

        } else if (rc > 0) {
            break;

        } else if (rc == -EAGAIN || rc == -EALREADY) {
            // Transient failure: back off a little longer each retry
            sleep(5 - max);
            max--;

        } else {
            // Hard failure: drop the cached connection and reconnect
            crm_ipc_close(ipc);
            connected = FALSE;
            sleep(5 - max);
            max--;
        }
    }

    // crm_ipc_send() returns a positive value on success
    if (rc > 0) {
        rc = pcmk_ok;
    }
    return rc;
}
/*!
 * \brief Send a request to pacemaker-attrd
 *
 * \param[in] ipc        Connection to pacemaker-attrd (or NULL to use a local connection)
 * \param[in] command    A character indicating the type of pacemaker-attrd request:
 *                       U or v: update attribute (or refresh if name is NULL)
 *                       u: update attributes matching regular expression in name
 *                       D: delete attribute (value must be NULL)
 *                       R: refresh
 *                       B: update both attribute and its dampening
 *                       Y: update attribute dampening only
 *                       Q: query attribute
 *                       C: remove peer specified by host
 * \param[in] host       Affect only this host (or NULL for all hosts)
 * \param[in] name       Name of attribute to affect
 * \param[in] value      Attribute value to set
 * \param[in] section    Status or nodes
 * \param[in] set        ID of attribute set to use (or NULL to choose first)
 * \param[in] dampen     Attribute dampening to use with B/Y, and U/v if creating
 * \param[in] user_name  ACL user to pass to pacemaker-attrd
 * \param[in] options    Bitmask that may include:
 *                       attrd_opt_remote: host is a Pacemaker Remote node
 *                       attrd_opt_private: attribute is private (not kept in CIB)
 *
 * \return pcmk_ok if request was successfully submitted to pacemaker-attrd, else -errno
 */
int
attrd_update_delegate(crm_ipc_t *ipc, char command, const char *host,
                      const char *name, const char *value, const char *section,
                      const char *set, const char *dampen,
                      const char *user_name, int options)
{
    int rc = pcmk_ok;
    const char *task = NULL;
    const char *name_as = NULL;
    const char *display_host = (host ? host : "localhost");
    const char *display_command = NULL; /* for commands without name/value */
    xmlNode *update = create_attrd_op(user_name);

    /* remap common aliases */
    if (safe_str_eq(section, "reboot")) {
        section = XML_CIB_TAG_STATUS;

    } else if (safe_str_eq(section, "forever")) {
        section = XML_CIB_TAG_NODES;
    }

    // An update without an attribute name is really a refresh
    if (name == NULL && command == 'U') {
        command = 'R';
    }

    switch (command) {
        case 'u':
            task = ATTRD_OP_UPDATE;
            name_as = F_ATTRD_REGEX;
            break;
        case 'D':
        case 'U':
        case 'v':
            task = ATTRD_OP_UPDATE;
            name_as = F_ATTRD_ATTRIBUTE;
            break;
        case 'R':
            task = ATTRD_OP_REFRESH;
            display_command = "refresh";
            break;
        case 'B':
            task = ATTRD_OP_UPDATE_BOTH;
            name_as = F_ATTRD_ATTRIBUTE;
            break;
        case 'Y':
            task = ATTRD_OP_UPDATE_DELAY;
            name_as = F_ATTRD_ATTRIBUTE;
            break;
        case 'Q':
            task = ATTRD_OP_QUERY;
            name_as = F_ATTRD_ATTRIBUTE;
            break;
        case 'C':
            task = ATTRD_OP_PEER_REMOVE;
            display_command = "purge";
            break;
    }

    if (name_as != NULL) {
        if (name == NULL) {
            // This command type requires an attribute name or pattern
            rc = -EINVAL;
            goto done;
        }
        crm_xml_add(update, name_as, name);
    }

    crm_xml_add(update, F_ATTRD_TASK, task);
    crm_xml_add(update, F_ATTRD_VALUE, value);
    crm_xml_add(update, F_ATTRD_DAMPEN, dampen);
    crm_xml_add(update, F_ATTRD_SECTION, section);
    crm_xml_add(update, F_ATTRD_HOST, host);
    crm_xml_add(update, F_ATTRD_SET, set);
    crm_xml_add_int(update, F_ATTRD_IS_REMOTE, is_set(options, attrd_opt_remote));
    crm_xml_add_int(update, F_ATTRD_IS_PRIVATE, is_set(options, attrd_opt_private));

    rc = send_attrd_op(ipc, update);

done:
    free_xml(update);

    // Fixed: unresolved diff markers previously broke compilation here
    if (display_command) {
        crm_debug("Asked pacemaker-attrd to %s %s: %s (%d)",
                  display_command, display_host, pcmk_strerror(rc), rc);
    } else {
        crm_debug("Asked pacemaker-attrd to update %s=%s for %s: %s (%d)",
                  name, value, display_host, pcmk_strerror(rc), rc);
    }
    return rc;
}
/*!
 * \brief Send a request to pacemaker-attrd to clear resource failure
 *
 * \param[in] ipc            Connection to pacemaker-attrd (NULL to use local connection)
 * \param[in] host           Affect only this host (or NULL for all hosts)
 * \param[in] resource       Name of resource to clear (or NULL for all)
 * \param[in] operation      Name of operation to clear (or NULL for all)
 * \param[in] interval_spec  If operation is not NULL, its interval
 * \param[in] user_name      ACL user to pass to pacemaker-attrd
 * \param[in] options        attrd_opt_remote if host is a Pacemaker Remote node
 *
 * \return pcmk_ok if request was successfully submitted to pacemaker-attrd, else -errno
 */
int
attrd_clear_delegate(crm_ipc_t *ipc, const char *host, const char *resource,
                     const char *operation, const char *interval_spec,
                     const char *user_name, int options)
{
    int rc = pcmk_ok;
    xmlNode *clear_op = create_attrd_op(user_name);
    const char *interval_desc = NULL;
    const char *op_desc = NULL;

    crm_xml_add(clear_op, F_ATTRD_TASK, ATTRD_OP_CLEAR_FAILURE);
    crm_xml_add(clear_op, F_ATTRD_HOST, host);
    crm_xml_add(clear_op, F_ATTRD_RESOURCE, resource);
    crm_xml_add(clear_op, F_ATTRD_OPERATION, operation);
    crm_xml_add(clear_op, F_ATTRD_INTERVAL, interval_spec);
    crm_xml_add_int(clear_op, F_ATTRD_IS_REMOTE, is_set(options, attrd_opt_remote));

    rc = send_attrd_op(ipc, clear_op);
    free_xml(clear_op);

    // Describe what was requested, for the debug log only
    if (operation) {
        interval_desc = interval_spec? interval_spec : "nonrecurring";
        op_desc = operation;
    } else {
        interval_desc = "all";
        op_desc = "operations";
    }

    // Fixed: unresolved diff markers previously broke compilation here
    crm_debug("Asked pacemaker-attrd to clear failure of %s %s for %s on %s: %s (%d)",
              interval_desc, op_desc, (resource? resource : "all resources"),
              (host? host : "all nodes"), pcmk_strerror(rc), rc);
    return rc;
}
#define LRM_TARGET_ENV "OCF_RESKEY_" CRM_META "_" XML_LRM_ATTR_TARGET
const char *
attrd_get_target(const char *name)
{
    /* "auto" and "localhost" both mean "figure out this node's name" */
    if (safe_str_eq(name, "auto") || safe_str_eq(name, "localhost")) {
        name = NULL;
    }

    if (name == NULL) {
        char *target_var = crm_meta_name(XML_RSC_ATTR_TARGET);
        char *phys_var = crm_meta_name(PCMK_ENV_PHYSICAL_HOST);
        const char *target = getenv(target_var);
        const char *host_physical = getenv(phys_var);

        /* It is important we use the names by which the PE knows us */
        if (host_physical && safe_str_eq(target, "host")) {
            name = host_physical;

        } else {
            const char *host_pcmk = getenv(LRM_TARGET_ENV);

            if (host_pcmk) {
                name = host_pcmk;
            }
        }
        free(target_var);
        free(phys_var);
    }

    // TODO? Call get_local_node_name() if name == NULL
    // (currently would require linkage against libcrmcluster)
    return name;
}
diff --git a/lib/common/utils.c b/lib/common/utils.c
index 07ba1b7d1d..96a6379c02 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -1,1353 +1,1353 @@
/*
* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <dlfcn.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <pwd.h>
#include <time.h>
#include <libgen.h>
#include <signal.h>
#include <qb/qbdefs.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/cib/internal.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/common/ipc.h>
#include <crm/common/iso8601.h>
#include <crm/common/mainloop.h>
#include <libxml2/libxml/relaxng.h>
#ifndef MAXLINE
# define MAXLINE 512
#endif
#ifdef HAVE_GETOPT_H
# include <getopt.h>
#endif
#ifndef PW_BUFFER_LEN
# define PW_BUFFER_LEN 500
#endif
CRM_TRACE_INIT_DATA(common);

// Flags recording that a configuration error/warning has been reported
// NOTE(review): presumably set by the crm_config_err()/crm_config_warn()
// machinery — confirm, as the macro definitions are not visible here
gboolean crm_config_error = FALSE;
gboolean crm_config_warning = FALSE;

// Name this process uses to identify itself (e.g. as a message origin)
char *crm_system_name = NULL;

// Numeric scores for the "red"/"green"/"yellow" aliases used by char2score()
int node_score_red = 0;
int node_score_green = 0;
int node_score_yellow = 0;

// State for command-line option handling;
// crm_short_options is released by crm_args_fini()
static struct crm_option *crm_long_options = NULL;
static const char *crm_app_description = NULL;
static char *crm_short_options = NULL;
static const char *crm_app_usage = NULL;
gboolean
check_time(const char *value)
{
    /* Durations shorter than five seconds are rejected */
    return (crm_get_msec(value) >= 5000)? TRUE : FALSE;
}
gboolean
check_timer(const char *value)
{
    /* Any parseable, non-negative interval is acceptable */
    return (crm_get_msec(value) >= 0)? TRUE : FALSE;
}
gboolean
check_boolean(const char *value)
{
    int parsed = FALSE;

    /* crm_str_to_boolean() returns 1 only when the value parses cleanly */
    return (crm_str_to_boolean(value, &parsed) == 1)? TRUE : FALSE;
}
gboolean
check_number(const char *value)
{
    errno = 0;

    if (value == NULL) {
        return FALSE;
    }

    /* The infinity keywords are always valid; everything else must survive
     * integer conversion (crm_int_helper() sets errno on failure).
     */
    if (!safe_str_eq(value, CRM_MINUS_INFINITY_S)
        && !safe_str_eq(value, CRM_INFINITY_S)) {
        crm_int_helper(value, NULL);
    }

    return (errno == 0)? TRUE : FALSE;
}
gboolean
check_positive_number(const char* value)
{
    /* NOTE(review): any value parsing to a nonzero integer (including
     * negatives) passes this check — confirm that is the intent
     */
    if (safe_str_eq(value, CRM_INFINITY_S)) {
        return TRUE;
    }
    return crm_int_helper(value, NULL)? TRUE : FALSE;
}
gboolean
check_quorum(const char *value)
{
    /* Accepted quorum policy values */
    const char *valid[] = { "stop", "freeze", "ignore", "suicide" };

    for (int i = 0; i < 4; i++) {
        if (safe_str_eq(value, valid[i])) {
            return TRUE;
        }
    }
    return FALSE;
}
gboolean
check_script(const char *value)
{
    struct stat st;

    /* "/dev/null" is accepted as an explicit no-op script */
    if (safe_str_eq(value, "/dev/null")) {
        return TRUE;
    }

    if (stat(value, &st) != 0) {
        crm_err("Script %s does not exist", value);
        return FALSE;
    }

    if (!S_ISREG(st.st_mode)) {
        crm_err("Script %s is not a regular file", value);
        return FALSE;
    }

    /* Must be executable by owner or group */
    if ((st.st_mode & (S_IXUSR | S_IXGRP)) == 0) {
        crm_err("Script %s is not executable", value);
        return FALSE;
    }

    return TRUE;
}
/*!
 * \brief Validate a utilization-style option value
 *
 * \param[in] value  Configured value to check (may be NULL)
 *
 * \return TRUE if value is a non-negative integer followed by '%'
 *
 * NOTE(review): as in the original, digits with no trailing '%' are
 * rejected and a bare "%" is accepted (parses as 0) — confirm these are
 * the intended semantics before changing them.
 */
gboolean
check_utilization(const char *value)
{
    char *end = NULL;
    long number;

    if (value == NULL) {
        // strtol(NULL, ...) is undefined behavior; treat missing as invalid
        return FALSE;
    }

    number = strtol(value, &end, 10);
    if (end && end[0] != '%') {
        return FALSE;
    } else if (number < 0) {
        return FALSE;
    }

    return TRUE;
}
// Release the short-option string built for command-line parsing
void
crm_args_fini()
{
    free(crm_short_options);
    crm_short_options = NULL;
}
int
char2score(const char *score)
{
    int result = 0;

    if (score == NULL) {
        return 0;
    }

    if (safe_str_eq(score, CRM_MINUS_INFINITY_S)) {
        result = -CRM_SCORE_INFINITY;

    } else if (safe_str_eq(score, CRM_INFINITY_S)
               || safe_str_eq(score, CRM_PLUS_INFINITY_S)) {
        result = CRM_SCORE_INFINITY;

    } else if (safe_str_eq(score, "red")) {
        result = node_score_red;

    } else if (safe_str_eq(score, "yellow")) {
        result = node_score_yellow;

    } else if (safe_str_eq(score, "green")) {
        result = node_score_green;

    } else {
        /* Numeric scores are clamped to the +/- INFINITY range */
        result = crm_parse_int(score, NULL);
        if (result > CRM_SCORE_INFINITY) {
            result = CRM_SCORE_INFINITY;
        } else if (result < -CRM_SCORE_INFINITY) {
            result = -CRM_SCORE_INFINITY;
        }
    }
    return result;
}
/*!
 * \brief Write the string form of a score into a caller-supplied buffer
 *
 * \param[in]  score  Score to convert
 * \param[out] buf    Buffer to write into
 * \param[in]  len    Size of buf in bytes
 *
 * \return buf (possibly truncated if len is too small)
 */
char *
score2char_stack(int score, char *buf, size_t len)
{
    /* Previously used strncpy() with fixed 9/10-byte counts, which ignored
     * len (overflowing small buffers); snprintf() respects the bound and
     * always NUL-terminates.
     */
    if (score >= CRM_SCORE_INFINITY) {
        snprintf(buf, len, "%s", CRM_INFINITY_S);
    } else if (score <= -CRM_SCORE_INFINITY) {
        snprintf(buf, len, "%s", CRM_MINUS_INFINITY_S);
    } else {
        return crm_itoa_stack(score, buf, len);
    }
    return buf;
}
char *
score2char(int score)
{
    /* Newly allocated string form of a score; caller must free */
    if (score >= CRM_SCORE_INFINITY) {
        return strdup(CRM_INFINITY_S);
    }
    if (score <= -CRM_SCORE_INFINITY) {
        return strdup(CRM_MINUS_INFINITY_S);
    }
    return crm_itoa(score);
}
/*!
 * \brief Get the value of a cluster option, falling back to a default
 *
 * \param[in,out] options    Table of option name/value pairs (may be NULL)
 * \param[in]     validate   If not NULL, predicate a usable value must satisfy
 * \param[in]     name       Current name of option to look up
 * \param[in]     old_name   If not NULL, deprecated name also accepted
 * \param[in]     def_value  Value to fall back on when unset or invalid
 *
 * \return Option value (or default), or NULL if no usable value exists
 */
const char *
cluster_option(GHashTable * options, gboolean(*validate) (const char *),
               const char *name, const char *old_name, const char *def_value)
{
    const char *value = NULL;
    char *new_value = NULL;

    CRM_ASSERT(name != NULL);

    if (options) {
        value = g_hash_table_lookup(options, name);

        if ((value == NULL) && old_name) {
            value = g_hash_table_lookup(options, old_name);
            if (value != NULL) {
                crm_config_warn("Support for legacy name '%s' for cluster option '%s'"
                                " is deprecated and will be removed in a future release",
                                old_name, name);

                // Inserting copy with current name ensures we only warn once
                new_value = strdup(value);
                g_hash_table_insert(options, strdup(name), new_value);
                value = new_value;
            }
        }

        if (value && validate && (validate(value) == FALSE)) {
            crm_config_err("Resetting cluster option '%s' to default: value '%s' is invalid",
                           name, value);
            value = NULL;
        }

        if (value) {
            return value;
        }
    }

    // No value found, use default
    value = def_value;

    if (value == NULL) {
        crm_trace("No value or default provided for cluster option '%s'",
                  name);
        return NULL;
    }

    if (validate) {
        CRM_CHECK(validate(value) != FALSE,
                  crm_err("Bug: default value for cluster option '%s' is invalid", name);
                  return NULL);
    }

    crm_trace("Using default value '%s' for cluster option '%s'",
              value, name);

    // Cache the default in the table so later lookups find it directly
    if (options) {
        new_value = strdup(value);
        g_hash_table_insert(options, strdup(name), new_value);
        value = new_value;
    }
    return value;
}
const char *
get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name)
{
    /* Find the named option in the list and evaluate it via cluster_option() */
    for (int lpc = 0; lpc < len; lpc++) {
        pe_cluster_option *option = &option_list[lpc];

        if (safe_str_eq(name, option->name)) {
            return cluster_option(options, option->is_valid, option->name,
                                  option->alt_name, option->default_value);
        }
    }
    CRM_CHECK(FALSE, crm_err("Bug: looking for unknown option '%s'", name));
    return NULL;
}
/*!
 * \brief Print OCF-style metadata XML for a set of cluster options to stdout
 *
 * \param[in] name         Resource agent name to advertise
 * \param[in] version      Version string to advertise
 * \param[in] desc_short   Short description of the agent
 * \param[in] desc_long    Long description of the agent
 * \param[in] option_list  Options to describe as parameters
 * \param[in] len          Number of entries in option_list
 */
void
config_metadata(const char *name, const char *version, const char *desc_short,
                const char *desc_long, pe_cluster_option * option_list, int len)
{
    int lpc = 0;

    fprintf(stdout, "<?xml version=\"1.0\"?>"
            "<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n"
            "<resource-agent name=\"%s\">\n"
            " <version>%s</version>\n"
            " <longdesc lang=\"en\">%s</longdesc>\n"
            " <shortdesc lang=\"en\">%s</shortdesc>\n"
            " <parameters>\n", name, version, desc_long, desc_short);

    for (lpc = 0; lpc < len; lpc++) {
        // Options with no description at all are not advertised
        if (option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) {
            continue;
        }
        fprintf(stdout, " <parameter name=\"%s\" unique=\"0\">\n"
                " <shortdesc lang=\"en\">%s</shortdesc>\n"
                " <content type=\"%s\" default=\"%s\"/>\n"
                " <longdesc lang=\"en\">%s%s%s</longdesc>\n"
                " </parameter>\n",
                option_list[lpc].name,
                option_list[lpc].description_short,
                option_list[lpc].type,
                option_list[lpc].default_value,
                option_list[lpc].description_long ? option_list[lpc].
                description_long : option_list[lpc].description_short,
                option_list[lpc].values ? " Allowed values: " : "",
                option_list[lpc].values ? option_list[lpc].values : "");
    }
    fprintf(stdout, " </parameters>\n</resource-agent>\n");
}
void
verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len)
{
    /* Evaluating each option logs and resets any invalid values */
    for (int lpc = 0; lpc < len; lpc++) {
        cluster_option(options, option_list[lpc].is_valid,
                       option_list[lpc].name, option_list[lpc].alt_name,
                       option_list[lpc].default_value);
    }
}
char *
generate_hash_key(const char *crm_msg_reference, const char *sys)
{
char *hash_key = crm_concat(sys ? sys : "none", crm_msg_reference, '_');
crm_trace("created hash key: (%s)", hash_key);
return hash_key;
}
/*!
 * \brief Look up a system user's user and group IDs
 *
 * \param[in]  name  User name to look up
 * \param[out] uid   If not NULL, set to the user's numeric user ID
 * \param[out] gid   If not NULL, set to the user's numeric group ID
 *
 * \return pcmk_ok on success, negative errno otherwise
 */
int
crm_user_lookup(const char *name, uid_t * uid, gid_t * gid)
{
    int rc = pcmk_ok;
    char *buffer = NULL;
    struct passwd pwd;
    struct passwd *pwentry = NULL;

    buffer = calloc(1, PW_BUFFER_LEN);
    if (buffer == NULL) {
        // Previously unchecked: getpwnam_r() must not get a NULL buffer
        return -ENOMEM;
    }

    rc = getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry);
    if (pwentry) {
        if (uid) {
            *uid = pwentry->pw_uid;
        }
        if (gid) {
            *gid = pwentry->pw_gid;
        }
        crm_trace("User %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid);

    } else {
        // getpwnam_r() returns a positive errno on error, 0 if not found
        rc = rc? -rc : -EINVAL;
        crm_info("User %s lookup: %s", name, pcmk_strerror(rc));
    }

    free(buffer);
    return rc;
}
// Parse one numeric field of a version string, advancing *end_text past it;
// returns -1 on empty input or conversion failure
static int
crm_version_helper(const char *text, char **end_text)
{
    int result = -1;

    CRM_ASSERT(end_text != NULL);

    errno = 0;

    if ((text == NULL) || (text[0] == 0)) {
        return result;
    }

    result = (int) strtol(text, end_text, 10);
    if (errno == EINVAL) {
        crm_err("Conversion of '%s' %c failed", text, text[0]);
        result = -1;
    }
    return result;
}
/*!
 * \brief Compare two dot-separated version strings field by field
 *
 * Fields are compared numerically; a missing trailing field compares
 * as 0 (so "1.2" equals "1.2.0"). NULL compares less than any version
 * and equal to NULL.
 *
 * \return -1 if version1 < version2, 0 if equal, 1 if version1 > version2
 */
int
compare_version(const char *version1, const char *version2)
{
    int rc = 0;
    int lpc = 0;
    char *ver1_copy = NULL, *ver2_copy = NULL;
    char *rest1 = NULL, *rest2 = NULL;

    if (version1 == NULL && version2 == NULL) {
        return 0;
    } else if (version1 == NULL) {
        return -1;
    } else if (version2 == NULL) {
        return 1;
    }

    // Work on copies because crm_version_helper() advances the cursors
    ver1_copy = strdup(version1);
    ver2_copy = strdup(version2);
    rest1 = ver1_copy;
    rest2 = ver2_copy;

    while (1) {
        int digit1 = 0;
        int digit2 = 0;

        lpc++;

        if (rest1 == rest2) {
            // Both exhausted (NULL == NULL): versions are equal
            break;
        }

        if (rest1 != NULL) {
            digit1 = crm_version_helper(rest1, &rest1);
        }

        if (rest2 != NULL) {
            digit2 = crm_version_helper(rest2, &rest2);
        }

        if (digit1 < digit2) {
            rc = -1;
            break;

        } else if (digit1 > digit2) {
            rc = 1;
            break;
        }

        // Step over the field separator; an exhausted string becomes NULL
        if (rest1 != NULL && rest1[0] == '.') {
            rest1++;
        }
        if (rest1 != NULL && rest1[0] == 0) {
            rest1 = NULL;
        }
        if (rest2 != NULL && rest2[0] == '.') {
            rest2++;
        }
        if (rest2 != NULL && rest2[0] == 0) {
            rest2 = NULL;
        }
    }

    free(ver1_copy);
    free(ver2_copy);

    if (rc == 0) {
        crm_trace("%s == %s (%d)", version1, version2, lpc);
    } else if (rc < 0) {
        crm_trace("%s < %s (%d)", version1, version2, lpc);
    } else if (rc > 0) {
        crm_trace("%s > %s (%d)", version1, version2, lpc);
    }
    return rc;
}
gboolean do_stderr = FALSE;
#ifndef NUMCHARS
# define NUMCHARS "0123456789."
#endif
#ifndef WHITESPACE
# define WHITESPACE " \t\n\r\f"
#endif
guint
crm_parse_interval_spec(const char *input)
{
long long msec = 0;
if (input == NULL) {
return 0;
} else if (input[0] != 'P') {
long long tmp = crm_get_msec(input);
if(tmp > 0) {
msec = tmp;
}
} else {
crm_time_t *period_s = crm_time_parse_duration(input);
msec = 1000 * crm_time_get_seconds(period_s);
crm_time_free(period_s);
}
return (msec <= 0)? 0 : ((msec >= G_MAXUINT)? G_MAXUINT : (guint) msec);
}
/*!
 * \brief Parse a duration string into milliseconds
 *
 * Accepts a number followed by an optional unit: ms/msec, us/usec,
 * s/sec, m/min, or h/hr. With no unit, seconds are assumed.
 *
 * \param[in] input  String to parse (may be NULL)
 *
 * \return Milliseconds (LLONG_MAX on overflow), or -1 if unparseable
 */
long long
crm_get_msec(const char *input)
{
    const char *cp = input;
    const char *units;
    long long multiplier = 1000;
    long long divisor = 1;
    long long msec = -1;
    char *end_text = NULL;

    /* double dret; */

    if (input == NULL) {
        return msec;
    }

    cp += strspn(cp, WHITESPACE);
    units = cp + strspn(cp, NUMCHARS);
    units += strspn(units, WHITESPACE);

    if (strchr(NUMCHARS, *cp) == NULL) {
        // No digits at all
        return msec;
    }

    /* The multi-character units must be tested before their single-character
     * prefixes ("ms" before "m", etc.) for the matching to work.
     */
    if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) {
        multiplier = 1;
        divisor = 1;
    } else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) {
        multiplier = 1;
        divisor = 1000;
    } else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) {
        multiplier = 1000;
        divisor = 1;
    } else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) {
        multiplier = 60 * 1000;
        divisor = 1;
    } else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) {
        multiplier = 60 * 60 * 1000;
        divisor = 1;
    } else if (*units != EOS && *units != '\n' && *units != '\r') {
        // Unrecognized unit
        return msec;
    }

    msec = crm_int_helper(cp, &end_text);
    if (msec > LLONG_MAX/multiplier) {
        /* arithmetics overflow while multiplier/divisor mutually exclusive */
        return LLONG_MAX;
    }
    msec *= multiplier;
    msec /= divisor;
    /* dret += 0.5; */
    /* msec = (long long)dret; */
    return msec;
}
extern bool crm_is_daemon;
/* coverity[+kill] */
/*!
 * \brief Log (and optionally produce a core dump for) a failed assertion
 *
 * \param[in] file              Source file containing the assertion
 * \param[in] function          Function containing the assertion
 * \param[in] line              Source line of the assertion
 * \param[in] assert_condition  Text of the failed condition
 * \param[in] do_core           If TRUE, generate a core dump
 * \param[in] do_fork           If TRUE (daemons only), fork a child to dump
 *                              core so the caller can continue running
 */
void
crm_abort(const char *file, const char *function, int line,
          const char *assert_condition, gboolean do_core, gboolean do_fork)
{
    int rc = 0;
    int pid = 0;
    int status = 0;

    /* Implied by the parent's error logging below */
    /* crm_write_blackbox(0); */

    if(crm_is_daemon == FALSE) {
        /* This is a command line tool - do not fork */

        /* crm_add_logfile(NULL); * Record it to a file? */
        crm_enable_stderr(TRUE);        /* Make sure stderr is enabled so we can tell the caller */
        do_fork = FALSE;                /* Just crash if needed */
    }

    if (do_core == FALSE) {
        crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition);
        return;

    } else if (do_fork) {
        pid = fork();

    } else {
        crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition);
    }

    if (pid == -1) {
        crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s",
                 function, file, line, assert_condition);
        return;

    } else if(pid == 0) {
        /* Child process: dump core via abort() */
        abort();
        return;
    }

    /* Parent process: log, capture blackbox, then reap the child */
    crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s",
            function, pid, file, line, assert_condition);
    crm_write_blackbox(SIGTRAP, NULL);

    do {
        rc = waitpid(pid, &status, 0);
        if(rc == pid) {
            return; /* Job done */
        }
    } while(errno == EINTR);

    if (errno == ECHILD) {
        /* crm_mon does this */
        crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid);
        return;
    }
    crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid);
}
/*!
 * \brief Fork into the background, lock a PID file, and detach stdio
 *
 * \param[in] name       Daemon name, used in messages and the PID file
 * \param[in] daemonize  If FALSE, return without doing anything
 * \param[in] pidfile    Path of PID file to check and lock
 *
 * \note Exits the process on failure, and exits the parent on success.
 */
void
crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile)
{
    int rc;
    long pid;
    const char *devnull = "/dev/null";

    if (daemonize == FALSE) {
        return;
    }

    /* Check before we even try... */
    rc = crm_pidfile_inuse(pidfile, 1, name);
    if(rc < pcmk_ok && rc != -ENOENT) {
        pid = crm_read_pidfile(pidfile);
        crm_err("%s: already running [pid %ld in %s]", name, pid, pidfile);
        printf("%s: already running [pid %ld in %s]\n", name, pid, pidfile);
        crm_exit(CRM_EX_ERROR);
    }

    pid = fork();
    if (pid < 0) {
        fprintf(stderr, "%s: could not start daemon\n", name);
        crm_perror(LOG_ERR, "fork");
        crm_exit(CRM_EX_OSERR);

    } else if (pid > 0) {
        // Parent: the child continues as the daemon
        crm_exit(CRM_EX_OK);
    }

    rc = crm_lock_pidfile(pidfile, name);
    if(rc < pcmk_ok) {
        crm_err("Could not lock '%s' for %s: %s (%d)", pidfile, name, pcmk_strerror(rc), rc);
        printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_strerror(rc), rc);
        crm_exit(CRM_EX_ERROR);
    }

    umask(S_IWGRP | S_IWOTH | S_IROTH);

    // Point the standard file descriptors at /dev/null
    close(STDIN_FILENO);
    (void)open(devnull, O_RDONLY);      /* Stdin:  fd 0 */
    close(STDOUT_FILENO);
    (void)open(devnull, O_WRONLY);      /* Stdout: fd 1 */
    close(STDERR_FILENO);
    (void)open(devnull, O_WRONLY);      /* Stderr: fd 2 */
}
/*!
 * \brief Build a meta-attribute name from an attribute name
 *
 * Prepends the CRM_META prefix (joined with '_') and converts any dashes
 * to underscores so the result can be used as a shell variable name.
 *
 * \param[in] field  Attribute name to transform (must not be NULL)
 *
 * \return Newly allocated meta-attribute name (caller must free),
 *         or NULL if \p field is NULL
 */
char *
crm_meta_name(const char *field)
{
    char *crm_name = NULL;
    char *cursor = NULL;

    CRM_CHECK(field != NULL, return NULL);
    crm_name = crm_concat(CRM_META, field, '_');

    /* Massage the names so they can be used as shell variables */
    for (cursor = crm_name; *cursor != '\0'; cursor++) {
        if (*cursor == '-') {
            *cursor = '_';
        }
    }
    return crm_name;
}
/*!
 * \brief Look up the value of a meta-attribute in a hash table
 *
 * \param[in] hash   Table of meta-attribute names to values
 * \param[in] field  Plain attribute name (transformed via crm_meta_name())
 *
 * \return Value from \p hash (not a copy), or NULL if not found
 */
const char *
crm_meta_value(GHashTable * hash, const char *field)
{
    const char *value = NULL;
    char *key = crm_meta_name(field);

    if (key != NULL) {
        value = g_hash_table_lookup(hash, key);
        free(key);
    }
    return value;
}
/*!
 * \brief Convert a crm_option table to a getopt_long() option array
 *
 * \param[in] long_options  Option table (terminated by a NULL name)
 *
 * \return Newly allocated, NULL-terminated struct option array
 *         (always NULL when getopt.h is unavailable)
 * \note Entries whose name begins with '-' (help-text spacers) are skipped.
 */
static struct option *
crm_create_long_opts(struct crm_option *long_options)
{
    struct option *long_opts = NULL;

#ifdef HAVE_GETOPT_H
    int index = 0, lpc = 0;

    /*
     * A previous, possibly poor, choice of '?' as the short form of --help
     * means that getopt_long() returns '?' for both --help and for "unknown option"
     *
     * This dummy entry allows us to differentiate between the two in crm_get_option()
     * and exit with the correct error code
     */
    long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option));
    long_opts[index].name = "__dummmy__";
    long_opts[index].has_arg = 0;
    long_opts[index].flag = 0;
    long_opts[index].val = '_';
    index++;

    for (lpc = 0; long_options[lpc].name != NULL; lpc++) {
        if (long_options[lpc].name[0] == '-') {
            /* Spacer/paragraph entries exist only for crm_help() output */
            continue;
        }

        /* Grow the array by one entry and copy the option over */
        long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option));
        /*fprintf(stderr, "Creating %d %s = %c\n", index,
         * long_options[lpc].name, long_options[lpc].val); */
        long_opts[index].name = long_options[lpc].name;
        long_opts[index].has_arg = long_options[lpc].has_arg;
        long_opts[index].flag = long_options[lpc].flag;
        long_opts[index].val = long_options[lpc].val;
        index++;
    }

    /* Now create the list terminator */
    long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option));
    long_opts[index].name = NULL;
    long_opts[index].has_arg = 0;
    long_opts[index].flag = 0;
    long_opts[index].val = 0;
#endif

    return long_opts;
}
/*!
 * \brief Register command-line options for later parsing and help output
 *
 * \param[in] short_options  getopt()-style option string; if NULL and
 *                           \p long_options is given, one is generated
 * \param[in] app_usage      Usage summary for crm_help()
 * \param[in] long_options   Option table (terminated by a NULL name), may be NULL
 * \param[in] app_desc       One-line application description for crm_help()
 */
void
crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options,
                const char *app_desc)
{
    if (short_options) {
        crm_short_options = strdup(short_options);

    } else if (long_options) {
        int lpc = 0;
        int opt_string_len = 0;
        char *local_short_options = NULL;

        for (lpc = 0; long_options[lpc].name != NULL; lpc++) {
            /* Only options with a printable short-option character qualify */
            if (long_options[lpc].val && long_options[lpc].val != '-' && long_options[lpc].val < UCHAR_MAX) {
                /* Worst case adds 4 bytes: the character, "::", and NUL */
                local_short_options = realloc_safe(local_short_options, opt_string_len + 4);
                local_short_options[opt_string_len++] = long_options[lpc].val;
                /* getopt(3) says: Two colons mean an option takes an optional arg; */
                if (long_options[lpc].has_arg == optional_argument) {
                    local_short_options[opt_string_len++] = ':';
                }
                if (long_options[lpc].has_arg >= required_argument) {
                    local_short_options[opt_string_len++] = ':';
                }
                local_short_options[opt_string_len] = 0;
            }
        }
        crm_short_options = local_short_options;
        crm_trace("Generated short option string: '%s'", local_short_options);
    }

    if (long_options) {
        crm_long_options = long_options;
    }

    if (app_desc) {
        crm_app_description = app_desc;
    }

    if (app_usage) {
        crm_app_usage = app_usage;
    }
}
/*!
 * \brief Get the next command-line option
 *
 * Convenience wrapper for crm_get_option_long(), for callers that do not
 * need the long name of options lacking a short-option character.
 *
 * \param[in]  argc   Argument count as passed to main()
 * \param[in]  argv   Argument vector as passed to main()
 * \param[out] index  Where to store the option index
 *
 * \return Option character, or -1 when option processing is complete
 */
int
crm_get_option(int argc, char **argv, int *index)
{
    return crm_get_option_long(argc, argv, index, NULL);
}
/*!
 * \brief Get the next command-line option, with access to its long name
 *
 * \param[in]  argc      Argument count as passed to main()
 * \param[in]  argv      Argument vector as passed to main()
 * \param[out] index     Where to store getopt_long()'s option index
 * \param[out] longname  If not NULL, where to store the long name of an
 *                       option that has no short-option value
 *
 * \return Option character (0 with *longname set for long-only options),
 *         or -1 when option processing is complete
 */
int
crm_get_option_long(int argc, char **argv, int *index, const char **longname)
{
#ifdef HAVE_GETOPT_H
    /* Lazily built once from crm_long_options, then cached for all calls */
    static struct option *long_opts = NULL;

    if (long_opts == NULL && crm_long_options) {
        long_opts = crm_create_long_opts(crm_long_options);
    }

    *index = 0;
    if (long_opts) {
        int flag = getopt_long(argc, argv, crm_short_options, long_opts, index);

        switch (flag) {
            case 0:
                if (long_opts[*index].val) {
                    return long_opts[*index].val;
                } else if (longname) {
                    *longname = long_opts[*index].name;
                    /* deliberate fall-through: returns 0 with *longname set */
                } else {
                    crm_notice("Unhandled option --%s", long_opts[*index].name);
                    return flag;
                }
            case -1:           /* End of option processing */
                break;
            case ':':
                crm_trace("Missing argument");
                crm_help('?', CRM_EX_USAGE);
                break;
            case '?':
                /* '?' means both --help and "unknown option"; the dummy
                 * first entry added by crm_create_long_opts() makes *index
                 * nonzero for --help, so we can exit with the right code */
                crm_help('?', (*index? CRM_EX_OK : CRM_EX_USAGE));
                break;
        }
        return flag;
    }
#endif

    if (crm_short_options) {
        return getopt(argc, argv, crm_short_options);
    }

    return -1;
}
/*!
 * \brief Print version or usage information for the current tool, then exit
 *
 * \param[in] cmd        'v' or '$' prints the version, '!' prints build
 *                       details, anything else prints full usage text
 * \param[in] exit_code  Status to exit with (nonzero sends output to stderr)
 *
 * \return Does not return (calls crm_exit())
 */
crm_exit_t
crm_help(char cmd, crm_exit_t exit_code)
{
    int i = 0;
    FILE *stream = (exit_code ? stderr : stdout);

    if (cmd == 'v' || cmd == '$') {
        fprintf(stream, "Pacemaker %s\n", PACEMAKER_VERSION);
        fprintf(stream, "Written by Andrew Beekhof\n");
        goto out;
    }

    if (cmd == '!') {
        fprintf(stream, "Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
        goto out;
    }

    fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description);

    if (crm_app_usage) {
        fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage);
    }

    if (crm_long_options) {
        fprintf(stream, "Options:\n");
        for (i = 0; crm_long_options[i].name != NULL; i++) {
            if (crm_long_options[i].flags & pcmk_option_hidden) {
                /* Hidden options are omitted from help output */

            } else if (crm_long_options[i].flags & pcmk_option_paragraph) {
                fprintf(stream, "%s\n\n", crm_long_options[i].desc);

            } else if (crm_long_options[i].flags & pcmk_option_example) {
                fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc);

            } else if (crm_long_options[i].val == '-' && crm_long_options[i].desc) {
                /* Spacer entries print their description as plain text */
                fprintf(stream, "%s\n", crm_long_options[i].desc);

            } else {
                /* is val printable as char ? */
                if (crm_long_options[i].val && crm_long_options[i].val <= UCHAR_MAX) {
                    fprintf(stream, " -%c,", crm_long_options[i].val);
                } else {
                    fputs(" ", stream);
                }
                fprintf(stream, " --%s%s\t%s\n", crm_long_options[i].name,
                        crm_long_options[i].has_arg == optional_argument ? "[=value]" :
                        crm_long_options[i].has_arg == required_argument ? "=value" : "",
                        crm_long_options[i].desc ? crm_long_options[i].desc : "");
            }
        }

    } else if (crm_short_options) {
        fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description);
        for (i = 0; crm_short_options[i] != 0; i++) {
            int has_arg = no_argument /* 0 */;

            /* One colon after the character means a required argument,
             * two colons mean an optional one (see getopt(3)) */
            if (crm_short_options[i + 1] == ':') {
                if (crm_short_options[i + 2] == ':')
                    has_arg = optional_argument /* 2 */;
                else
                    has_arg = required_argument /* 1 */;
            }

            fprintf(stream, " -%c %s\n", crm_short_options[i],
                    has_arg == optional_argument ? "[value]" :
                    has_arg == required_argument ? "{value}" : "");
            i += has_arg; /* skip the colon(s) just examined */
        }
    }

    fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT);

  out:
    return crm_exit(exit_code);
}
/*!
 * \brief Create the CIB's three IPC servers
 *
 * \param[out] ipcs_ro   Where to store the read-only native-IPC server
 * \param[out] ipcs_rw   Where to store the read-write native-IPC server
 * \param[out] ipcs_shm  Where to store the shared-memory IPC server
 * \param[in]  ro_cb     Connection handlers for the read-only server
 * \param[in]  rw_cb     Connection handlers for the read-write and
 *                       shared-memory servers
 *
 * \note Exits fatally (inhibiting respawn) if any server cannot be created.
 */
void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro,
                          qb_ipcs_service_t **ipcs_rw,
                          qb_ipcs_service_t **ipcs_shm,
                          struct qb_ipcs_service_handlers *ro_cb,
                          struct qb_ipcs_service_handlers *rw_cb)
{
    *ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, ro_cb);
    *ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, rw_cb);
    *ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, rw_cb);

    if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) {
        crm_err("Failed to create cib servers: exiting and inhibiting respawn.");
        crm_warn("Verify pacemaker and pacemaker_remote are not both enabled.");
        crm_exit(CRM_EX_FATAL);
    }
}
/*!
 * \brief Destroy the three CIB IPC servers created by cib_ipc_servers_init()
 *
 * \param[in] ipcs_ro   Read-only IPC server to destroy
 * \param[in] ipcs_rw   Read-write IPC server to destroy
 * \param[in] ipcs_shm  Shared-memory IPC server to destroy
 */
void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro,
                             qb_ipcs_service_t *ipcs_rw,
                             qb_ipcs_service_t *ipcs_shm)
{
    qb_ipcs_destroy(ipcs_ro);
    qb_ipcs_destroy(ipcs_rw);
    qb_ipcs_destroy(ipcs_shm);
}
/*!
 * \brief Create the controller's IPC server
 *
 * \param[in] cb  Connection handlers for the new server
 *
 * \return New IPC server (possibly NULL on failure -- callers should check,
 *         as the sibling *_ipc_server_init() functions do)
 */
qb_ipcs_service_t *
crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb)
{
    return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb);
}
/*!
 * \brief Create the attribute manager's IPC server
 *
 * \param[out] ipcs  Where to store the new IPC server
 * \param[in]  cb    Connection handlers for the server
 *
 * \note Exits fatally (inhibiting respawn) if the server cannot be created,
 *       e.g. when another process already owns the IPC endpoint.
 */
void
attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb)
{
    *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb);

    if (*ipcs == NULL) {
        /* Leftover diff markers ('-'/'+' lines) removed; kept the updated
         * post-rename message referring to pacemaker-attrd */
        crm_err("Failed to create pacemaker-attrd server: exiting and inhibiting respawn");
        crm_warn("Verify pacemaker and pacemaker_remote are not both enabled.");
        crm_exit(CRM_EX_FATAL);
    }
}
/*!
 * \brief Create the fencer's IPC server
 *
 * \param[out] ipcs  Where to store the new IPC server
 * \param[in]  cb    Connection handlers for the server
 *
 * \note Exits fatally (inhibiting respawn) if the server cannot be created.
 */
void
stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb)
{
    *ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb);

    if (*ipcs == NULL) {
        crm_err("Failed to create stonith-ng servers: exiting and inhibiting respawn.");
        crm_warn("Verify pacemaker and pacemaker_remote are not both enabled.");
        crm_exit(CRM_EX_FATAL);
    }
}
/*!
 * \brief Check whether ACLs should be enforced for a given user
 *
 * \param[in] user  User name to check (may be NULL or empty)
 *
 * \return TRUE if ACLs apply to \p user; FALSE for no/empty user, the
 *         cluster daemon user, root, or when ACL support is compiled out
 */
bool
pcmk_acl_required(const char *user)
{
#if ENABLE_ACL
    if ((user == NULL) || (*user == '\0')) {
        crm_trace("no user set");
        return FALSE;
    }
    if ((strcmp(user, CRM_DAEMON_USER) == 0) || (strcmp(user, "root") == 0)) {
        /* Privileged users bypass ACL enforcement */
        return FALSE;
    }
    crm_trace("acls required for %s", user);
    return TRUE;
#else
    crm_trace("acls not supported");
    return FALSE;
#endif
}
#if ENABLE_ACL
/*!
 * \brief Get the user name corresponding to a numeric user ID
 *
 * \param[in] uid  User ID to look up
 *
 * \return Newly allocated copy of the user name (caller must free),
 *         or NULL if no password entry exists for \p uid
 */
char *
uid2username(uid_t uid)
{
    struct passwd *entry = getpwuid(uid);

    if (entry == NULL) {
        crm_perror(LOG_ERR, "Cannot get password entry of uid: %d", uid);
        return NULL;
    }
    return strdup(entry->pw_name);
}
/*!
 * \brief Determine the ACL user for a request, and record it in the request
 *
 * Chooses which user a request should be evaluated as -- based on our own
 * effective user, the IPC peer's user, and any user named in the request --
 * then stores the choice in the request's XML_ACL_TAG_USER attribute (and
 * in \p field, if given).
 *
 * \param[in,out] request    Request XML to inspect and update
 * \param[in]     field      Legacy attribute name also checked and set,
 *                           for backward compatibility (may be NULL)
 * \param[in]     peer_user  User reported for the connection (NULL if unknown)
 *
 * \return User name the request itself asked for (may be NULL)
 */
const char *
crm_acl_get_set_user(xmlNode * request, const char *field, const char *peer_user)
{
    /* field is only checked for backwards compatibility */
    /* Cached for the life of the process (allocated once, never freed) */
    static const char *effective_user = NULL;
    const char *requested_user = NULL;
    const char *user = NULL;

    if(effective_user == NULL) {
        effective_user = uid2username(geteuid());
    }

    requested_user = crm_element_value(request, XML_ACL_TAG_USER);
    if(requested_user == NULL) {
        requested_user = crm_element_value(request, field);
    }

    if (is_privileged(effective_user) == FALSE) {
        /* We're not running as a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */
        user = effective_user;

    } else if(peer_user == NULL && requested_user == NULL) {
        /* No user known or requested, use 'effective_user' and make sure one is set for the request */
        user = effective_user;

    } else if(peer_user == NULL) {
        /* No user known, trusting 'requested_user' */
        user = requested_user;

    } else if (is_privileged(peer_user) == FALSE) {
        /* The peer is not a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */
        user = peer_user;

    } else if (requested_user == NULL) {
        /* Even if we're privileged, make sure there is always a value set */
        user = peer_user;

    } else {
        /* Legal delegation to 'requested_user' */
        user = requested_user;
    }

    // This requires pointer comparison, not string comparison
    if(user != crm_element_value(request, XML_ACL_TAG_USER)) {
        crm_xml_add(request, XML_ACL_TAG_USER, user);
    }

    if(field != NULL && user != crm_element_value(request, field)) {
        crm_xml_add(request, field, user);
    }

    return requested_user;
}
/*!
 * \brief Ensure a request records the user it should be processed as
 *
 * \param[in]     user     User name of the connecting peer
 * \param[in,out] request  Request XML to update
 * \param[in]     field    Attribute in which to store the user name
 *
 * \note Unprivileged peers always have \p field overwritten with their own
 *       name; privileged peers may delegate (an existing value is kept).
 */
void
determine_request_user(const char *user, xmlNode * request, const char *field)
{
    /* Get our internal validation out of the way first */
    CRM_CHECK(user != NULL && request != NULL && field != NULL, return);

    /* If our peer is a privileged user, we might be doing something on behalf of someone else */
    if (is_privileged(user) == FALSE) {
        /* We're not a privileged user, set or overwrite any existing value for $field */
        crm_xml_replace(request, field, user);

    } else if (crm_element_value(request, field) == NULL) {
        /* Even if we're privileged, make sure there is always a value set */
        crm_xml_replace(request, field, user);

        /* } else { Legal delegation */
    }

    crm_trace("Processing msg as user '%s'", crm_element_value(request, field));
}
#endif
/*!
 * \brief Look up a function in a dynamic library, opening it if necessary
 *
 * \param[in,out] handle  Cached library handle; if *handle is NULL, \p lib
 *                        is dlopen()ed and the handle stored for reuse
 * \param[in]     lib     Name of library to open if not already open
 * \param[in]     fn      Name of function to look up
 * \param[in]     fatal   If TRUE, exit the process on any failure
 *
 * \return Address of requested function, or NULL on failure (if not fatal)
 */
void *
find_library_function(void **handle, const char *lib, const char *fn, gboolean fatal)
{
    const char *error;
    void *a_function;

    if (*handle == NULL) {
        *handle = dlopen(lib, RTLD_LAZY);
    }

    if (!(*handle)) {
        crm_err("%sCould not open %s: %s", fatal ? "Fatal: " : "", lib, dlerror());
        if (fatal) {
            crm_exit(CRM_EX_FATAL);
        }
        return NULL;
    }

    /* Per dlsym(3), NULL can be a valid symbol value, so clear any stale
     * error first and use dlerror() itself to detect lookup failure.
     * (Previously an unchecked dlerror() result could be NULL here, which
     * is undefined behavior when passed to a %s format.)
     */
    dlerror();
    a_function = dlsym(*handle, fn);
    error = dlerror();
    if (error != NULL) {
        crm_err("%sCould not find %s in %s: %s", fatal ? "Fatal: " : "", fn, lib, error);
        if (fatal) {
            crm_exit(CRM_EX_FATAL);
        }
    }
    return a_function;
}
/*!
 * \brief Discard a pointer's const qualifier
 *
 * For passing const data to APIs declared without const. The caller is
 * responsible for ensuring the object is never actually modified.
 *
 * \param[in] ptr  Pointer to strip const from
 *
 * \return \p ptr as a non-const pointer
 */
void *
convert_const_pointer(const void *ptr)
{
    /* Worst function ever */
    return (void *) ptr;
}
#ifdef HAVE_UUID_UUID_H
# include <uuid/uuid.h>
#endif
/*!
 * \brief Generate a new random UUID string
 *
 * \return Newly allocated 36-character UUID string plus terminator
 *         (caller must free), or NULL if memory allocation fails
 */
char *
crm_generate_uuid(void)
{
    unsigned char uuid[16];
    char *buffer = malloc(37); /* Including NUL byte */

    if (buffer == NULL) {
        /* Previously an allocation failure was passed unchecked to
         * uuid_unparse(), dereferencing NULL */
        return NULL;
    }
    uuid_generate(uuid);
    uuid_unparse(uuid, buffer);
    return buffer;
}
/*!
* \brief Check whether a string represents a cluster daemon name
*
* \param[in] name String to check
*
* \return TRUE if name is standard client name used by daemons, FALSE otherwise
*/
bool
crm_is_daemon_name(const char *name)
{
return (name &&
(!strcmp(name, CRM_SYSTEM_CRMD)
|| !strcmp(name, CRM_SYSTEM_STONITHD)
|| !strcmp(name, T_ATTRD)
|| !strcmp(name, CRM_SYSTEM_CIB)
|| !strcmp(name, CRM_SYSTEM_MCP)
|| !strcmp(name, CRM_SYSTEM_DC)
|| !strcmp(name, CRM_SYSTEM_TENGINE)
|| !strcmp(name, CRM_SYSTEM_LRMD)));
}
#include <md5.h>
/*!
 * \brief Compute the MD5 checksum of a string
 *
 * \param[in] buffer  String to checksum (NULL is treated as "")
 *
 * \return Newly allocated lowercase hex digest of 2 * MD5_DIGEST_SIZE
 *         characters (caller must free), or NULL if allocation fails
 */
char *
crm_md5sum(const char *buffer)
{
    int lpc = 0, len = 0;
    char *digest = NULL;
    unsigned char raw_digest[MD5_DIGEST_SIZE];

    if (buffer == NULL) {
        buffer = "";
    }
    len = strlen(buffer);

    crm_trace("Beginning digest of %d bytes", len);
    /* Two hex characters per raw digest byte, plus NUL terminator */
    digest = malloc(2 * MD5_DIGEST_SIZE + 1);
    if(digest) {
        md5_buffer(buffer, len, raw_digest);
        for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) {
            sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]);
        }
        digest[(2 * MD5_DIGEST_SIZE)] = 0;
        crm_trace("Digest %s.", digest);
    } else {
        crm_err("Could not create digest");
    }
    return digest;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
/*!
 * \brief Initialize the GnuTLS library for this process
 *
 * \note SIGPIPE is ignored so writes to a closed connection produce an
 *       error return instead of terminating the process.
 */
void
crm_gnutls_global_init(void)
{
    signal(SIGPIPE, SIG_IGN);
    gnutls_global_init();
}
#endif
/*!
 * \brief Build an agent key of the form "standard[:provider]:type"
 *
 * Missing components are rendered as empty strings, and the ":" before
 * the provider appears only when a provider is given.
 *
 * \param[in] standard  Resource agent standard (may be NULL)
 * \param[in] provider  Resource agent provider (may be NULL)
 * \param[in] type      Resource agent type (may be NULL)
 *
 * \return Newly allocated key (caller must free), or NULL if all three
 *         components are NULL
 */
char *
crm_generate_ra_key(const char *standard, const char *provider, const char *type)
{
    if ((standard == NULL) && (provider == NULL) && (type == NULL)) {
        return NULL;
    }
    return crm_strdup_printf("%s%s%s:%s",
                             (standard == NULL)? "" : standard,
                             (provider == NULL)? "" : ":",
                             (provider == NULL)? "" : provider,
                             (type == NULL)? "" : type);
}
/*!
 * \brief Check whether a resource standard requires a provider to be specified
 *
 * \param[in] standard  Standard name
 *
 * \return TRUE if standard requires a provider, FALSE otherwise
 */
bool
crm_provider_required(const char *standard)
{
    CRM_CHECK(standard != NULL, return FALSE);

    /* @TODO
     * - this should probably be case-sensitive, but isn't,
     *   for backward compatibility
     * - it might be nice to keep standards' capabilities (supports provider,
     *   can be promotable, etc.) as structured data somewhere
     */
    return (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0)? TRUE : FALSE;
}
/*!
 * \brief Parse a "standard[:provider]:type" agent specification
 *
 * \param[in]  spec      Agent specification
 * \param[out] standard  Newly allocated memory containing agent standard (or NULL)
 * \param[out] provider  Newly allocated memory containing agent provider (or NULL)
 * \param[out] type      Newly allocated memory containing agent type (or NULL)
 *
 * \return pcmk_ok if the string could be parsed, -EINVAL otherwise
 *
 * \note It is acceptable for the type to contain a ':' if the standard supports
 *       that. For example, systemd supports the form "systemd:UNIT@A:B".
 * \note It is the caller's responsibility to free the returned values.
 * \note On failure, all output arguments are guaranteed to be NULL (they are
 *       never left pointing at freed memory).
 */
int
crm_parse_agent_spec(const char *spec, char **standard, char **provider,
                     char **type)
{
    char *colon;

    CRM_CHECK(spec && standard && provider && type, return -EINVAL);
    *standard = NULL;
    *provider = NULL;
    *type = NULL;

    colon = strchr(spec, ':');
    if ((colon == NULL) || (colon == spec)) {
        return -EINVAL;
    }

    *standard = strndup(spec, colon - spec);
    spec = colon + 1;

    if (crm_provider_required(*standard)) {
        colon = strchr(spec, ':');
        if ((colon == NULL) || (colon == spec)) {
            /* Reset the output argument so the caller can't double-free or
             * use the freed memory */
            free(*standard);
            *standard = NULL;
            return -EINVAL;
        }
        *provider = strndup(spec, colon - spec);
        spec = colon + 1;
    }

    if (*spec == '\0') {
        /* Missing type: undo partial results, leaving clean NULL outputs */
        free(*standard);
        free(*provider);
        *standard = NULL;
        *provider = NULL;
        return -EINVAL;
    }

    *type = strdup(spec);
    return pcmk_ok;
}
diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c
index 9bed489d9e..116a125b7d 100644
--- a/tools/attrd_updater.c
+++ b/tools/attrd_updater.c
@@ -1,371 +1,359 @@
-
-/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+/*
+ * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <libgen.h>
#include <sys/param.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/attrd.h>
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output\n"},
{"name", 1, 0, 'n', "The attribute's name"},
{"-spacer-",1, 0, '-', "\nCommands:"},
- {"update", 1, 0, 'U', "Update the attribute's value in attrd. If this causes the value to change, it will also be updated in the cluster configuration"},
- {"update-both", 1, 0, 'B', "Update the attribute's value and time to wait (dampening) in attrd. If this causes the value or dampening to change, the attribute will also be written to the cluster configuration, so be aware that repeatedly changing the dampening reduces its effectiveness."},
- {"update-delay", 0, 0, 'Y', "Update the attribute's dampening in attrd (requires -d/--delay). If this causes the dampening to change, the attribute will also be written to the cluster configuration, so be aware that repeatedly changing the dampening reduces its effectiveness."},
- {"query", 0, 0, 'Q', "\tQuery the attribute's value from attrd"},
- {"delete", 0, 0, 'D', "\tDelete the attribute in attrd. If a value was previously set, it will also be removed from the cluster configuration"},
- {"refresh", 0, 0, 'R', "\t(Advanced) Force the attrd daemon to resend all current values to the CIB\n"},
-
+ {"update", 1, 0, 'U', "Update the attribute's value in pacemaker-attrd. If this causes the value to change, it will also be updated in the cluster configuration"},
+ {"update-both", 1, 0, 'B', "Update the attribute's value and time to wait (dampening) in pacemaker-attrd. If this causes the value or dampening to change, the attribute will also be written to the cluster configuration, so be aware that repeatedly changing the dampening reduces its effectiveness."},
+ {"update-delay", 0, 0, 'Y', "Update the attribute's dampening in pacemaker-attrd (requires -d/--delay). If this causes the dampening to change, the attribute will also be written to the cluster configuration, so be aware that repeatedly changing the dampening reduces its effectiveness."},
+ {"query", 0, 0, 'Q', "\tQuery the attribute's value from pacemaker-attrd"},
+ {"delete", 0, 0, 'D', "\tDelete the attribute in pacemaker-attrd. If a value was previously set, it will also be removed from the cluster configuration"},
+ {"refresh", 0, 0, 'R', "\t(Advanced) Force the pacemaker-attrd daemon to resend all current values to the CIB\n"},
+
{"-spacer-",1, 0, '-', "\nAdditional options:"},
{"delay", 1, 0, 'd', "The time to wait (dampening) in seconds for further changes before writing"},
{"set", 1, 0, 's', "(Advanced) The attribute set in which to place the value"},
{"node", 1, 0, 'N', "Set the attribute for the named node (instead of the local one)"},
{"all", 0, 0, 'A', "Show values of the attribute for all nodes (query only)"},
- /* lifetime could be implemented for atomic attrd if there is sufficient user demand */
+ /* lifetime could be implemented if there is sufficient user demand */
{"lifetime",1, 0, 'l', "(Deprecated) Lifetime of the node attribute (silently ignored by cluster)"},
{"private", 0, 0, 'p', "\tIf this creates a new attribute, never write the attribute to the CIB"},
/* Legacy options */
{"quiet", 0, 0, 'q', NULL, pcmk_option_hidden},
{"update", 1, 0, 'v', NULL, pcmk_option_hidden},
{"section", 1, 0, 'S', NULL, pcmk_option_hidden},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
static int do_query(const char *attr_name, const char *attr_node, gboolean query_all);
static int do_update(char command, const char *attr_node, const char *attr_name,
const char *attr_value, const char *attr_section,
const char *attr_set, const char *attr_dampen, int attr_options);
/*!
 * \brief attrd_updater entry point: parse options, then query or update
 *        a node attribute via the cluster attribute manager
 */
int
main(int argc, char **argv)
{
    int index = 0;
    int argerr = 0;
    int attr_options = attrd_opt_none;
    int flag;
    crm_exit_t exit_code = CRM_EX_OK;
    const char *attr_node = NULL;
    const char *attr_name = NULL;
    const char *attr_value = NULL;
    const char *attr_set = NULL;
    const char *attr_section = NULL;
    const char *attr_dampen = NULL;
    char command = 'Q';          /* query by default */
    gboolean query_all = FALSE;

    crm_log_cli_init("attrd_updater");
    crm_set_options(NULL, "command -n attribute [options]", long_options,
                    "Tool for updating cluster node attributes");

    if (argc < 2) {
        crm_help('?', CRM_EX_USAGE);
    }

    while (1) {
        flag = crm_get_option(argc, argv, &index);
        if (flag == -1)
            break;

        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case '?':
            case '$':
                crm_help(flag, CRM_EX_OK);
                break;
            case 'n':
                attr_name = strdup(optarg);
                break;
            case 's':
                attr_set = strdup(optarg);
                break;
            case 'd':
                attr_dampen = strdup(optarg);
                break;
            case 'l':
            case 'S':
                /* deprecated --lifetime and hidden --section both set the section */
                attr_section = strdup(optarg);
                break;
            case 'N':
                attr_node = strdup(optarg);
                break;
            case 'A':
                query_all = TRUE;
                break;
            case 'p':
                set_bit(attr_options, attrd_opt_private);
                break;
            case 'q':
                /* legacy --quiet: accepted but has no effect */
                break;
            case 'Y':
                command = flag;
                crm_log_args(argc, argv); /* Too much? */
                break;
            case 'Q':
            case 'B':
            case 'R':
            case 'D':
            case 'U':
            case 'v':
                command = flag;
                attr_value = optarg;
                crm_log_args(argc, argv); /* Too much? */
                break;
            default:
                ++argerr;
                break;
        }
    }

    if (optind > argc) {
        ++argerr;
    }

    /* Every command except --refresh needs an attribute name */
    if (command != 'R' && attr_name == NULL) {
        ++argerr;
    }

    if (argerr) {
        crm_help('?', CRM_EX_USAGE);
    }

    if (command == 'Q') {
        exit_code = crm_errno2exit(do_query(attr_name, attr_node, query_all));
    } else {
        /* @TODO We don't know whether the specified node is a Pacemaker Remote
         * node or not, so we can't set attrd_opt_remote when appropriate.
         * However, it's not a big problem, because pacemaker-attrd will learn
         * and remember a node's "remoteness".
         */
        attr_node = attrd_get_target(attr_node);
        exit_code = crm_errno2exit(do_update(command, attr_node, attr_name,
                                             attr_value, attr_section, attr_set,
                                             attr_dampen, attr_options));
    }
    return crm_exit(exit_code);
}
/*!
 * \internal
 * \brief Submit a query request to pacemaker-attrd and wait for reply
 *
 * \param[in]  name   Name of attribute to query
 * \param[in]  host   Query applies to this host only (or all hosts if NULL)
 * \param[out] reply  On success, will be set to new XML tree with reply
 *
 * \return pcmk_ok on success, -errno on error
 * \note On success, caller is responsible for freeing result via free_xml(*reply)
 * \note Leftover unresolved diff markers in the body were removed, keeping
 *       the updated post-rename comments.
 */
static int
send_attrd_query(const char *name, const char *host, xmlNode **reply)
{
    int rc;
    crm_ipc_t *ipc;
    xmlNode *query;

    /* Build the query XML */
    query = create_xml_node(NULL, __FUNCTION__);
    if (query == NULL) {
        return -ENOMEM;
    }
    crm_xml_add(query, F_TYPE, T_ATTRD);
    crm_xml_add(query, F_ORIG, crm_system_name);
    crm_xml_add(query, F_ATTRD_HOST, host);
    crm_xml_add(query, F_ATTRD_TASK, ATTRD_OP_QUERY);
    crm_xml_add(query, F_ATTRD_ATTRIBUTE, name);

    /* Connect to pacemaker-attrd, send query XML and get reply */
    crm_debug("Sending query for value of %s on %s", name, (host? host : "all nodes"));
    ipc = crm_ipc_new(T_ATTRD, 0);
    if (crm_ipc_connect(ipc) == FALSE) {
        crm_perror(LOG_ERR, "Connection to cluster attribute manager failed");
        rc = -ENOTCONN;
    } else {
        rc = crm_ipc_send(ipc, query, crm_ipc_flags_none|crm_ipc_client_response, 0, reply);
        if (rc > 0) {
            rc = pcmk_ok;
        }
        crm_ipc_close(ipc);
    }

    free_xml(query);
    return(rc);
}
/*!
 * \brief Validate pacemaker-attrd's XML reply to a query
 *
 * param[in] reply      Root of reply XML tree to validate
 * param[in] attr_name  Name of attribute that was queried
 *
 * \return pcmk_ok on success,
 *         -errno on error (-ENXIO = requested attribute does not exist)
 */
static int
validate_attrd_reply(xmlNode *reply, const char *attr_name)
{
    const char *reply_attr;

    if (reply == NULL) {
        fprintf(stderr, "Could not query value of %s: reply did not contain valid XML\n",
                attr_name);
        return -pcmk_err_schema_validation;
    }
    crm_log_xml_trace(reply, "Reply");

    /* A reply without the attribute name means the attribute is unknown */
    reply_attr = crm_element_value(reply, F_ATTRD_ATTRIBUTE);
    if (reply_attr == NULL) {
        fprintf(stderr, "Could not query value of %s: attribute does not exist\n",
                attr_name);
        return -ENXIO;
    }

    /* Verify the reply identifies itself as attrd's, carries a protocol
     * version, and echoes the attribute name we asked about */
    if (safe_str_neq(crm_element_value(reply, F_TYPE), T_ATTRD)
        || (crm_element_value(reply, F_ATTRD_VERSION) == NULL)
        || strcmp(reply_attr, attr_name)) {
            fprintf(stderr,
                    "Could not query value of %s: reply did not contain expected identification\n",
                    attr_name);
            return -pcmk_err_schema_validation;
    }
    return pcmk_ok;
}
/*!
 * \brief Print the attribute values in a pacemaker-attrd XML query reply
 *
 * \param[in] reply     Root of XML tree with query reply
 * \param[in] attr_name Name of attribute that was queried
 *
 * \return TRUE if any values were printed
 */
static gboolean
print_attrd_values(xmlNode *reply, const char *attr_name)
{
    xmlNode *child;
    const char *reply_host, *reply_value;
    gboolean have_values = FALSE;

    /* Iterate through reply's XML tags (a node tag for each host-value pair) */
    for (child = __xml_first_child(reply); child != NULL; child = __xml_next(child)) {

        if (safe_str_neq((const char*)child->name, XML_CIB_TAG_NODE)) {
            crm_warn("Ignoring unexpected %s tag in query reply", child->name);

        } else {
            reply_host = crm_element_value(child, F_ATTRD_HOST);
            reply_value = crm_element_value(child, F_ATTRD_VALUE);

            if (reply_host == NULL) {
                crm_warn("Ignoring %s tag without %s attribute in query reply",
                         XML_CIB_TAG_NODE, F_ATTRD_HOST);
            } else {
                /* An unset value prints as an empty string */
                printf("name=\"%s\" host=\"%s\" value=\"%s\"\n",
                       attr_name, reply_host, (reply_value? reply_value : ""));
                have_values = TRUE;
            }
        }
    }
    return have_values;
}
/*!
 * \brief Submit a query to pacemaker-attrd and print reply
 *
 * \param[in] attr_name Name of attribute to be affected by request
 * \param[in] attr_node Name of host to query for (or NULL for localhost)
 * \param[in] query_all If TRUE, ignore attr_node and query all nodes instead
 *
 * \return pcmk_ok on success, -errno on error
 * \note Leftover unresolved diff markers in the body were removed, keeping
 *       the updated post-rename comments.
 */
static int
do_query(const char *attr_name, const char *attr_node, gboolean query_all)
{
    xmlNode *reply = NULL;
    int rc;

    /* Decide which node(s) to query */
    if (query_all == TRUE) {
        attr_node = NULL;
    } else {
        attr_node = attrd_get_target(attr_node);
    }

    /* Build and send pacemaker-attrd request, and get XML reply */
    rc = send_attrd_query(attr_name, attr_node, &reply);
    if (rc != pcmk_ok) {
        fprintf(stderr, "Could not query value of %s: %s (%d)\n", attr_name, pcmk_strerror(rc), rc);
        return rc;
    }

    /* Validate the XML reply */
    rc = validate_attrd_reply(reply, attr_name);
    if (rc != pcmk_ok) {
        if (reply != NULL) {
            free_xml(reply);
        }
        return rc;
    }

    /* Print the values from the reply */
    if (print_attrd_values(reply, attr_name) == FALSE) {
        fprintf(stderr,
                "Could not query value of %s: reply had attribute name but no host values\n",
                attr_name);
        free_xml(reply);
        return -pcmk_err_schema_validation;
    }

    /* Previously the reply was leaked on this (success) path */
    free_xml(reply);
    return pcmk_ok;
}
/*!
 * \brief Ask pacemaker-attrd to update, delete, or refresh an attribute
 *
 * \param[in] command       Operation character ('U', 'B', 'Y', 'D', 'R', or 'v')
 * \param[in] attr_node     Node to affect (or NULL for the local node)
 * \param[in] attr_name     Name of attribute to affect
 * \param[in] attr_value    New value (unused by some commands)
 * \param[in] attr_section  CIB section for the attribute (from --lifetime/--section)
 * \param[in] attr_set      Attribute set in which to place the value
 * \param[in] attr_dampen   Dampening interval to apply
 * \param[in] attr_options  Bitmask of attrd option flags
 *
 * \return pcmk_ok on success, -errno otherwise (with a message on stderr)
 */
static int
do_update(char command, const char *attr_node, const char *attr_name,
          const char *attr_value, const char *attr_section,
          const char *attr_set, const char *attr_dampen, int attr_options)
{
    int rc = attrd_update_delegate(NULL, command, attr_node, attr_name,
                                   attr_value, attr_section, attr_set,
                                   attr_dampen, NULL, attr_options);

    if (rc != pcmk_ok) {
        fprintf(stderr, "Could not update %s=%s: %s (%d)\n", attr_name, attr_value, pcmk_strerror(rc), rc);
    }
    return rc;
}
diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c
index 8379b4c5f7..11709e770f 100644
--- a/tools/crm_attribute.c
+++ b/tools/crm_attribute.c
@@ -1,348 +1,337 @@
-
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
+ * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/ipc.h>
#include <crm/common/util.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <crm/attrd.h>
#include <sys/utsname.h>
/* Global state for this tool, presumably populated while parsing
 * command-line options (NOTE(review): the option-parsing code is outside
 * this chunk -- confirm individual field meanings there) */
gboolean BE_QUIET = FALSE;
char command = 'G';              /* requested operation; 'G' (query) by default */
const char *dest_uname = NULL;
char *dest_node = NULL;
char *set_name = NULL;
char *attr_id = NULL;
char *attr_name = NULL;
char *attr_pattern = NULL;
const char *type = NULL;
const char *rsc_id = NULL;
const char *attr_value = NULL;
const char *attr_default = NULL;
const char *set_type = NULL;
/* *INDENT-OFF* */
/* Option table consumed by crm_set_options()/crm_get_option().
 * Fields are: long name, has_arg, flag, short-option char, help text
 * [, extra flag]. "-spacer-" entries only add formatting/example text
 * to --help output. Entries carrying a trailing 1 are legacy/hidden
 * aliases -- presumably suppressed from --help; confirm against the
 * crm_option definition.
 */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"quiet", 0, 0, 'q', "\tPrint only the value on stdout\n"},
{"name", 1, 0, 'n', "Name of the attribute/option to operate on"},
{"pattern", 1, 0, 'P', "Pattern matching names of attributes (only with -v/-D and -l reboot)"},
{"-spacer-", 0, 0, '-', "\nCommands:"},
{"query", 0, 0, 'G', "\tQuery the current value of the attribute/option"},
{"update", 1, 0, 'v', "Update the value of the attribute/option"},
{"delete", 0, 0, 'D', "\tDelete the attribute/option"},
{"-spacer-", 0, 0, '-', "\nAdditional Options:"},
{"node", 1, 0, 'N', "Set an attribute for the named node (instead of a cluster option). See also: -l"},
{"type", 1, 0, 't', "Which part of the configuration to update/delete/query the option in"},
{"-spacer-", 0, 0, '-', "\t\t\tValid values: crm_config, rsc_defaults, op_defaults, tickets"},
{"lifetime", 1, 0, 'l', "Lifetime of the node attribute"},
{"-spacer-", 0, 0, '-', "\t\t\tValid values: reboot, forever"},
{"utilization", 0, 0, 'z', "Set an utilization attribute for the node."},
{"set-name", 1, 0, 's', "(Advanced) The attribute set in which to place the value"},
{"id", 1, 0, 'i', "\t(Advanced) The ID used to identify the attribute"},
{"default", 1, 0, 'd', "(Advanced) The default value to display if none is found in the configuration"},
{"inhibit-policy-engine", 0, 0, '!', NULL, 1},
/* legacy */
{"quiet", 0, 0, 'Q', NULL, 1},
{"node-uname", 1, 0, 'U', NULL, 1},
{"get-value", 0, 0, 'G', NULL, 1},
{"delete-attr", 0, 0, 'D', NULL, 1},
{"attr-value", 1, 0, 'v', NULL, 1},
{"attr-name", 1, 0, 'n', NULL, 1},
{"attr-id", 1, 0, 'i', NULL, 1},
{"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', "Add a new node attribute called 'location' with the value of 'office' for host 'myhost':", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_attribute --node myhost --name location --update office", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Query the value of the 'location' node attribute for host 'myhost':", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_attribute --node myhost --name location --query", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Change the value of the 'location' node attribute for host 'myhost':", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_attribute --node myhost --name location --update backoffice", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Delete the 'location' node attribute for host 'myhost':", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_attribute --node myhost --name location --delete", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Query the value of the cluster-delay cluster option:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_attribute --type crm_config --name cluster-delay --query", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Query the value of the cluster-delay cluster option. Only print the value:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_attribute --type crm_config --name cluster-delay --query --quiet", pcmk_option_example},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
/*
 * crm_attribute entry point: query ('G'), update ('v'), or delete ('D')
 * a node attribute or cluster option. Transient ("reboot") node
 * attributes are routed through the attribute manager; everything else
 * goes to the CIB via the *_attr_delegate() helpers.
 *
 * Returns a crm_exit()-mapped exit status derived from the pcmk_ok /
 * negative-errno result code.
 */
int
main(int argc, char **argv)
{
cib_t *the_cib = NULL;
int rc = pcmk_ok;
int cib_opts = cib_sync_call;
int argerr = 0;
int flag;
int option_index = 0;
int is_remote_node = 0;
crm_log_cli_init("crm_attribute");
crm_set_options(NULL, "<command> -n <attribute> [options]", long_options,
"Manage node's attributes and cluster options."
"\n\nAllows node attributes and cluster options to be queried, modified and deleted.\n");
if (argc < 2) {
crm_help('?', CRM_EX_USAGE);
}
/* Parse options into the file-scope globals declared above */
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
case 'G':
command = flag;
attr_value = optarg;
break;
case 'D':
case 'v':
command = flag;
attr_value = optarg;
/* log full argv only for modifying commands */
crm_log_args(argc, argv);
break;
case 'q':
case 'Q':
BE_QUIET = TRUE;
break;
case 'U':
case 'N':
dest_uname = strdup(optarg);
break;
case 's':
set_name = strdup(optarg);
break;
case 'l':
case 't':
/* --lifetime values ("reboot"/"forever") are mapped to CIB
* sections below */
type = optarg;
break;
case 'z':
type = XML_CIB_TAG_NODES;
set_type = XML_TAG_UTILIZATION;
break;
case 'n':
attr_name = strdup(optarg);
break;
case 'P':
attr_pattern = strdup(optarg);
break;
case 'i':
attr_id = strdup(optarg);
break;
case 'r':
/* NOTE(review): no entry in long_options maps to 'r', so this
* case looks unreachable, and rsc_id is never read afterwards
* in this file -- confirm and consider removing */
rsc_id = optarg;
break;
case 'd':
attr_default = optarg;
break;
case '!':
crm_warn("Inhibiting notifications for this update");
cib_opts |= cib_inhibit_notify;
break;
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
/* Report (but do not act on) any trailing non-option arguments */
if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
/* Connect to the CIB before deciding where the attribute lives */
the_cib = cib_new();
rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command);
if (rc != pcmk_ok) {
fprintf(stderr, "Error signing on to the CIB service: %s\n", pcmk_strerror(rc));
return crm_exit(crm_errno2exit(rc));
}
/* A node was given but no section/lifetime: default to a permanent
* node attribute */
if (type == NULL && dest_uname != NULL) {
type = "forever";
}
/* Map the lifetime aliases onto the CIB sections they live in:
* "reboot" -> status (transient), "forever" -> nodes (permanent) */
if (safe_str_eq(type, "reboot")) {
type = XML_CIB_TAG_STATUS;
} else if (safe_str_eq(type, "forever")) {
type = XML_CIB_TAG_NODES;
}
if (type == NULL && dest_uname == NULL) {
/* we're updating cluster options - don't populate dest_node */
type = XML_CIB_TAG_CRMCONFIG;
} else if (safe_str_eq(type, XML_CIB_TAG_CRMCONFIG)) {
} else if (safe_str_neq(type, XML_CIB_TAG_TICKETS)) {
/* If we are being called from a resource agent via the cluster,
* the correct local node name will be passed as an environment
* variable. Otherwise, we have to ask the cluster.
*/
dest_uname = attrd_get_target(dest_uname);
if (dest_uname == NULL) {
dest_uname = get_local_node_name();
}
/* Resolve the node name to a UUID (also detects Pacemaker Remote
* nodes) for the CIB-based operations below */
rc = query_node_uuid(the_cib, dest_uname, &dest_node, &is_remote_node);
if (pcmk_ok != rc) {
fprintf(stderr, "Could not map name=%s to a UUID\n", dest_uname);
the_cib->cmds->signoff(the_cib);
cib_delete(the_cib);
return crm_exit(crm_errno2exit(rc));
}
}
if ((command == 'D') && (attr_name == NULL) && (attr_pattern == NULL)) {
fprintf(stderr, "Error: must specify attribute name or pattern to delete\n");
return crm_exit(CRM_EX_USAGE);
}
/* Pattern operations are only valid for transient (status-section)
* update/delete; they are remapped to the internal 'u' command and the
* pattern takes the place of the attribute name */
if (attr_pattern) {
if (((command != 'v') && (command != 'D'))
|| safe_str_neq(type, XML_CIB_TAG_STATUS)) {
fprintf(stderr, "Error: pattern can only be used with till-reboot update or delete\n");
return crm_exit(CRM_EX_USAGE);
}
command = 'u';
free(attr_name);
attr_name = attr_pattern;
}
/* Modifications to transient attributes go through the attribute
* manager; on its failure (or for all other cases), fall back to the
* direct CIB paths below */
if (((command == 'v') || (command == 'D') || (command == 'u'))
&& safe_str_eq(type, XML_CIB_TAG_STATUS)
&& pcmk_ok == attrd_update_delegate(NULL, command, dest_uname, attr_name,
attr_value, type, set_name, NULL, NULL,
is_remote_node?attrd_opt_remote:attrd_opt_none)) {
-        crm_info("Update %s=%s sent via attrd", attr_name, command == 'D' ? "<none>" : attr_value);
+        crm_info("Update %s=%s sent via pacemaker-attrd",
+                 attr_name, ((command == 'D')? "<none>" : attr_value));
} else if (command == 'D') {
rc = delete_attr_delegate(the_cib, cib_opts, type, dest_node, set_type, set_name,
attr_id, attr_name, attr_value, TRUE, NULL);
if (rc == -ENXIO) {
/* Nothing to delete...
* which means it's not there...
* which is what the admin wanted
*/
rc = pcmk_ok;
}
} else if (command == 'v') {
CRM_LOG_ASSERT(type != NULL);
CRM_LOG_ASSERT(attr_name != NULL);
CRM_LOG_ASSERT(attr_value != NULL);
rc = update_attr_delegate(the_cib, cib_opts, type, dest_node, set_type, set_name,
attr_id, attr_name, attr_value, TRUE, NULL, is_remote_node ? "remote" : NULL);
} else { /* query */
char *read_value = NULL;
rc = read_attr_delegate(the_cib, type, dest_node, set_type, set_name,
attr_id, attr_name, &read_value, TRUE, NULL);
/* Attribute not found: fall back to the -d/--default value if given */
if (rc == -ENXIO && attr_default) {
read_value = strdup(attr_default);
rc = pcmk_ok;
}
crm_info("Read %s=%s %s%s",
attr_name, crm_str(read_value), set_name ? "in " : "", set_name ? set_name : "");
if (rc == -EINVAL) {
/* Ambiguous match; the matches were already printed and the hint
* below asks the user to disambiguate with --attr-id */
rc = pcmk_ok;
} else if (BE_QUIET == FALSE) {
fprintf(stdout, "%s%s %s%s %s%s value=%s\n",
type ? "scope=" : "", type ? type : "",
attr_id ? "id=" : "", attr_id ? attr_id : "",
attr_name ? "name=" : "", attr_name ? attr_name : "",
read_value ? read_value : "(null)");
} else if (read_value != NULL) {
fprintf(stdout, "%s\n", read_value);
}
free(read_value);
}
if (rc == -EINVAL) {
printf("Please choose from one of the matches above and supply the 'id' with --attr-id\n");
} else if (rc != pcmk_ok) {
fprintf(stderr, "Error performing operation: %s\n", pcmk_strerror(rc));
}
the_cib->cmds->signoff(the_cib);
cib_delete(the_cib);
return crm_exit(crm_errno2exit(rc));
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:19 PM (16 h, 58 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002585
Default Alt Text
(423 KB)

Event Timeline