diff --git a/.gitignore b/.gitignore index 80693c6..4780ec1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,32 +1,33 @@ *.m4 *.cache compile config.* configure *.list depcomp install-sh libtool ltmain.sh* Makefile Makefile.in missing stamp-* sbd *.8 *.o *.service sbd.sh +sbd.sysconfig *~ *.swp *.patch *.diff *.orig *.rej *.rpm *.pod *.tar.* !.copr/Makefile sbd-*/ .deps test-driver diff --git a/Makefile.am b/Makefile.am index bd4346d..dcbf2ba 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,87 +1,87 @@ SUBDIRS = src agent man tests # .gz because github doesn't support .xz yet :-( # this is modified # TAG ?= $(shell git log --pretty="format:%H" -n 1 || sed -n -e "s/%global commit //p" sbd.spec)$(shell test -n "$$(git status -s)" && echo -n "-mod") distdir = $(PACKAGE)-$(TAG) TARFILE = $(distdir).tar.gz DIST_ARCHIVES = $(TARFILE) KEEP_EXISTING_TAR = no INJECT_GIT_COMMIT = yes CLEANFILES = *.rpm *.tar.* sbd-* DISTCLEANFILES = sbd-* sbd-*/ RPM_ROOT = $(shell pwd) RPM_OPTS = --define "_sourcedir $(RPM_ROOT)" \ --define "_specdir $(RPM_ROOT)" \ --define "_srcrpmdir $(RPM_ROOT)" \ --define "_builddir $(RPM_ROOT)" \ --define "_rpmdir $(RPM_ROOT)" MOCK_TARGET ?= rhel-7.1-candidate-x86_64 MOCK_OPTIONS ?= --resultdir=$(RPM_ROOT)/mock --no-cleanup-after BUILD_COUNTER ?= build.counter LAST_COUNT = $(shell test ! -e $(BUILD_COUNTER) && echo 0; test -e $(BUILD_COUNTER) && cat $(BUILD_COUNTER)) COUNT = $(shell expr 1 + $(LAST_COUNT)) TESTS = tests/regressions.sh export SBD_BINARY := src/sbd export SBD_PRELOAD := tests/.libs/libsbdtestbed.so export SBD_USE_DM := no EXTRA_DIST = sbd.spec tests/regressions.sh man/sbd.8.pod.in export: rm -f $(PACKAGE)-HEAD.tar.* if test "$(KEEP_EXISTING_TAR)" != "yes"; then \ rm -f $(TARFILE); \ fi; ! (git status -s | grep "??" 
&& echo "untracked files present in git-repo" ) if [ -f $(TARFILE) ]; then \ echo `date`: Using existing tarball: $(TARFILE); \ else \ rm -f $(PACKAGE).tar.*; \ - (git archive --prefix=$(distdir)/ $(shell echo $(TAG)|cut -f1 -d-) || tar -c --transform="s,^,$(distdir)/," --exclude="*.tar.*" --exclude="$(distdir)" --exclude="*.o" --exclude="*.8" --exclude="config.*" --exclude="libtool" --exclude="ltmain.sh*" --exclude="Makefile" --exclude="Makefile.in" --exclude="stamp-*" --exclude="*.service" --exclude="sbd" --exclude="*.m4" --exclude="*.cache" --exclude="configure" --exclude="*.list" --exclude="depcomp" --exclude="install-sh" --exclude="missing" --exclude="compile" --exclude="sbd.sh" --exclude="~" --exclude="*.swp" --exclude="*.patch" --exclude="*.diff" --exclude="*.orig" --exclude="*.rej" --exclude="*.rpm" --exclude="*.pod" --exclude=".deps" --exclude="test-driver" *) | gzip > $(TARFILE); \ + (git archive --prefix=$(distdir)/ $(shell echo $(TAG)|cut -f1 -d-) || tar -c --transform="s,^,$(distdir)/," --exclude="*.tar.*" --exclude="$(distdir)" --exclude="*.o" --exclude="*.8" --exclude="config.*" --exclude="libtool" --exclude="ltmain.sh*" --exclude="Makefile" --exclude="Makefile.in" --exclude="stamp-*" --exclude="*.service" --exclude="sbd" --exclude="*.m4" --exclude="*.cache" --exclude="configure" --exclude="*.list" --exclude="depcomp" --exclude="install-sh" --exclude="missing" --exclude="compile" --exclude="sbd.sh" --exclude="sbd.sysconfig" --exclude="~" --exclude="*.swp" --exclude="*.patch" --exclude="*.diff" --exclude="*.orig" --exclude="*.rej" --exclude="*.rpm" --exclude="*.pod" --exclude=".deps" --exclude="test-driver" *) | gzip > $(TARFILE); \ if test -n "$$(git status -s)" || test "$(INJECT_GIT_COMMIT)" = "yes"; then \ if test -n "$$(git status -s)"; then git diff HEAD --name-only|grep -v "^\."|xargs -n1 git diff HEAD > uncommitted.diff; fi; \ rm -rf $(distdir); tar -xzf $(TARFILE); rm $(TARFILE); \ cd $(distdir); \ if test -n "$$(git status -s)"; then patch 
-p1 -i ../uncommitted.diff; fi; \ cd ..; \ sed -i 's/global\ commit.*/global\ commit\ $(TAG)/' $(distdir)/$(PACKAGE).spec; \ tar -czf $(TARFILE) $(distdir); rm -rf $(distdir); \ rm -f uncommitted.diff; \ fi; \ echo `date`: Rebuilt $(TARFILE); \ fi #replace commit id in sbd.spec spec: rm -f *.src.rpm rm -rf $(distdir) mkdir $(distdir) cp $(PACKAGE).spec $(distdir) sed -i 's/global\ commit.*/global\ commit\ $(TAG)/' $(distdir)/$(PACKAGE).spec srpm: export spec if [ -e $(BUILD_COUNTER) ]; then \ sed -i 's/global\ buildnum.*/global\ buildnum\ $(COUNT)/' $(distdir)/$(PACKAGE).spec; \ echo $(COUNT) > $(BUILD_COUNTER); \ fi rpmbuild $(RPM_OPTS) -bs $(distdir)/$(PACKAGE).spec rpm: export spec rpmbuild $(RPM_OPTS) -ba $(distdir)/$(PACKAGE).spec mock: srpm -rm -rf $(RPM_ROOT)/mock @echo "mock --root=$* --rebuild $(MOCK_OPTIONS) $(RPM_ROOT)/*.src.rpm" mock --root=$(MOCK_TARGET) --rebuild $(MOCK_OPTIONS) $(RPM_ROOT)/*.src.rpm beekhof: mock cluster-helper -- 'rm -f sbd-*.x86_64.rpm' cluster-helper --copy $(RPM_ROOT)/mock/sbd-*.x86_64.rpm {}: cluster-helper -- yum install -y sbd-*.x86_64.rpm diff --git a/configure.ac b/configure.ac index 11d12f0..e7882a6 100644 --- a/configure.ac +++ b/configure.ac @@ -1,306 +1,354 @@ dnl dnl autoconf for Agents dnl dnl License: GNU General Public License (GPL) dnl =============================================== dnl Bootstrap dnl =============================================== AC_PREREQ(2.63) dnl Suggested structure: dnl information on the package dnl checks for programs dnl checks for libraries dnl checks for header files dnl checks for types dnl checks for structures dnl checks for compiler characteristics dnl checks for library functions dnl checks for system services AC_INIT([sbd], [1.4.1], [lmb@suse.com]) m4_include([tests-opt.m4]) AC_CANONICAL_HOST AC_CONFIG_AUX_DIR(.) 
AC_CONFIG_HEADERS(config.h) m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([no])]) AM_INIT_AUTOMAKE(1.11.1 foreign TESTS_OPTION) AM_PROG_CC_C_O PKG_CHECK_MODULES(glib, [glib-2.0]) PKG_CHECK_MODULES(libxml, [libxml-2.0]) PKG_CHECK_MODULES(cmap, [libcmap], HAVE_cmap=1, HAVE_cmap=0) PKG_CHECK_MODULES(votequorum, [libvotequorum], HAVE_votequorum=1, HAVE_votequorum=0) dnl pacemaker > 1.1.8 PKG_CHECK_MODULES(pacemaker, [pacemaker, pacemaker-cib], HAVE_pacemaker=1, HAVE_pacemaker=0) dnl pacemaker <= 1.1.8 PKG_CHECK_MODULES(pcmk, [pcmk, pcmk-cib], HAVE_pcmk=1, HAVE_pcmk=0) PKG_CHECK_MODULES(libqb, [libqb]) CPPFLAGS="$CPPFLAGS -Werror $glib_CFLAGS $libxml_CFLAGS" LIBS="$LIBS $glib_LIBS $libxml_LIBS" if test $HAVE_pacemaker = 0 -a $HAVE_pcmk = 0; then AC_MSG_ERROR(No package 'pacemaker' found) elif test $HAVE_pacemaker = 1; then CPPFLAGS="$CPPFLAGS $glib_CFLAGS $pacemaker_CFLAGS" if test $HAVE_cmap = 0; then AC_MSG_NOTICE(No library 'cmap' found) else CPPFLAGS="$CPPFLAGS $cmap_CFLAGS" LIBS="$LIBS $cmap_LIBS" fi if test $HAVE_votequorum = 0; then AC_MSG_NOTICE(No library 'votequorum' found) else CPPFLAGS="$CPPFLAGS $votequorum_CFLAGS" LIBS="$LIBS $votequorum_LIBS" fi fi CPPFLAGS="$CPPFLAGS $libqb_CFLAGS $pacemaker_CFLAGS $pcmk_CFLAGS" LIBS="$LIBS $libqb_LIBS $pacemaker_LIBS $pcmk_LIBS" dnl checks for libraries AC_CHECK_LIB(c, dlopen) dnl if dlopen is in libc... 
AC_CHECK_LIB(dl, dlopen) dnl -ldl (for Linux) AC_CHECK_LIB(aio, io_setup, , missing="yes") AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set, , missing="yes") AC_CHECK_LIB(cib, cib_new, , missing="yes") AC_CHECK_LIB(crmcommon, set_crm_log_level, , missing="yes") AC_CHECK_LIB(pe_status, pe_find_node, , missing="yes") AC_CHECK_LIB(pe_rules, test_rule, , missing="yes") AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes") AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes") AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0) AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0) AC_CHECK_LIB(crmcommon, pcmk_pacemakerd_api_ping, HAVE_pacemakerd_api=1, HAVE_pacemakerd_api=0) dnl pacemaker >= 1.1.8 AC_CHECK_HEADERS(crm/cluster.h) AC_CHECK_LIB(crmcommon, pcmk_strerror, , missing="yes") AC_CHECK_LIB(cib, cib_apply_patch_event, , missing="yes") dnl pacemaker-2.0 removed support for corosync 1 cluster layer AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,, [#include ]) dnl check for additional no-quorum-policies dnl AC_TEST_NO_QUORUM_POLICY(POLICY) AC_DEFUN([AC_TEST_NO_QUORUM_POLICY],[ AC_MSG_CHECKING([whether enum pe_quorum_policy defines value $1]) AC_LANG_PUSH([C]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [#include ], [enum pe_quorum_policy policy = $1; return policy;])], AC_DEFINE_UNQUOTED(m4_toupper(HAVE_ENUM_$1), 1, [Does pe_types.h have $1 value in enum pe_quorum_policy?]) AC_MSG_RESULT([yes]), AC_MSG_RESULT([no])) AC_LANG_POP([C]) ]) AC_TEST_NO_QUORUM_POLICY(no_quorum_demote) dnl check for new pe-API AC_CHECK_FUNCS(pe_new_working_set) dnl check if votequorum comes with default for qdevice-sync_timeout AC_CHECK_DECLS([VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT], HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=1, HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=0, [#include ]) if test "$missing" = "yes"; then AC_MSG_ERROR([Missing required libraries or functions.]) fi AC_PATH_PROGS(POD2MAN, pod2man, pod2man) AC_ARG_ENABLE([shared-disk], [ 
--enable-shared-disk Turn on functionality that requires shared disk [default=yes]]) DISK=0 if test "x${enable_shared_disk}" != xno ; then DISK=1 fi AC_DEFINE_UNQUOTED(SUPPORT_SHARED_DISK, $DISK, Turn on functionality that requires shared disk) AM_CONDITIONAL(SUPPORT_SHARED_DISK, test "$DISK" = "1") if test -e /proc/$$ then echo "/proc/{pid} is supported" AC_DEFINE_UNQUOTED(HAVE_PROC_PID, 1, Define to 1 if /proc/{pid} is supported.) fi AC_DEFINE_UNQUOTED(CHECK_TWO_NODE, $HAVE_cmap, Turn on checking for 2-node cluster) AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1") AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic checking of votequorum-handle) AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1") AC_DEFINE_UNQUOTED(CHECK_QDEVICE_SYNC_TIMEOUT, ($HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT && $HAVE_cmap), Turn on checking if watchdog-timeout and qdevice-sync_timeout are matching) AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT, test "$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" && test "$HAVE_cmap" = "1") AC_DEFINE_UNQUOTED(USE_PACEMAKERD_API, $HAVE_pacemakerd_api, Turn on synchronization between sbd & pacemakerd) AM_CONDITIONAL(USE_PACEMAKERD_API, test "$HAVE_pacemakerd_api" = "1") CONFIGDIR="" AC_ARG_WITH(configdir, [ --with-configdir=DIR Directory for SBD configuration file [${CONFIGDIR}]], [ CONFIGDIR="$withval" ] ) +SBD_WATCHDOG_TIMEOUT_DEFAULT="" +AC_ARG_WITH(watchdog-timeout-default, + [ --with-watchdog-timeout-default=SECONDS + Timeout in seconds SBD will configure the watchdog per default], + [ SBD_WATCHDOG_TIMEOUT_DEFAULT="$withval" ] +) + +SBD_SYNC_RESOURCE_STARTUP_DEFAULT="" +AC_ARG_WITH(sync-resource-startup-default, + [ --with-sync-resource-startup-default=yes|no + Default for SBD_SYNC_RESOURCE_STARTUP if not given in SBD configuration file explicitly], + [ SBD_SYNC_RESOURCE_STARTUP_DEFAULT="$withval" ] +) + +SBD_SYNC_RESOURCE_STARTUP_SYSCONFIG="" 
+AC_ARG_WITH(sync-resource-startup-sysconfig, + [ --with-sync-resource-startup-sysconfig=yes|no + Value for SBD_SYNC_RESOURCE_STARTUP going into template SBD configuration file], + [ SBD_SYNC_RESOURCE_STARTUP_SYSCONFIG="$withval" ] +) + # # Where is dlopen? # if test "$ac_cv_lib_c_dlopen" = yes; then LIBADD_DL="" elif test "$ac_cv_lib_dl_dlopen" = yes; then LIBADD_DL=-ldl else LIBADD_DL=${lt_cv_dlopen_libs} fi dnl ********************************************************************** dnl Check for various argv[] replacing functions on various OSs dnl dnl Borrowed from Proftpd dnl Proftpd is Licenced under the terms of the GNU General Public Licence dnl and is available from http://www.proftpd.org/ dnl AC_CHECK_FUNCS(setproctitle) AC_CHECK_HEADERS(libutil.h) AC_CHECK_LIB(util, setproctitle, [AC_DEFINE(HAVE_SETPROCTITLE,1,[ ]) ac_cv_func_setproctitle="yes" ; LIBS="$LIBS -lutil"]) if test "$ac_cv_func_setproctitle" = "yes"; then pf_argv_set="PF_ARGV_NONE" fi if test "$pf_argv_set" = ""; then AC_CHECK_HEADERS(sys/pstat.h) if test "$ac_cv_header_pstat_h" = "yes"; then AC_CHECK_FUNCS(pstat) if test "$ac_cv_func_pstat" = "yes"; then pf_argv_set="PF_ARGV_PSTAT" else pf_argv_set="PF_ARGV_WRITEABLE" fi fi if test "$pf_argv_set" = ""; then AC_EGREP_HEADER([#define.*PS_STRINGS.*],sys/exec.h, have_psstrings="yes",have_psstrings="no") if test "$have_psstrings" = "yes"; then pf_argv_set="PF_ARGV_PSSTRINGS" fi fi if test "$pf_argv_set" = ""; then AC_CACHE_CHECK(whether __progname and __progname_full are available, pf_cv_var_progname, AC_TRY_LINK([extern char *__progname, *__progname_full;], [__progname = "foo"; __progname_full = "foo bar";], pf_cv_var_progname="yes", pf_cv_var_progname="no")) if test "$pf_cv_var_progname" = "yes"; then AC_DEFINE(HAVE___PROGNAME,1,[ ]) fi AC_CACHE_CHECK(which argv replacement method to use, pf_cv_argv_type, AC_EGREP_CPP(yes,[ #if defined(__GNU_HURD__) yes #endif ],pf_cv_argv_type="new", pf_cv_argv_type="writeable")) if test "$pf_cv_argv_type" = 
"new"; then pf_argv_set="PF_ARGV_NEW" fi if test "$pf_argv_set" = ""; then pf_argv_set="PF_ARGV_WRITEABLE" fi fi fi AC_DEFINE_UNQUOTED(PF_ARGV_TYPE, $pf_argv_set, mechanism to pretty-print ps output: setproctitle-equivalent) dnl End of tests borrowed from Proftpd AC_MSG_NOTICE(Sanitizing prefix: ${prefix}) case $prefix in NONE) prefix=/usr dnl Fix default variables - "prefix" variable if not specified if test "$localstatedir" = "\${prefix}/var"; then localstatedir="/var" fi if test "$sysconfdir" = "\${prefix}/etc"; then sysconfdir="/etc" fi ;; esac AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix}) case $exec_prefix in dnl For consistency with Heartbeat, map NONE->$prefix NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac dnl Expand autoconf variables so that we dont end up with '${prefix}' dnl in #defines and python scripts dnl NOTE: Autoconf deliberately leaves them unexpanded to allow dnl make exec_prefix=/foo install dnl No longer being able to do this seems like no great loss to me... 
eval prefix="`eval echo ${prefix}`" eval exec_prefix="`eval echo ${exec_prefix}`" eval bindir="`eval echo ${bindir}`" eval sbindir="`eval echo ${sbindir}`" eval libexecdir="`eval echo ${libexecdir}`" eval datadir="`eval echo ${datadir}`" eval sysconfdir="`eval echo ${sysconfdir}`" eval sharedstatedir="`eval echo ${sharedstatedir}`" eval localstatedir="`eval echo ${localstatedir}`" eval libdir="`eval echo ${libdir}`" eval includedir="`eval echo ${includedir}`" eval oldincludedir="`eval echo ${oldincludedir}`" eval infodir="`eval echo ${infodir}`" eval mandir="`eval echo ${mandir}`" AC_SUBST(LIBADD_DL) dnl extra flags for dynamic linking libraries if test x"${CONFIGDIR}" = x""; then CONFIGDIR="${sysconfdir}/sysconfig" fi AC_SUBST(CONFIGDIR) +if test x"${SBD_WATCHDOG_TIMEOUT_DEFAULT}" = x""; then + case "$host_cpu" in + s390|s390x) + SBD_WATCHDOG_TIMEOUT_DEFAULT=15 + ;; + *) + SBD_WATCHDOG_TIMEOUT_DEFAULT=5 + ;; + esac +fi +AC_SUBST(SBD_WATCHDOG_TIMEOUT_DEFAULT) +AC_DEFINE_UNQUOTED(SBD_WATCHDOG_TIMEOUT_DEFAULT, $SBD_WATCHDOG_TIMEOUT_DEFAULT, + Timeout in seconds SBD will configure the watchdog per default) + +if test x"${SBD_SYNC_RESOURCE_STARTUP_DEFAULT}" = x""; then + SBD_SYNC_RESOURCE_STARTUP_DEFAULT=no +fi +AC_SUBST(SBD_SYNC_RESOURCE_STARTUP_DEFAULT) +dnl rather pass to C as a string and interpret there for consistent interpretation +AC_DEFINE_UNQUOTED(SBD_SYNC_RESOURCE_STARTUP_DEFAULT, "${SBD_SYNC_RESOURCE_STARTUP_DEFAULT}", + Default for SBD_SYNC_RESOURCE_STARTUP if not given in SBD configuration file explicitly) + +if test x"${SBD_SYNC_RESOURCE_STARTUP_SYSCONFIG}" = x""; then + SBD_SYNC_RESOURCE_STARTUP_SYSCONFIG=no +fi +AC_SUBST(SBD_SYNC_RESOURCE_STARTUP_SYSCONFIG) + dnl The Makefiles and shell scripts we output -AC_CONFIG_FILES([Makefile src/Makefile agent/Makefile man/Makefile agent/sbd src/sbd.service src/sbd_remote.service src/sbd.sh]) +AC_CONFIG_FILES([Makefile src/Makefile agent/Makefile man/Makefile agent/sbd src/sbd.service src/sbd_remote.service 
src/sbd.sh src/sbd.sysconfig]) AC_CONFIG_SUBDIRS([tests]) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() diff --git a/sbd.spec b/sbd.spec index 73d7539..505c839 100644 --- a/sbd.spec +++ b/sbd.spec @@ -1,214 +1,222 @@ # # spec file for package sbd # # Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. # Copyright (c) 2013 Lars Marowsky-Bree # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed # upon. The license for this file, and modifications and additions to the # file, is the same license as for the pristine package itself (unless the # license for the pristine package is not an Open Source License, in which # case the license is the MIT License). An "Open Source License" is a # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. # Please submit bugfixes or comments via http://bugs.opensuse.org/ # %global commit aca7907c1973f331a4f192a0d50e6443840daab6 %global shortcommit %(echo %{commit}|cut -c1-8) %global modified %(echo %{commit}-|cut -f2 -d-) %global github_owner beekhof %global buildnum 1 +%ifarch s390x s390 +# minimum timeout on LPAR diag288 watchdog is 15s +%global watchdog_timeout_default 15 +%else +%global watchdog_timeout_default 5 +%endif + +%global sync_resource_startup_default no +%global sync_resource_startup_sysconfig no + Name: sbd Summary: Storage-based death License: GPLv2+ Group: System Environment/Daemons Version: 1.4.1 Release: 99.%{buildnum}.%{shortcommit}.%{modified}git%{?dist} Url: https://github.com/%{github_owner}/%{name} Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf BuildRequires: automake BuildRequires: libuuid-devel BuildRequires: glib2-devel BuildRequires: libaio-devel BuildRequires: 
corosync-devel %if 0%{?suse_version} BuildRequires: libpacemaker-devel %else BuildRequires: pacemaker-libs-devel %endif BuildRequires: libtool BuildRequires: libuuid-devel BuildRequires: libxml2-devel BuildRequires: pkgconfig BuildRequires: make Conflicts: fence-agents-sbd < 4.5.0 %if 0%{?rhel} > 0 ExclusiveArch: i686 x86_64 s390x aarch64 ppc64le %endif %if %{defined systemd_requires} %systemd_requires %endif %description This package contains the storage-based death functionality. %package tests Summary: Storage-based death environment for regression tests License: GPLv2+ Group: System Environment/Daemons %description tests This package provides an environment + testscripts for regression-testing sbd. %prep ########################################################### # %setup -n sbd-%{version} -q %setup -q -n %{name}-%{commit} -%ifarch s390x s390 -sed -i src/sbd.sysconfig -e "s/Default: 5/Default: 15/" -sed -i src/sbd.sysconfig -e "s/SBD_WATCHDOG_TIMEOUT=5/SBD_WATCHDOG_TIMEOUT=15/" -%endif ########################################################### %build ./autogen.sh export CFLAGS="$RPM_OPT_FLAGS -Wall -Werror" -%configure +%configure --with-watchdog-timeout-default=%{watchdog_timeout_default} \ + --with-sync-resource-startup-default=%{sync_resource_startup_default} \ + --with-sync-resource-startup-sysconfig=%{sync_resource_startup_sysconfig} make %{?_smp_mflags} ########################################################### %install ########################################################### make DESTDIR=$RPM_BUILD_ROOT LIBDIR=%{_libdir} install rm -rf ${RPM_BUILD_ROOT}%{_libdir}/stonith install -D -m 0755 src/sbd.sh $RPM_BUILD_ROOT/usr/share/sbd/sbd.sh install -D -m 0755 tests/regressions.sh $RPM_BUILD_ROOT/usr/share/sbd/regressions.sh %if %{defined _unitdir} install -D -m 0644 src/sbd.service $RPM_BUILD_ROOT/%{_unitdir}/sbd.service install -D -m 0644 src/sbd_remote.service $RPM_BUILD_ROOT/%{_unitdir}/sbd_remote.service %endif mkdir -p 
${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig install -m 644 src/sbd.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/sbd # Don't package static libs find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f %clean rm -rf %{buildroot} %if %{defined _unitdir} %post %systemd_post sbd.service %systemd_post sbd_remote.service %preun %systemd_preun sbd.service %systemd_preun sbd_remote.service %postun %systemd_postun sbd.service %systemd_postun sbd_remote.service %endif %files ########################################################### %defattr(-,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/sbd %{_sbindir}/sbd %{_datadir}/sbd %exclude %{_datadir}/sbd/regressions.sh %doc %{_mandir}/man8/sbd* %if %{defined _unitdir} %{_unitdir}/sbd.service %{_unitdir}/sbd_remote.service %endif %doc COPYING %files tests %defattr(-,root,root) %dir %{_datadir}/sbd %{_datadir}/sbd/regressions.sh %{_libdir}/libsbdtestbed* %changelog * Tue Nov 19 2019 - 1.4.1-99.1.aca7907c.git - improvements/clarifications in documentation - properly finalize cmap connection when disconnected from cluster - make handling of cib-connection loss more robust - silence some coverity findings - overhaul log for reasonable prios and details - if current slice doesn't have rt-budget move to root-slice - periodically ping corosync daemon for liveness - actually use crashdump timeout if configured - avoid deprecated names for g_main-loop-funcitons - conflict with fence-agents-sbd < 4.5.0 - rather require corosync-devel provided by most distributions - make devices on cmdline overrule those coming via SBD_DEVICE - make 15s timeout on s390 be used consistently - improve build/test for CI-friendlyness - * add autogen.sh - * enable/improve out-of-tree-building - * make tar generation smarter - * don't modify sbd.spec - * make distcheck-target work - * Add tests/regressions.sh to check-target - * use unique devmapper names for multiple tests in 
parallel - * consistently use serial test-harness for visible progress - * package tests into separate package (not packaged before) - * add preload-library to intercept reboots while testing - * add tests for sbd in daemon-mode & watchdog-dev-handling - * make tests work in non-privileged containers * Mon Jan 14 2019 - 1.4.0-0.1.2d595fdd.git - updated travis-CI (ppc64le-build, fedora29, remove need for alectolytic-build-container) - make watchdog-device-query easier to be handled by an SELinux-policy - configurable delay value for SBD_DELAY_START - use pacemaker's new pe api with constructors/destructors - make timeout-action executed by sbd configurable - init script for sysv systems - version bump to v1.4.0 to denote Pacemaker 2.0.0 compatibility * Fri Jun 29 2018 - 1.3.1-0.1.e102d9ed.git - removed unneeded python-devel build-requirement - changed legacy corosync-devel to corosynclib-devel * Fri Nov 3 2017 - 1.3.1-0.1.a180176c.git - Add commands to test/query watchdogs - Allow 2-node-operation with a single shared-disk - Overhaul of the command-line options & config-file - Proper handling of off instead of reboot - Refactored disk-servant for more robust communication with parent - Fix config for Debian + configurable location of config - Fixes in sbd.sh - multiple SBD devices and others * Sun Mar 27 2016 - 1.3.0-0.1.4ee36fa3.git - Changes since v1.2.0 like adding the possibility to have a watchdog-only setup without shared-block-devices legitimate a bump to v1.3.0. 
* Mon Oct 13 2014 - 1.2.1-0.4.3de531ed.git - Fixes for suitability to the el7 environment * Tue Sep 30 2014 - 1.2.1-0.3.8f912945.git - Only build on archs supported by the HA Add-on * Fri Aug 29 2014 - 1.2.1-0.2.8f912945.git - Remove some additional SUSE-isms * Fri Aug 29 2014 - 1.2.1-0.1.8f912945.git - Prepare for package review Resolves: rhbz#1134245 diff --git a/src/sbd-common.c b/src/sbd-common.c index c2da758..96f4ead 100644 --- a/src/sbd-common.c +++ b/src/sbd-common.c @@ -1,1225 +1,1220 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ #include "sbd.h" #include #include #ifdef __GLIBC__ #include #endif #include #include #include #include #include #include #include #ifdef _POSIX_MEMLOCK # include #endif /* Tunable defaults: */ -#if defined(__s390__) || defined(__s390x__) -unsigned long timeout_watchdog = 15; -int timeout_msgwait = 30; -#else -unsigned long timeout_watchdog = 5; -int timeout_msgwait = 10; -#endif +unsigned long timeout_watchdog = SBD_WATCHDOG_TIMEOUT_DEFAULT; +int timeout_msgwait = 2 * SBD_WATCHDOG_TIMEOUT_DEFAULT; unsigned long timeout_watchdog_warn = 3; int timeout_allocate = 2; int timeout_loop = 1; int timeout_io = 3; int timeout_startup = 120; int watchdog_use = 1; int watchdog_set_timeout = 1; unsigned long timeout_watchdog_crashdump = 0; int skip_rt = 0; int debug = 0; int debug_mode = 0; char *watchdogdev = NULL; bool watchdogdev_is_default = false; char * local_uname; /* Global, non-tunable variables: */ int sector_size = 0; int watchdogfd = -1; int servant_health = 0; /*const char *devname;*/ const char *cmdname; void usage(void) { fprintf(stderr, "Shared storage fencing tool.\n" "Syntax:\n" " %s \n" "Options:\n" "-d Block device to use (mandatory; can be specified up to 3 times)\n" "-h Display this help.\n" "-n Set local node name; defaults to uname -n (optional)\n" "\n" "-R Do NOT enable realtime priority (debugging only)\n" "-W Use watchdog (recommended) (watch only)\n" "-w Specify watchdog device (optional) (watch only)\n" "-T Do NOT initialize the watchdog timeout (watch only)\n" "-S <0|1> Set start mode if the node was previously fenced (watch only)\n" "-p Write pidfile to the specified path (watch only)\n" "-v|-vv|-vvv Enable verbose|debug|debug-library logging (optional)\n" "\n" "-1 Set watchdog timeout to N seconds (optional, create only)\n" "-2 Set slot allocation timeout to N seconds (optional, create only)\n" "-3 Set daemon loop timeout to N seconds (optional, create only)\n" "-4 Set msgwait timeout to N seconds (optional, create only)\n" "-5 Warn if loop 
latency exceeds threshold (optional, watch only)\n" " (default is 3, set to 0 to disable)\n" "-C Watchdog timeout to set before crashdumping\n" " (def: 0s = disable gracefully, optional)\n" "-I Async IO read timeout (defaults to 3 * loop timeout, optional)\n" "-s Timeout to wait for devices to become available (def: 120s)\n" "-t Dampening delay before faulty servants are restarted (optional)\n" " (default is 5, set to 0 to disable)\n" "-F # of failures before a servant is considered faulty (optional)\n" " (default is 1, set to 0 to disable)\n" "-P Check Pacemaker quorum and node health (optional, watch only)\n" "-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n" "-r Set timeout-action to comma-separated combination of\n" " noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n" "Commands:\n" #if SUPPORT_SHARED_DISK "create initialize N slots on - OVERWRITES DEVICE!\n" "list List all allocated slots on device, and messages.\n" "dump Dump meta-data header from device.\n" "allocate \n" " Allocate a slot for node (optional)\n" "message (test|reset|off|crashdump|clear|exit)\n" " Writes the specified message to node's slot.\n" #endif "watch Loop forever, monitoring own slot\n" "query-watchdog Check for available watchdog-devices and print some info\n" "test-watchdog Test the watchdog-device selected.\n" " Attention: This will arm the watchdog and have your system reset\n" " in case your watchdog is working properly!\n" , cmdname); } static int watchdog_init_interval_fd(int wdfd, int timeout) { if (ioctl(wdfd, WDIOC_SETTIMEOUT, &timeout) < 0) { cl_perror( "WDIOC_SETTIMEOUT" ": Failed to set watchdog timer to %u seconds.", timeout); cl_log(LOG_CRIT, "Please validate your watchdog configuration!"); cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to skip this if you are completely sure."); return -1; } return 0; } int watchdog_init_interval(void) { if (watchdogfd < 0) { return 0; } if (watchdog_set_timeout == 0) { cl_log(LOG_INFO, "NOT 
setting watchdog timeout on explicit user request!"); return 0; } if (watchdog_init_interval_fd(watchdogfd, timeout_watchdog) < 0) { return -1; } cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.", (int) timeout_watchdog); return 0; } static int watchdog_tickle_fd(int wdfd, char *wddev) { if (write(wdfd, "", 1) != 1) { cl_perror("Watchdog write failure: %s!", wddev); return -1; } return 0; } int watchdog_tickle(void) { if (watchdogfd >= 0) { return watchdog_tickle_fd(watchdogfd, watchdogdev); } return 0; } static int watchdog_init_fd(char *wddev, int timeout) { int wdfd; wdfd = open(wddev, O_WRONLY); if (wdfd >= 0) { if (((timeout >= 0) && (watchdog_init_interval_fd(wdfd, timeout) < 0)) || (watchdog_tickle_fd(wdfd, wddev) < 0)) { close(wdfd); return -1; } } else { cl_perror("Cannot open watchdog device '%s'", wddev); return -1; } return wdfd; } int watchdog_init(void) { if (watchdogfd < 0 && watchdogdev != NULL) { int timeout = timeout_watchdog; if (watchdog_set_timeout == 0) { cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!"); timeout = -1; } watchdogfd = watchdog_init_fd(watchdogdev, timeout); if (watchdogfd >= 0) { cl_log(LOG_NOTICE, "Using watchdog device '%s'", watchdogdev); if (watchdog_set_timeout) { cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.", (int) timeout_watchdog); } } else { return -1; } } return 0; } static void watchdog_close_fd(int wdfd, char *wddev, bool disarm) { if (disarm) { int r; int flags = WDIOS_DISABLECARD;; /* Explicitly disarm it */ r = ioctl(wdfd, WDIOC_SETOPTIONS, &flags); if (r < 0) { cl_perror("Failed to disable hardware watchdog %s", wddev); } /* To be sure, use magic close logic, too */ for (;;) { if (write(wdfd, "V", 1) > 0) { break; } cl_perror("Cannot disable watchdog device %s", wddev); } } if (close(wdfd) < 0) { cl_perror("Watchdog close(%d) failed", wdfd); } } void watchdog_close(bool disarm) { if (watchdogfd < 0) { return; } watchdog_close_fd(watchdogfd, watchdogdev, disarm); watchdogfd 
= -1; } #define MAX_WATCHDOGS 64 #define SYS_CLASS_WATCHDOG "/sys/class/watchdog" #define SYS_CHAR_DEV_DIR "/sys/dev/char" #define WATCHDOG_NODEDIR "/dev/" #define WATCHDOG_NODEDIR_LEN 5 struct watchdog_list_item { dev_t dev; char *dev_node; char *dev_ident; char *dev_driver; struct watchdog_list_item *next; }; struct link_list_item { char *dev_node; char *link_name; struct link_list_item *next; }; static struct watchdog_list_item *watchdog_list = NULL; static int watchdog_list_items = 0; static void watchdog_populate_list(void) { dev_t watchdogs[MAX_WATCHDOGS + 1] = {makedev(10,130), 0}; int num_watchdogs = 1; struct dirent *entry; char entry_name[280]; DIR *dp; char buf[280] = ""; struct link_list_item *link_list = NULL; if (watchdog_list != NULL) { return; } /* get additional devices from /sys/class/watchdog */ dp = opendir(SYS_CLASS_WATCHDOG); if (dp) { while ((entry = readdir(dp))) { if (entry->d_type == DT_LNK) { FILE *file; snprintf(entry_name, sizeof(entry_name), SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name); file = fopen(entry_name, "r"); if (file) { int major, minor; if (fscanf(file, "%d:%d", &major, &minor) == 2) { watchdogs[num_watchdogs++] = makedev(major, minor); } fclose(file); if (num_watchdogs == MAX_WATCHDOGS) { break; } } } } closedir(dp); } /* search for watchdog nodes in /dev */ dp = opendir(WATCHDOG_NODEDIR); if (dp) { /* first go for links and memorize them */ while ((entry = readdir(dp))) { if (entry->d_type == DT_LNK) { int len; snprintf(entry_name, sizeof(entry_name), WATCHDOG_NODEDIR "%s", entry->d_name); /* !realpath(entry_name, buf) unfortunately does a stat on * target so we can't really use it to check if links stay * within /dev without triggering e.g. AVC-logs (with * SELinux policy that just allows stat within /dev). * Without canonicalization that doesn't actually touch the * filesystem easily available introduce some limitations * for simplicity: * - just simple path without '..' * - just one level of symlinks (avoid e.g. 
loop-checking) */ len = readlink(entry_name, buf, sizeof(buf) - 1); if ((len < 1) || (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) { continue; } buf[len] = '\0'; if (buf[0] != '/') { memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1); memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN); len += WATCHDOG_NODEDIR_LEN; } if (strstr(buf, "/../") || strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN)) { continue; } else { /* just memorize to avoid statting the target - SELinux */ struct link_list_item *lli = calloc(1, sizeof(struct link_list_item)); lli->dev_node = strdup(buf); lli->link_name = strdup(entry_name); lli->next = link_list; link_list = lli; } } } rewinddir(dp); while ((entry = readdir(dp))) { if (entry->d_type == DT_CHR) { struct stat statbuf; snprintf(entry_name, sizeof(entry_name), WATCHDOG_NODEDIR "%s", entry->d_name); if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode)) { int i; for (i=0; idev = watchdogs[i]; wdg->dev_node = strdup(entry_name); wdg->next = watchdog_list; watchdog_list = wdg; watchdog_list_items++; if (wdfd >= 0) { struct watchdog_info ident; ident.identity[0] = '\0'; ioctl(wdfd, WDIOC_GETSUPPORT, &ident); watchdog_close_fd(wdfd, entry_name, true); if (ident.identity[0]) { wdg->dev_ident = strdup((char *) ident.identity); } } snprintf(entry_name, sizeof(entry_name), SYS_CHAR_DEV_DIR "/%d:%d/device/driver", major(watchdogs[i]), minor(watchdogs[i])); len = readlink(entry_name, buf, sizeof(buf) - 1); if (len > 0) { buf[len] = '\0'; wdg->dev_driver = strdup(basename(buf)); } else if ((wdg->dev_ident) && (strcmp(wdg->dev_ident, "Software Watchdog") == 0)) { wdg->dev_driver = strdup("softdog"); } /* create dupes if we have memorized links * to this node */ for (tmp_list = link_list; tmp_list; tmp_list = tmp_list->next) { if (!strcmp(tmp_list->dev_node, wdg->dev_node)) { struct watchdog_list_item *dupe_wdg = calloc(1, sizeof(struct watchdog_list_item)); /* as long as we never purge watchdog_list * there is no need to dupe strings */ 
*dupe_wdg = *wdg; dupe_wdg->dev_node = strdup(tmp_list->link_name); dupe_wdg->next = watchdog_list; watchdog_list = dupe_wdg; watchdog_list_items++; } /* for performance reasons we could remove * the link_list entry */ } break; } } } } } closedir(dp); } /* cleanup link list */ while (link_list) { struct link_list_item *tmp_list = link_list; link_list = link_list->next; free(tmp_list->dev_node); free(tmp_list->link_name); free(tmp_list); } } int watchdog_info(void) { struct watchdog_list_item *wdg; int wdg_cnt = 0; watchdog_populate_list(); printf("\nDiscovered %d watchdog devices:\n", watchdog_list_items); for (wdg = watchdog_list; wdg != NULL; wdg = wdg->next) { wdg_cnt++; printf("\n[%d] %s\nIdentity: %s\nDriver: %s\n", wdg_cnt, wdg->dev_node, wdg->dev_ident?wdg->dev_ident:"Error: Check if hogged by e.g. sbd-daemon!", wdg->dev_driver?wdg->dev_driver:""); if ((wdg->dev_driver) && (strcmp(wdg->dev_driver, "softdog") == 0)) { printf("CAUTION: Not recommended for use with sbd.\n"); } } return 0; } int watchdog_test(void) { int i; if ((watchdog_set_timeout == 0) || !watchdog_use) { printf("\nWatchdog is disabled - aborting test!!!\n"); return 0; } if (watchdogdev_is_default) { watchdog_populate_list(); if (watchdog_list_items > 1) { printf("\nError: Multiple watchdog devices discovered.\n" " Use -w or SBD_WATCHDOG_DEV to specify\n" " which device to reset the system with\n"); watchdog_info(); return -1; } } if ((isatty(fileno(stdin)))) { char buffer[16]; printf("\nWARNING: This operation is expected to force-reboot this system\n" " without following any shutdown procedures.\n\n" "Proceed? 
[NO/Proceed] "); if ((fgets(buffer, 16, stdin) == NULL) || strcmp(buffer, "Proceed\n")) { printf("\nAborting watchdog test!!!\n"); return 0; } printf("\n"); } printf("Initializing %s with a reset countdown of %d seconds ...\n", watchdogdev, (int) timeout_watchdog); if ((watchdog_init() < 0) || (watchdog_init_interval() < 0)) { printf("Failed to initialize watchdog!!!\n"); return -1; } printf("\n"); printf("NOTICE: The watchdog device is expected to reset the system\n" " in %d seconds. If system remains active beyond that time,\n" " watchdog may not be functional.\n\n", (int) timeout_watchdog); for (i=timeout_watchdog; i>1; i--) { printf("Reset countdown ... %d seconds\n", i); sleep(1); } for (i=2; i>0; i--) { printf("System expected to reset any moment ...\n"); sleep(1); } for (i=5; i>0; i--) { printf("System should have reset ...\n"); sleep(1); } printf("Error: The watchdog device has failed to reboot the system,\n" " and it may not be suitable for usage with sbd.\n"); /* test should trigger a reboot thus returning is actually bad */ return -1; } /* This duplicates some code from linux/ioprio.h since these are not included * even in linux-kernel-headers. Sucks. 
See also * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */ extern int sys_ioprio_set(int, int, int); int ioprio_set(int which, int who, int ioprio); inline int ioprio_set(int which, int who, int ioprio) { return syscall(__NR_ioprio_set, which, who, ioprio); } enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE, }; enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, IOPRIO_WHO_USER, }; #define IOPRIO_BITS (16) #define IOPRIO_CLASS_SHIFT (13) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) #define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) #define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) static void sbd_stack_hogger(unsigned char * inbuf, int kbytes) { unsigned char buf[1024]; if(kbytes <= 0) { return; } if (inbuf == NULL) { memset(buf, HOG_CHAR, sizeof(buf)); } else { memcpy(buf, inbuf, sizeof(buf)); } if (kbytes > 0) { sbd_stack_hogger(buf, kbytes-1); } return; } static void sbd_malloc_hogger(int kbytes) { int j; void**chunks; int chunksize = 1024; if(kbytes <= 0) { return; } /* * We could call mallopt(M_MMAP_MAX, 0) to disable it completely, * but we've already called mlockall() * * We could also call mallopt(M_TRIM_THRESHOLD, -1) to prevent malloc * from giving memory back to the system, but we've already called * mlockall(MCL_FUTURE), so there's no need. */ chunks = malloc(kbytes * sizeof(void *)); if (chunks == NULL) { cl_log(LOG_WARNING, "Could not preallocate chunk array"); return; } for (j=0; j < kbytes; ++j) { chunks[j] = malloc(chunksize); if (chunks[j] == NULL) { cl_log(LOG_WARNING, "Could not preallocate block %d", j); } else { memset(chunks[j], 0, chunksize); } } for (j=0; j < kbytes; ++j) { free(chunks[j]); } free(chunks); } static void sbd_memlock(int stackgrowK, int heapgrowK) { #ifdef _POSIX_MEMLOCK /* * We could call setrlimit(RLIMIT_MEMLOCK,...) 
with a large * number, but the mcp runs as root and mlock(2) says: * * Since Linux 2.6.9, no limits are placed on the amount of memory * that a privileged process may lock, and this limit instead * governs the amount of memory that an unprivileged process may * lock. */ if (mlockall(MCL_CURRENT|MCL_FUTURE) >= 0) { cl_log(LOG_INFO, "Locked ourselves in memory"); /* Now allocate some extra pages (MCL_FUTURE will ensure they stay around) */ sbd_malloc_hogger(heapgrowK); sbd_stack_hogger(NULL, stackgrowK); } else { cl_perror("Unable to lock ourselves into memory"); } #else cl_log(LOG_ERR, "Unable to lock ourselves into memory"); #endif } static int get_realtime_budget(void) { FILE *f; char fname[PATH_MAX]; int res = -1, lnum = 0, num; char *cgroup = NULL, *namespecs = NULL; snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid()); f = fopen(fname, "rt"); if (f == NULL) { cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd", (intmax_t)getpid()); goto exit_res; } while( (num = fscanf(f, "%d:%m[^:]:%m[^\n]\n", &lnum, &namespecs, &cgroup)) !=EOF ) { if (namespecs && strstr(namespecs, "cpuacct")) { free(namespecs); break; } if (cgroup) { free(cgroup); cgroup = NULL; } if (namespecs) { free(namespecs); namespecs = NULL; } /* not to get stuck if format changes */ if ((num < 3) && ((fscanf(f, "%*[^\n]") == EOF) || (fscanf(f, "\n") == EOF))) { break; } } fclose(f); if (cgroup == NULL) { cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd", (intmax_t)getpid()); goto exit_res; } snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us", cgroup); f = fopen(fname, "rt"); if (f == NULL) { cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but " "doesn't for '%s'", cgroup); goto exit_res; } if (fscanf(f, "%d", &res) != 1) { cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname); } else { cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res); } fclose(f); exit_res: if (cgroup) { free(cgroup); } return res; } /* stolen from corosync */ 
static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) { FILE *f; int res = -1; /* * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now * using systemd and systemd uses hardcoded path of cgroup mount point. * * This feature is expected to be removed as soon as systemd gets support * for managing RT configuration. */ f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt"); if (f == NULL) { cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> " "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED"); res = 0; goto exit_res; } fclose(f); if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) { cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are " "-> skip moving to root-slice"); res = 0; goto exit_res; } f = fopen("/sys/fs/cgroup/cpu/tasks", "w"); if (f == NULL) { cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing"); goto exit_res; } if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) { cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file"); goto close_and_exit_res; } close_and_exit_res: if (fclose(f) != 0) { cl_log(LOG_WARNING, "Can't close cgroups tasks file"); goto exit_res; } exit_res: return (res); } void sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) { if(priority < 0) { return; } do { #ifdef SCHED_RR if (move_to_root_cgroup) { sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup); } { int pmin = sched_get_priority_min(SCHED_RR); int pmax = sched_get_priority_max(SCHED_RR); struct sched_param sp; int pcurrent; if (priority == 0) { priority = pmax; } else if (priority < pmin) { priority = pmin; } else if (priority > pmax) { priority = pmax; } if (sched_getparam(0, &sp) < 0) { cl_perror("Unable to get scheduler priority"); } else if ((pcurrent = sched_getscheduler(0)) < 0) { cl_perror("Unable to get scheduler policy"); } else if ((pcurrent == SCHED_RR) && (sp.sched_priority >= priority)) { cl_log(LOG_INFO, "Stay with priority (%d) for policy SCHED_RR", 
sp.sched_priority); break; } else { memset(&sp, 0, sizeof(sp)); sp.sched_priority = priority; if (sched_setscheduler(0, SCHED_RR, &sp) < 0) { cl_perror( "Unable to set scheduler policy to SCHED_RR priority %d", priority); } else { cl_log(LOG_INFO, "Scheduler policy is now SCHED_RR priority %d", priority); break; } } } #else cl_log(LOG_ERR, "System does not support updating the scheduler policy"); #endif #ifdef PRIO_PGRP if (setpriority(PRIO_PGRP, 0, INT_MIN) < 0) { cl_perror("Unable to raise the scheduler priority"); } else { cl_log(LOG_INFO, "Scheduler priority raised to the maximum"); } #else cl_perror("System does not support setting the scheduler priority"); #endif } while (0); sbd_memlock(heapgrowK, stackgrowK); } void maximize_priority(void) { if (skip_rt) { cl_log(LOG_INFO, "Not elevating to realtime (-R specified)."); return; } sbd_make_realtime(0, 256, 256); if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(), IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) { cl_perror("ioprio_set() call failed."); } } void sysrq_init(void) { FILE* procf; int c; procf = fopen("/proc/sys/kernel/sysrq", "r"); if (!procf) { cl_perror("cannot open /proc/sys/kernel/sysrq for read."); return; } if (fscanf(procf, "%d", &c) != 1) { cl_perror("Parsing sysrq failed"); c = 0; } fclose(procf); if (c == 1) return; /* 8 for debugging dumps of processes, 128 for reboot/poweroff */ c |= 136; procf = fopen("/proc/sys/kernel/sysrq", "w"); if (!procf) { cl_perror("cannot open /proc/sys/kernel/sysrq for writing"); return; } fprintf(procf, "%d", c); fclose(procf); return; } void sysrq_trigger(char t) { FILE *procf; procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { cl_perror("Opening sysrq-trigger failed."); return; } cl_log(LOG_INFO, "sysrq-trigger: %c\n", t); fprintf(procf, "%c\n", t); fclose(procf); return; } static void do_exit(char kind, bool do_flush) { /* TODO: Turn debug_mode into a bit field? 
Delay + kdump for example */ const char *reason = NULL; if (kind == 'c') { cl_log(LOG_NOTICE, "Initiating kdump"); } else if (debug_mode == 1) { cl_log(LOG_WARNING, "Initiating kdump instead of panicking the node (debug mode)"); kind = 'c'; } if (debug_mode == 2) { cl_log(LOG_WARNING, "Shutting down SBD instead of panicking the node (debug mode)"); watchdog_close(true); exit(0); } if (debug_mode == 3) { /* Give the system some time to flush logs to disk before rebooting. */ cl_log(LOG_WARNING, "Delaying node panic by 10s (debug mode)"); watchdog_close(true); sync(); sleep(10); } switch(kind) { case 'b': reason = "reboot"; break; case 'c': reason = "crashdump"; break; case 'o': reason = "off"; break; default: reason = "unknown"; break; } cl_log(LOG_EMERG, "Rebooting system: %s", reason); if (do_flush) { sync(); } if (kind == 'c') { if (timeout_watchdog_crashdump) { if (timeout_watchdog != timeout_watchdog_crashdump) { timeout_watchdog = timeout_watchdog_crashdump; watchdog_init_interval(); } watchdog_close(false); } else { watchdog_close(true); } sysrq_trigger(kind); } else { watchdog_close(false); sysrq_trigger(kind); if (reboot((kind == 'o')?RB_POWER_OFF:RB_AUTOBOOT) < 0) { cl_perror("%s failed", (kind == 'o')?"Poweroff":"Reboot"); } } exit(1); } void do_crashdump(void) { do_exit('c', true); } void do_reset(void) { do_exit('b', true); } void do_off(void) { do_exit('o', true); } void do_timeout_action(void) { do_exit(timeout_sysrq_char, do_flush); } /* * Change directory to the directory our core file needs to go in * Call after you establish the userid you're running under. 
*/ int sbd_cdtocoredir(void) { int rc; static const char *dir = NULL; if (dir == NULL) { dir = CRM_CORE_DIR; } if ((rc=chdir(dir)) < 0) { int errsave = errno; cl_perror("Cannot chdir to [%s]", dir); errno = errsave; } return rc; } pid_t make_daemon(void) { pid_t pid; const char * devnull = "/dev/null"; pid = fork(); if (pid < 0) { cl_log(LOG_ERR, "%s: could not start daemon\n", cmdname); cl_perror("fork"); exit(1); }else if (pid > 0) { return pid; } qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); /* This is the child; ensure privileges have not been lost. */ maximize_priority(); sysrq_init(); umask(022); close(0); (void)open(devnull, O_RDONLY); close(1); (void)open(devnull, O_WRONLY); close(2); (void)open(devnull, O_WRONLY); sbd_cdtocoredir(); return 0; } void sbd_get_uname(void) { struct utsname uname_buf; int i; if (uname(&uname_buf) < 0) { cl_perror("uname() failed?"); exit(1); } local_uname = strdup(uname_buf.nodename); for (i = 0; i < strlen(local_uname); i++) local_uname[i] = tolower(local_uname[i]); } #define FMT_MAX 256 void sbd_set_format_string(int method, const char *daemon) { int offset = 0; char fmt[FMT_MAX]; struct utsname res; switch(method) { case QB_LOG_STDERR: break; case QB_LOG_SYSLOG: if(daemon && strcmp(daemon, "sbd") != 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%10s: ", daemon); } break; default: /* When logging to a file */ if (uname(&res) == 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %s %10s: ", getpid(), res.nodename, daemon); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %10s: ", getpid(), daemon); } } if (debug && method >= QB_LOG_STDERR) { offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n: "); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n: "); } if (method == QB_LOG_SYSLOG) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%b"); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "\t%%b"); } 
if(offset > 0) { qb_log_format_set(method, fmt); } } void notify_parent(void) { pid_t ppid; union sigval signal_value; memset(&signal_value, 0, sizeof(signal_value)); ppid = getppid(); if (ppid == 1) { /* Our parent died unexpectedly. Triggering * self-fence. */ cl_log(LOG_WARNING, "Our parent is dead."); do_timeout_action(); } switch (servant_health) { case pcmk_health_pending: case pcmk_health_shutdown: case pcmk_health_transient: DBGLOG(LOG_DEBUG, "Not notifying parent: state transient (%d)", servant_health); break; case pcmk_health_unknown: case pcmk_health_unclean: case pcmk_health_noquorum: DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY (%d)", servant_health); sigqueue(ppid, SIG_PCMK_UNHEALTHY, signal_value); break; case pcmk_health_online: DBGLOG(LOG_DEBUG, "Notifying parent: healthy"); sigqueue(ppid, SIG_LIVENESS, signal_value); break; default: DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY %d", servant_health); sigqueue(ppid, SIG_PCMK_UNHEALTHY, signal_value); break; } } void set_servant_health(enum pcmk_health state, int level, char const *format, ...) 
{ if (servant_health != state) { va_list ap; int len = 0; char *string = NULL; servant_health = state; va_start(ap, format); len = vasprintf (&string, format, ap); if(len > 0) { cl_log(level, "%s", string); } va_end(ap); free(string); } } bool sbd_is_disk(struct servants_list_item *servant) { if ((servant != NULL) && (servant->devname != NULL) && (servant->devname[0] == '/')) { return true; } return false; } bool sbd_is_cluster(struct servants_list_item *servant) { if ((servant != NULL) && (servant->devname != NULL) && (strcmp("cluster", servant->devname) == 0)) { return true; } return false; } bool sbd_is_pcmk(struct servants_list_item *servant) { if ((servant != NULL) && (servant->devname != NULL) && (strcmp("pcmk", servant->devname) == 0)) { return true; } return false; } diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c index 5a23589..d1a0fee 100644 --- a/src/sbd-inquisitor.c +++ b/src/sbd-inquisitor.c @@ -1,1309 +1,1309 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ #include #include "sbd.h" #define LOCKSTRLEN 11 static struct servants_list_item *servants_leader = NULL; int disk_priority = 1; int check_pcmk = 1; int check_cluster = 1; int disk_count = 0; int servant_count = 0; int servant_restart_interval = 5; int servant_restart_count = 1; int start_mode = 0; char* pidfile = NULL; bool do_flush = true; char timeout_sysrq_char = 'b'; bool move_to_root_cgroup = true; bool enforce_moving_to_root_cgroup = false; bool sync_resource_startup = false; int parse_device_line(const char *line); static int recruit_servant(const char *devname, pid_t pid) { struct servants_list_item *s = servants_leader; struct servants_list_item *newbie; if (lookup_servant_by_dev(devname)) { cl_log(LOG_DEBUG, "Servant %s already exists", devname); return 0; } newbie = malloc(sizeof(*newbie)); if (newbie) { memset(newbie, 0, sizeof(*newbie)); newbie->devname = strdup(devname); newbie->pid = pid; newbie->first_start = 1; } if (!newbie || !newbie->devname) { fprintf(stderr, "heap allocation failed in recruit_servant.\n"); exit(1); } /* some sanity-check on our newbie */ if (sbd_is_disk(newbie)) { cl_log(LOG_INFO, "Monitoring %s", devname); disk_count++; } else if (sbd_is_pcmk(newbie) || sbd_is_cluster(newbie)) { /* alive just after pcmk and cluster servants have shown up */ newbie->outdated = 1; } else { /* toss our newbie */ cl_log(LOG_ERR, "Refusing to recruit unrecognized servant %s", devname); free((void *) newbie->devname); free(newbie); return -1; } if (!s) { servants_leader = newbie; } else { while (s->next) s = s->next; s->next = newbie; } servant_count++; return 0; } int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp) { pid_t pid = 0; int rc = 0; pid = fork(); if (pid == 0) { /* child */ maximize_priority(); sbd_set_format_string(QB_LOG_SYSLOG, devname); rc = (*functionp)(devname, mode, argp); if (rc == -1) exit(1); else exit(0); } else if (pid != -1) { /* parent */ return pid; } else { 
cl_log(LOG_ERR,"Failed to fork servant"); exit(1); } } struct servants_list_item *lookup_servant_by_dev(const char *devname) { struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { if (strcasecmp(s->devname, devname) == 0) break; } return s; } struct servants_list_item *lookup_servant_by_pid(pid_t pid) { struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { if (s->pid == pid) break; } return s; } int check_all_dead(void) { struct servants_list_item *s; int r = 0; union sigval svalue; for (s = servants_leader; s; s = s->next) { if (s->pid != 0) { r = sigqueue(s->pid, 0, svalue); if (r == -1 && errno == ESRCH) continue; return 0; } } return 1; } void servant_start(struct servants_list_item *s) { int r = 0; union sigval svalue; if (s->pid != 0) { r = sigqueue(s->pid, 0, svalue); if ((r != -1 || errno != ESRCH)) return; } s->restarts++; if (sbd_is_disk(s)) { #if SUPPORT_SHARED_DISK DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname); s->pid = assign_servant(s->devname, servant_md, start_mode, s); #else cl_log(LOG_ERR, "Shared disk functionality not supported"); return; #endif } else if(sbd_is_pcmk(s)) { DBGLOG(LOG_INFO, "Starting Pacemaker servant"); s->pid = assign_servant(s->devname, servant_pcmk, start_mode, NULL); } else if(sbd_is_cluster(s)) { DBGLOG(LOG_INFO, "Starting Cluster servant"); s->pid = assign_servant(s->devname, servant_cluster, start_mode, NULL); } else { cl_log(LOG_ERR, "Unrecognized servant: %s", s->devname); } clock_gettime(CLOCK_MONOTONIC, &s->t_started); return; } void servants_start(void) { struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { s->restarts = 0; servant_start(s); } } void servants_kill(void) { struct servants_list_item *s; union sigval svalue; for (s = servants_leader; s; s = s->next) { if (s->pid != 0) sigqueue(s->pid, SIGKILL, svalue); } } static inline void cleanup_servant_by_pid(pid_t pid) { struct servants_list_item* s; s = lookup_servant_by_pid(pid); if 
(s) { cl_log(LOG_WARNING, "Servant for %s (pid: %i) has terminated", s->devname, s->pid); s->pid = 0; } else { /* This most likely is a stray signal from somewhere, or * a SIGCHLD for a process that has previously * explicitly disconnected. */ DBGLOG(LOG_INFO, "cleanup_servant: Nothing known about pid %i", pid); } } int inquisitor_decouple(void) { pid_t ppid = getppid(); union sigval signal_value; /* During start-up, we only arm the watchdog once we've got * quorum at least once. */ if (watchdog_use) { if (watchdog_init() < 0) { return -1; } } if (ppid > 1) { sigqueue(ppid, SIG_LIVENESS, signal_value); } return 0; } static int sbd_lock_running(long pid) { int rc = 0; long mypid; int running = 0; char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; /* check if pid is running */ if (kill(pid, 0) < 0 && errno == ESRCH) { goto bail; } #ifndef HAVE_PROC_PID return 1; #endif /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid); rc = readlink(proc_path, exe_path, PATH_MAX-1); if(rc < 0) { cl_perror("Could not read from %s", proc_path); goto bail; } exe_path[rc] = 0; mypid = (unsigned long) getpid(); snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", mypid); rc = readlink(proc_path, myexe_path, PATH_MAX-1); if(rc < 0) { cl_perror("Could not read from %s", proc_path); goto bail; } myexe_path[rc] = 0; if(strcmp(exe_path, myexe_path) == 0) { running = 1; } bail: return running; } static int sbd_lock_pidfile(const char *filename) { char lf_name[256], tf_name[256], buf[LOCKSTRLEN+1]; int fd; long pid, mypid; int rc; struct stat sbuf; if (filename == NULL) { errno = EFAULT; return -1; } mypid = (unsigned long) getpid(); snprintf(lf_name, sizeof(lf_name), "%s",filename); snprintf(tf_name, sizeof(tf_name), "%s.%lu", filename, mypid); if ((fd = open(lf_name, O_RDONLY)) >= 0) { if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) { sleep(1); /* if someone was about to create one, * 
give'm a sec to do so * Though if they follow our protocol, * this won't happen. They should really * put the pid in, then link, not the * other way around. */ } if (read(fd, buf, sizeof(buf)) < 1) { /* lockfile empty -> rm it and go on */; } else { if (sscanf(buf, "%ld", &pid) < 1) { /* lockfile screwed up -> rm it and go on */ } else { if (pid > 1 && (getpid() != pid) && sbd_lock_running(pid)) { /* is locked by existing process * -> give up */ close(fd); return -1; } else { /* stale lockfile -> rm it and go on */ } } } unlink(lf_name); close(fd); } if ((fd = open(tf_name, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { /* Hmmh, why did we fail? Anyway, nothing we can do about it */ return -3; } /* Slight overkill with the %*d format ;-) */ snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN-1, mypid); if (write(fd, buf, LOCKSTRLEN) != LOCKSTRLEN) { /* Again, nothing we can do about this */ rc = -3; close(fd); goto out; } close(fd); switch (link(tf_name, lf_name)) { case 0: if (stat(tf_name, &sbuf) < 0) { /* something weird happened */ rc = -3; break; } if (sbuf.st_nlink < 2) { /* somehow, it didn't get through - NFS trouble? */ rc = -2; break; } rc = 0; break; case EEXIST: rc = -1; break; default: rc = -3; } out: unlink(tf_name); return rc; } /* * Unlock a file (remove its lockfile) * do we need to check, if its (still) ours? 
No, IMHO, if someone else * locked our line, it's his fault -tho * returns 0 on success * <0 if some failure occured */ static int sbd_unlock_pidfile(const char *filename) { char lf_name[256]; if (filename == NULL) { errno = EFAULT; return -1; } snprintf(lf_name, sizeof(lf_name), "%s", filename); return unlink(lf_name); } int cluster_alive(bool all) { int alive = 1; struct servants_list_item* s; if(servant_count == disk_count) { return 0; } for (s = servants_leader; s; s = s->next) { if (sbd_is_cluster(s) || sbd_is_pcmk(s)) { if(s->outdated) { alive = 0; } else if(all == false) { return 1; } } } return alive; } int quorum_read(int good_servants) { if (disk_count > 2) return (good_servants > disk_count/2); else return (good_servants > 0); } void inquisitor_child(void) { int sig, pid; sigset_t procmask; siginfo_t sinfo; int status; struct timespec timeout; int exiting = 0; int decoupled = 0; int cluster_appeared = 0; int pcmk_override = 0; time_t latency; struct timespec t_last_tickle, t_now; struct servants_list_item* s; if (debug_mode) { cl_log(LOG_ERR, "DEBUG MODE %d IS ACTIVE - DO NOT RUN IN PRODUCTION!", debug_mode); } set_proc_title("sbd: inquisitor"); if (pidfile) { if (sbd_lock_pidfile(pidfile) < 0) { exit(1); } } sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigaddset(&procmask, SIGTERM); sigaddset(&procmask, SIG_LIVENESS); sigaddset(&procmask, SIG_EXITREQ); sigaddset(&procmask, SIG_TEST); sigaddset(&procmask, SIG_PCMK_UNHEALTHY); sigaddset(&procmask, SIG_RESTART); sigaddset(&procmask, SIGUSR1); sigaddset(&procmask, SIGUSR2); sigprocmask(SIG_BLOCK, &procmask, NULL); servants_start(); timeout.tv_sec = timeout_loop; timeout.tv_nsec = 0; clock_gettime(CLOCK_MONOTONIC, &t_last_tickle); while (1) { bool tickle = 0; bool can_detach = 0; int good_servants = 0; sig = sigtimedwait(&procmask, &sinfo, &timeout); clock_gettime(CLOCK_MONOTONIC, &t_now); if (sig == SIG_EXITREQ || sig == SIGTERM) { servants_kill(); watchdog_close(true); exiting = 1; } else if (sig 
== SIGCHLD) { while ((pid = waitpid(-1, &status, WNOHANG))) { if (pid == -1 && errno == ECHILD) { break; } else { s = lookup_servant_by_pid(pid); if (sbd_is_disk(s)) { if (WIFEXITED(status)) { switch(WEXITSTATUS(status)) { case EXIT_MD_SERVANT_IO_FAIL: DBGLOG(LOG_INFO, "Servant for %s requests to be disowned", s->devname); break; case EXIT_MD_SERVANT_REQUEST_RESET: cl_log(LOG_WARNING, "%s requested a reset", s->devname); do_reset(); break; case EXIT_MD_SERVANT_REQUEST_SHUTOFF: cl_log(LOG_WARNING, "%s requested a shutoff", s->devname); do_off(); break; case EXIT_MD_SERVANT_REQUEST_CRASHDUMP: cl_log(LOG_WARNING, "%s requested a crashdump", s->devname); do_crashdump(); break; default: break; } } } else if (sbd_is_pcmk(s)) { if (WIFEXITED(status)) { switch(WEXITSTATUS(status)) { case EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN: DBGLOG(LOG_INFO, "PCMK-Servant has exited gracefully"); /* revert to state prior to pacemaker-detection */ s->restarts = 0; s->restart_blocked = 0; cluster_appeared = 0; s->outdated = 1; s->t_last.tv_sec = 0; break; default: break; } } } cleanup_servant_by_pid(pid); } } } else if (sig == SIG_PCMK_UNHEALTHY) { s = lookup_servant_by_pid(sinfo.si_pid); if (sbd_is_cluster(s) || sbd_is_pcmk(s)) { if (s->outdated == 0) { cl_log(LOG_WARNING, "%s health check: UNHEALTHY", s->devname); } s->t_last.tv_sec = 1; } else { cl_log(LOG_WARNING, "Ignoring SIG_PCMK_UNHEALTHY from unknown source"); } } else if (sig == SIG_LIVENESS) { s = lookup_servant_by_pid(sinfo.si_pid); if (s) { s->first_start = 0; clock_gettime(CLOCK_MONOTONIC, &s->t_last); } } else if (sig == SIG_TEST) { } else if (sig == SIGUSR1) { if (exiting) continue; servants_start(); } if (exiting) { if (check_all_dead()) { if (pidfile) { sbd_unlock_pidfile(pidfile); } exit(0); } else continue; } good_servants = 0; for (s = servants_leader; s; s = s->next) { int age = t_now.tv_sec - s->t_last.tv_sec; if (!s->t_last.tv_sec) continue; if (age < (int)(timeout_io+timeout_loop)) { if (sbd_is_disk(s)) { 
good_servants++; } if (s->outdated) { cl_log(LOG_NOTICE, "Servant %s is healthy (age: %d)", s->devname, age); } s->outdated = 0; } else if (!s->outdated) { if (!s->restart_blocked) { cl_log(LOG_WARNING, "Servant %s is outdated (age: %d)", s->devname, age); } s->outdated = 1; } } if(disk_count == 0) { /* NO disks, everything is up to the cluster */ if(cluster_alive(true)) { /* We LIVE! */ if(cluster_appeared == false) { cl_log(LOG_INFO, "Active cluster detected"); } tickle = 1; can_detach = 1; cluster_appeared = 1; } else if(cluster_alive(false)) { if(!decoupled) { /* On the way up, detach and arm the watchdog */ cl_log(LOG_INFO, "Partial cluster detected, detaching"); } can_detach = 1; tickle = !cluster_appeared; } else if(!decoupled) { /* Stay alive until the cluster comes up */ tickle = !cluster_appeared; } } else if(disk_priority == 1 || servant_count == disk_count) { if (quorum_read(good_servants)) { /* There are disks and we're connected to the majority of them */ tickle = 1; can_detach = 1; pcmk_override = 0; } else if (servant_count > disk_count && cluster_alive(true)) { tickle = 1; if(!pcmk_override) { cl_log(LOG_WARNING, "Majority of devices lost - surviving on pacemaker"); pcmk_override = 1; /* Only log this message once */ } } } else if(cluster_alive(true) && quorum_read(good_servants)) { /* Both disk and cluster servants are healthy */ tickle = 1; can_detach = 1; cluster_appeared = 1; } else if(quorum_read(good_servants)) { /* The cluster takes priority but only once * connected for the first time. * * Until then, we tickle based on disk quorum. 
*/ can_detach = 1; tickle = !cluster_appeared; } /* cl_log(LOG_DEBUG, "Tickle: q=%d, g=%d, p=%d, s=%d", */ /* quorum_read(good_servants), good_servants, tickle, disk_count); */ if(tickle) { watchdog_tickle(); clock_gettime(CLOCK_MONOTONIC, &t_last_tickle); } if (!decoupled && can_detach) { /* We only do this at the point either the disk or * cluster servants become healthy */ cl_log(LOG_DEBUG, "Decoupling"); if (inquisitor_decouple() < 0) { servants_kill(); exiting = 1; continue; } else { decoupled = 1; } } /* Note that this can actually be negative, since we set * last_tickle after we set now. */ latency = t_now.tv_sec - t_last_tickle.tv_sec; if (timeout_watchdog && (latency > (int)timeout_watchdog)) { if (!decoupled) { /* We're still being watched by our * parent. We don't fence, but exit. */ cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up."); servants_kill(); exiting = 1; continue; } if (debug_mode < 2) { /* At level 2 or above, we do nothing, but expect * things to eventually return to * normal. 
*/ do_timeout_action(); } else { cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!"); } } if (timeout_watchdog_warn && (latency > (int)timeout_watchdog_warn)) { cl_log(LOG_WARNING, "Latency: No liveness for %d s exceeds threshold of %d s (healthy servants: %d)", (int)latency, (int)timeout_watchdog_warn, good_servants); if (debug_mode && watchdog_use) { /* In debug mode, trigger a reset before the watchdog can panic the machine */ do_timeout_action(); } } for (s = servants_leader; s; s = s->next) { int age = t_now.tv_sec - s->t_started.tv_sec; if (age > servant_restart_interval) { s->restarts = 0; s->restart_blocked = 0; } if (servant_restart_count && (s->restarts >= servant_restart_count) && !s->restart_blocked) { if (servant_restart_count > 1) { cl_log(LOG_WARNING, "Max retry count (%d) reached: not restarting servant for %s", (int)servant_restart_count, s->devname); } s->restart_blocked = 1; } if (!s->restart_blocked) { servant_start(s); } } } /* not reached */ exit(0); } int inquisitor(void) { int sig, pid, inquisitor_pid; int status; sigset_t procmask; siginfo_t sinfo; /* Where's the best place for sysrq init ?*/ sysrq_init(); sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigaddset(&procmask, SIG_LIVENESS); sigprocmask(SIG_BLOCK, &procmask, NULL); inquisitor_pid = make_daemon(); if (inquisitor_pid == 0) { inquisitor_child(); } /* We're the parent. Wait for a happy signal from our child * before we proceed - we either get "SIG_LIVENESS" when the * inquisitor has completed the first successful round, or * ECHLD when it exits with an error. */ while (1) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = waitpid(-1, &status, WNOHANG))) { if (pid == -1 && errno == ECHILD) { break; } /* We got here because the inquisitor * did not succeed. */ return -1; } } else if (sig == SIG_LIVENESS) { /* Inquisitor started up properly. 
*/ return 0; } else { fprintf(stderr, "Nobody expected the spanish inquisition!\n"); continue; } } /* not reached */ return -1; } int parse_device_line(const char *line) { size_t lpc = 0; size_t last = 0; size_t max = 0; int found = 0; bool skip_space = true; int space_run = 0; if (!line) { return 0; } max = strlen(line); cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", (int) max, line); for (lpc = 0; lpc <= max; lpc++) { if (isspace(line[lpc])) { if (skip_space) { last = lpc + 1; } else { space_run++; } continue; } skip_space = false; if (line[lpc] == ';' || line[lpc] == 0) { int rc = 0; char *entry = calloc(1, 1 + lpc - last); if (entry) { rc = sscanf(line + last, "%[^;]", entry); } else { fprintf(stderr, "Heap allocation failed parsing device-line.\n"); exit(1); } if (rc != 1) { cl_log(LOG_WARNING, "Could not parse: '%s'", line + last); } else { entry[strlen(entry)-space_run] = '\0'; cl_log(LOG_DEBUG, "Adding '%s'", entry); if (recruit_servant(entry, 0) != 0) { free(entry); // sbd should refuse to start if any of the configured device names is invalid. 
return -1; } found++; } free(entry); skip_space = true; last = lpc + 1; } space_run = 0; } return found; } #define SBD_SOURCE_FILES "sbd-cluster.c,sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c,setproctitle.c" static void sbd_log_filter_ctl(const char *files, uint8_t priority) { if (files == NULL) { files = SBD_SOURCE_FILES; } qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority); qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority); } int arg_enabled(int arg_count) { return arg_count % 2; } int main(int argc, char **argv, char **envp) { int exit_status = 0; int c; int W_count = 0; int c_count = 0; int P_count = 0; int qb_facility; const char *value = NULL; bool delay_start = false; long delay = 0; char *timeout_action = NULL; if ((cmdname = strrchr(argv[0], '/')) == NULL) { cmdname = argv[0]; } else { ++cmdname; } watchdogdev = strdup("/dev/watchdog"); watchdogdev_is_default = true; qb_facility = qb_log_facility2int("daemon"); qb_log_init(cmdname, qb_facility, LOG_WARNING); sbd_set_format_string(QB_LOG_SYSLOG, "sbd"); qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); sbd_log_filter_ctl(NULL, LOG_NOTICE); sbd_get_uname(); value = getenv("SBD_PACEMAKER"); if(value) { check_pcmk = crm_is_true(value); check_cluster = crm_is_true(value); } cl_log(LOG_INFO, "Enable pacemaker checks: %d (%s)", (int)check_pcmk, value?value:"default"); value = getenv("SBD_STARTMODE"); if(value == NULL) { } else if(strcmp(value, "clean") == 0) { start_mode = 1; } else if(strcmp(value, "always") == 0) { start_mode = 0; } cl_log(LOG_INFO, "Start mode set to: %d (%s)", (int)start_mode, value?value:"default"); value = getenv("SBD_WATCHDOG_DEV"); if(value) { free(watchdogdev); watchdogdev = strdup(value); watchdogdev_is_default = false; } /* SBD_WATCHDOG has been dropped from sbd.sysconfig example. * This is for backward compatibility. 
*/ value = getenv("SBD_WATCHDOG"); if(value) { watchdog_use = crm_is_true(value); } value = getenv("SBD_WATCHDOG_TIMEOUT"); if(value) { timeout_watchdog = crm_get_msec(value) / 1000; if(timeout_watchdog > 5) { timeout_watchdog_warn = (int)timeout_watchdog / 5 * 3; } } value = getenv("SBD_PIDFILE"); if(value) { pidfile = strdup(value); cl_log(LOG_INFO, "pidfile set to %s", pidfile); } value = getenv("SBD_DELAY_START"); if(value) { delay_start = crm_is_true(value); if (!delay_start) { delay = crm_get_msec(value) / 1000; if (delay > 0) { delay_start = true; } } } cl_log(LOG_DEBUG, "Delay start: %s%s%s", delay_start? "yes (" : "no", delay_start? (delay > 0 ? value: "msgwait") : "", delay_start? ")" : ""); value = getenv("SBD_TIMEOUT_ACTION"); if(value) { timeout_action = strdup(value); } value = getenv("SBD_MOVE_TO_ROOT_CGROUP"); if(value) { move_to_root_cgroup = crm_is_true(value); if (move_to_root_cgroup) { enforce_moving_to_root_cgroup = true; } else { if (strcmp(value, "auto") == 0) { move_to_root_cgroup = true; } } } value = getenv("SBD_SYNC_RESOURCE_STARTUP"); - if(value) { - sync_resource_startup = crm_is_true(value); - } + sync_resource_startup = + crm_is_true(value?value:SBD_SYNC_RESOURCE_STARTUP_DEFAULT); + #if !USE_PACEMAKERD_API if (sync_resource_startup) { fprintf(stderr, "Failed to sync resource-startup as " "SBD was built against pacemaker not supporting pacemakerd-API.\n"); exit_status = -1; goto out; } #else if (!sync_resource_startup) { cl_log(LOG_WARNING, "SBD built against pacemaker supporting " "pacemakerd-API. 
Should think about enabling " "SBD_SYNC_RESOURCE_STARTUP."); } #endif while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { switch (c) { case 'D': break; case 'Z': debug_mode++; cl_log(LOG_INFO, "Debug mode now at level %d", (int)debug_mode); break; case 'R': skip_rt = 1; cl_log(LOG_INFO, "Realtime mode deactivated."); break; case 'S': start_mode = atoi(optarg); cl_log(LOG_INFO, "Start mode set to: %d", (int)start_mode); break; case 's': timeout_startup = atoi(optarg); cl_log(LOG_INFO, "Start timeout set to: %d", (int)timeout_startup); break; case 'v': debug++; if(debug == 1) { sbd_log_filter_ctl(NULL, LOG_INFO); cl_log(LOG_INFO, "Verbose mode enabled."); } else if(debug == 2) { sbd_log_filter_ctl(NULL, LOG_DEBUG); cl_log(LOG_INFO, "Debug mode enabled."); } else if(debug == 3) { /* Go nuts, turn on pacemaker's logging too */ sbd_log_filter_ctl("*", LOG_DEBUG); cl_log(LOG_INFO, "Debug library mode enabled."); } break; case 'T': watchdog_set_timeout = 0; cl_log(LOG_INFO, "Setting watchdog timeout disabled; using defaults."); break; case 'W': W_count++; break; case 'w': cl_log(LOG_NOTICE, "Using watchdog device '%s'", watchdogdev); free(watchdogdev); watchdogdev = strdup(optarg); watchdogdev_is_default = false; break; case 'd': #if SUPPORT_SHARED_DISK if (recruit_servant(optarg, 0) != 0) { fprintf(stderr, "Invalid device: %s\n", optarg); exit_status = -1; goto out; } #else fprintf(stderr, "Shared disk functionality not supported\n"); exit_status = -2; goto out; #endif break; case 'c': c_count++; break; case 'P': P_count++; break; case 'z': disk_priority = 0; break; case 'n': local_uname = strdup(optarg); cl_log(LOG_INFO, "Overriding local hostname to %s", local_uname); break; case 'p': pidfile = strdup(optarg); cl_log(LOG_INFO, "pidfile set to %s", pidfile); break; case 'C': timeout_watchdog_crashdump = atoi(optarg); cl_log(LOG_INFO, "Setting crashdump watchdog timeout to %d", (int)timeout_watchdog_crashdump); break; case '1': 
timeout_watchdog = atoi(optarg); if(timeout_watchdog > 5) { timeout_watchdog_warn = (int)timeout_watchdog / 5 * 3; } break; case '2': timeout_allocate = atoi(optarg); break; case '3': timeout_loop = atoi(optarg); break; case '4': timeout_msgwait = atoi(optarg); break; case '5': timeout_watchdog_warn = atoi(optarg); cl_log(LOG_INFO, "Setting latency warning to %d", (int)timeout_watchdog_warn); break; case 't': servant_restart_interval = atoi(optarg); cl_log(LOG_INFO, "Setting servant restart interval to %d", (int)servant_restart_interval); break; case 'I': timeout_io = atoi(optarg); cl_log(LOG_INFO, "Setting IO timeout to %d", (int)timeout_io); break; case 'F': servant_restart_count = atoi(optarg); cl_log(LOG_INFO, "Servant restart count set to %d", (int)servant_restart_count); break; case 'r': if (timeout_action) { free(timeout_action); } timeout_action = strdup(optarg); break; case 'h': usage(); goto out; break; default: exit_status = -2; goto out; break; } } if (disk_count == 0) { /* if we already have disks from commandline then it is probably undesirable to add those from environment (general rule cmdline has precedence) */ value = getenv("SBD_DEVICE"); if ((value) && strlen(value)) { #if SUPPORT_SHARED_DISK int devices = parse_device_line(value); if(devices < 1) { fprintf(stderr, "Invalid device line: %s\n", value); exit_status = -1; goto out; } #else fprintf(stderr, "Shared disk functionality not supported\n"); exit_status = -2; goto out; #endif } } if (watchdogdev == NULL || strcmp(watchdogdev, "/dev/null") == 0) { watchdog_use = 0; } else if (W_count > 0) { watchdog_use = arg_enabled(W_count); } if (watchdog_use) { cl_log(LOG_INFO, "Watchdog enabled."); } else { cl_log(LOG_INFO, "Watchdog disabled."); } if (c_count > 0) { check_cluster = arg_enabled(c_count); } if (P_count > 0) { check_pcmk = arg_enabled(P_count); } if ((disk_count > 0) && (strlen(local_uname) > SECTOR_NAME_MAX)) { fprintf(stderr, "Node name mustn't be longer than %d chars.\n", 
SECTOR_NAME_MAX); fprintf(stderr, "If uname is longer define a name to be used by sbd.\n"); exit_status = -1; goto out; } if (disk_count > 3) { fprintf(stderr, "You can specify up to 3 devices via the -d option.\n"); exit_status = -1; goto out; } /* There must at least be one command following the options: */ if ((argc - optind) < 1) { fprintf(stderr, "Not enough arguments.\n"); exit_status = -2; goto out; } if (init_set_proc_title(argc, argv, envp) < 0) { fprintf(stderr, "Allocation of proc title failed.\n"); exit_status = -1; goto out; } if (timeout_action) { char *p[2]; int i; char c; int nrflags = sscanf(timeout_action, "%m[a-z],%m[a-z]%c", &p[0], &p[1], &c); bool parse_error = (nrflags < 1) || (nrflags > 2); for (i = 0; (i < nrflags) && (i < 2); i++) { if (!strcmp(p[i], "reboot")) { timeout_sysrq_char = 'b'; } else if (!strcmp(p[i], "crashdump")) { timeout_sysrq_char = 'c'; } else if (!strcmp(p[i], "off")) { timeout_sysrq_char = 'o'; } else if (!strcmp(p[i], "flush")) { do_flush = true; } else if (!strcmp(p[i], "noflush")) { do_flush = false; } else { parse_error = true; } free(p[i]); } if (parse_error) { fprintf(stderr, "Failed to parse timeout-action \"%s\".\n", timeout_action); exit_status = -1; goto out; } } #if SUPPORT_SHARED_DISK if (strcmp(argv[optind], "create") == 0) { exit_status = init_devices(servants_leader); } else if (strcmp(argv[optind], "dump") == 0) { exit_status = dump_headers(servants_leader); } else if (strcmp(argv[optind], "allocate") == 0) { exit_status = allocate_slots(argv[optind + 1], servants_leader); } else if (strcmp(argv[optind], "list") == 0) { exit_status = list_slots(servants_leader); } else if (strcmp(argv[optind], "message") == 0) { exit_status = messenger(argv[optind + 1], argv[optind + 2], servants_leader); } else if (strcmp(argv[optind], "ping") == 0) { exit_status = ping_via_slots(argv[optind + 1], servants_leader); } else if (strcmp(argv[optind], "watch") == 0) { if(disk_count > 0) { /* If no devices are specified, its 
not an error to be unable to find one */ open_any_device(servants_leader); } if (delay_start) { if (delay <= 0) { delay = get_first_msgwait(servants_leader); } sleep((unsigned long) delay); } } else { exit_status = -2; } #endif if (strcmp(argv[optind], "query-watchdog") == 0) { exit_status = watchdog_info(); } else if (strcmp(argv[optind], "test-watchdog") == 0) { exit_status = watchdog_test(); } else if (strcmp(argv[optind], "watch") == 0) { /* sleep $(sbd $SBD_DEVICE_ARGS dump | grep -m 1 msgwait | awk '{print $4}') 2>/dev/null */ /* We only want this to have an effect during watch right now; * pinging and fencing would be too confused */ cl_log(LOG_INFO, "Turning on pacemaker checks: %d", check_pcmk); if (check_pcmk) { recruit_servant("pcmk", 0); #if SUPPORT_PLUGIN check_cluster = 1; #endif } cl_log(LOG_INFO, "Turning on cluster checks: %d", check_cluster); if (check_cluster) { recruit_servant("cluster", 0); } cl_log(LOG_NOTICE, "%s flush + write \'%c\' to sysrq in case of timeout", do_flush?"Do":"Skip", timeout_sysrq_char); exit_status = inquisitor(); } out: if (timeout_action) { free(timeout_action); } if (exit_status < 0) { if (exit_status == -2) { usage(); } else { fprintf(stderr, "sbd failed; please check the logs.\n"); } return (1); } return (0); } diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c index aa1fb57..4806b0a 100644 --- a/src/sbd-pacemaker.c +++ b/src/sbd-pacemaker.c @@ -1,699 +1,701 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * Based on crm_mon.c, which was: * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ /* TODO list: * * - Trying to shutdown a node if no devices are up will fail, since SBD * currently uses a message via the disk to achieve this. * * - Shutting down cluster nodes while the majority of devices is down * will eventually take the cluster below the quorum threshold, at which * time the remaining cluster nodes will all immediately suicide. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sbd.h" #ifndef HAVE_PE_NEW_WORKING_SET #define pe_reset_working_set(data_set) cleanup_calculations(data_set) static pe_working_set_t * pe_new_working_set() { pe_working_set_t *data_set = calloc(1, sizeof(pe_working_set_t)); if (data_set != NULL) { set_working_set_defaults(data_set); } return data_set; } static void pe_free_working_set(pe_working_set_t *data_set) { if (data_set != NULL) { pe_reset_working_set(data_set); free(data_set); } } #endif static void clean_up(int rc); #if USE_PACEMAKERD_API #include static pcmk_ipc_api_t *pacemakerd_api = NULL; static time_t last_ok = (time_t) 0; static void pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk_pacemakerd_api_reply_t *reply = event_data; switch (event_type) { case pcmk_ipc_event_disconnect: /* Unexpected */ cl_log(LOG_ERR, "Lost connection to pacemakerd\n"); return; case pcmk_ipc_event_reply: break; default: return; } if (status != CRM_EX_OK) { cl_log(LOG_ERR, "Bad reply from pacemakerd: %s", crm_exit_str(status)); return; } if (reply->reply_type != pcmk_pacemakerd_reply_ping) { cl_log(LOG_ERR, "Unknown reply type 
%d from pacemakerd\n", reply->reply_type); } else { if ((reply->data.ping.last_good != (time_t) 0) && (reply->data.ping.status == pcmk_rc_ok)) { switch (reply->data.ping.state) { case pcmk_pacemakerd_state_running: case pcmk_pacemakerd_state_shutting_down: last_ok = reply->data.ping.last_good; break; case pcmk_pacemakerd_state_shutdown_complete: clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); break; default: break; } } } } #endif extern int disk_count; static void clean_up(int rc); static void crm_diff_update(const char *event, xmlNode * msg); static int cib_connect(gboolean full); static void compute_status(pe_working_set_t * data_set); static gboolean mon_refresh_state(gpointer user_data); static GMainLoop *mainloop = NULL; static guint timer_id_reconnect = 0; static guint timer_id_notify = 0; static int reconnect_msec = 1000; static int cib_connected = 0; static cib_t *cib = NULL; static xmlNode *current_cib = NULL; static pe_working_set_t *data_set = NULL; static long last_refresh = 0; static int pcmk_clean_shutdown = 0; static int pcmk_shutdown = 0; static gboolean mon_timer_reconnect(gpointer data) { int rc = 0; if (timer_id_reconnect > 0) { g_source_remove(timer_id_reconnect); } rc = cib_connect(TRUE); if (rc != 0) { cl_log(LOG_WARNING, "CIB reconnect failed: %d", rc); timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL); } else { cl_log(LOG_INFO, "CIB reconnect successful"); } return FALSE; } static void mon_cib_connection_destroy(gpointer user_data) { if (cib) { cib->cmds->signoff(cib); /* retrigger as last one might have been skipped */ mon_refresh_state(NULL); if ((pcmk_clean_shutdown) && (!sync_resource_startup)) { /* assume a graceful pacemaker-shutdown */ clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); } /* getting here we aren't sure about the pacemaker-state so try to use the timeout to reconnect and get everything sorted out again */ pcmk_shutdown = 0; set_servant_health(pcmk_health_transient, LOG_WARNING, "Disconnected from 
CIB"); timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL); } cib_connected = 0; /* no sense in looking into outdated cib, trying to apply patch, ... */ if (current_cib) { free_xml(current_cib); current_cib = NULL; } return; } static void mon_retrieve_current_cib() { xmlNode *xml_cib = NULL; int options = cib_scope_local | cib_sync_call; int rc = pcmk_ok; + const char* element_name; free_xml(current_cib); current_cib = NULL; rc = cib->cmds->query(cib, NULL, &xml_cib, options); if (rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); free_xml(xml_cib); return; } else if (xml_cib == NULL) { crm_err("Couldn't retrieve the CIB: empty result"); return; } - if (safe_str_eq(crm_element_name(xml_cib), XML_TAG_CIB)) { + element_name = crm_element_name(xml_cib); + if (element_name && !strcmp(element_name, XML_TAG_CIB)) { current_cib = xml_cib; } else { free_xml(xml_cib); } return; } static gboolean mon_timer_notify(gpointer data) { static int counter = 0; int counter_max = timeout_watchdog / timeout_loop / 2; if (timer_id_notify > 0) { g_source_remove(timer_id_notify); } #if USE_PACEMAKERD_API { time_t now = time(NULL); if ((last_ok <= now) && (now - last_ok < timeout_watchdog)) { #endif if (cib_connected) { if (counter == counter_max) { mon_retrieve_current_cib(); mon_refresh_state(NULL); counter = 0; } else { cib->cmds->noop(cib, 0); notify_parent(); counter++; } } #if USE_PACEMAKERD_API } } if (pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main) == pcmk_rc_ok) { pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); } #endif timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL); return FALSE; } /* * Mainloop signal handler. 
*/ static void mon_shutdown(int nsig) { clean_up(0); } static int cib_connect(gboolean full) { int rc = 0; CRM_CHECK(cib != NULL, return -EINVAL); cib_connected = 0; crm_xml_init(); if (cib->state != cib_connected_query && cib->state != cib_connected_command) { rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != 0) { return rc; } mon_retrieve_current_cib(); mon_refresh_state(NULL); if (full) { if (rc == 0) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); if (rc == -EPROTONOSUPPORT) { /* Notification setup failed, won't be able to reconnect after failure */ rc = 0; } } if (rc == 0) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != 0) { /* Notification setup failed, could not monitor CIB actions */ clean_up(-rc); } } } if (!rc) { cib_connected = 1; } return rc; } static void compute_status(pe_working_set_t * data_set) { static int updates = 0; static int ever_had_quorum = FALSE; node_t *node = NULL; updates++; if (data_set->dc_node == NULL) { set_servant_health(pcmk_health_transient, LOG_INFO, "We don't have a DC right now."); notify_parent(); return; } node = pe_find_node(data_set->nodes, local_uname); if ((node == NULL) || (node->details == NULL)) { set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: %s is UNKNOWN", local_uname); notify_parent(); return; } if (node->details->online == FALSE) { set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: OFFLINE"); } else if (node->details->unclean) { set_servant_health(pcmk_health_unclean, LOG_WARNING, "Node state: UNCLEAN"); } else if (node->details->pending) { set_servant_health(pcmk_health_pending, LOG_WARNING, "Node state: pending"); } else if (data_set->flags & pe_flag_have_quorum) { set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online"); ever_had_quorum = TRUE; } else if(disk_count > 0) { 
set_servant_health(pcmk_health_noquorum, LOG_WARNING, "Quorum lost"); } else if(ever_had_quorum == FALSE) { set_servant_health(pcmk_health_online, LOG_INFO, "We do not have quorum yet"); } else { /* We lost quorum, and there are no disks present * Setting healthy > 2 here will result in us self-fencing */ switch (data_set->no_quorum_policy) { case no_quorum_freeze: set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Freeze resources"); break; #if HAVE_ENUM_NO_QUORUM_DEMOTE case no_quorum_demote: set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Demote promotable resources and stop others"); break; #endif case no_quorum_stop: set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Stop ALL resources"); break; case no_quorum_ignore: set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Ignore"); break; default: /* immediate reboot is the most excessive action we take use for no_quorum_suicide and everything we don't know yet */ set_servant_health(pcmk_health_unclean, LOG_INFO, "Quorum lost: Self-fence"); break; } } /* If we are in shutdown-state once this will go on till the end. * If we've on top reached a state of 0 locally running resources * we can assume a clean shutdown. * Tricky are the situations where the node is in maintenance-mode * or resources are unmanaged. So if the node is in maintenance or * all left-over running resources are unmanaged we assume intention. 
*/ if (node->details->shutdown) { pcmk_shutdown = 1; } if (pcmk_shutdown) { pcmk_clean_shutdown = 1; if (!(node->details->maintenance)) { GListPtr iter; for (iter = node->details->running_rsc; iter != NULL; iter = iter->next) { resource_t *rsc = (resource_t *) iter->data; if (is_set(rsc->flags, pe_rsc_managed)) { pcmk_clean_shutdown = 0; crm_debug("not clean as %s managed and still running", rsc->id); break; } } if (pcmk_clean_shutdown) { crm_debug("pcmk_clean_shutdown because " "all managed resources down"); } } else { crm_debug("pcmk_clean_shutdown because node is in maintenance"); } } notify_parent(); return; } static crm_trigger_t *refresh_trigger = NULL; static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger(refresh_trigger); mon_refresh_state(NULL); return FALSE; } #define XPATH_SHUTDOWN "//" XML_CIB_TAG_STATE "[@uname='%s']/" \ XML_TAG_TRANSIENT_NODEATTRS "/" XML_TAG_ATTR_SETS "/" \ XML_CIB_TAG_NVPAIR "[@name='" XML_CIB_ATTR_SHUTDOWN "']" static gboolean shutdown_attr_in_cib(void) { xmlNode *match = NULL; char *xpath_string; xpath_string = crm_strdup_printf(XPATH_SHUTDOWN, local_uname); if (xpath_string) { match = get_xpath_object(xpath_string, current_cib, LOG_TRACE); free(xpath_string); } return (match != NULL); } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; const char *op = NULL; long now = time(NULL); static int updates = 0; static mainloop_timer_t *refresh_timer = NULL; if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", reconnect_msec, FALSE, mon_trigger_refresh, NULL); refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, refresh_timer); } if (current_cib != NULL) { xmlNode *cib_last = current_cib; current_cib = NULL; rc = cib_apply_patch_event(msg, cib_last, &current_cib, LOG_DEBUG); free_xml(cib_last); switch(rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_warn("[%s] %s Patch aborted: %s (%d)", event, op, pcmk_strerror(rc), rc); break; case 
pcmk_ok: updates++; break; default: crm_notice("[%s] %s ABORTED: %s (%d)", event, op, pcmk_strerror(rc), rc); break; } } if (current_cib == NULL) { mon_retrieve_current_cib(); } /* Refresh * - immediately if the last update was more than 1s ago * - every 10 updates * - at most 1s after the last update * - shutdown attribute for our node set for the first time */ if ((!pcmk_shutdown && shutdown_attr_in_cib()) || (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000))) { mon_refresh_state(refresh_timer); updates = 0; } else { mainloop_set_trigger(refresh_trigger); mainloop_timer_start(refresh_timer); } } static gboolean mon_refresh_state(gpointer user_data) { xmlNode *cib_copy = NULL; if(current_cib == NULL) { return FALSE; } if(user_data) { mainloop_timer_t *timer = user_data; mainloop_timer_stop(timer); } cib_copy = copy_xml(current_cib); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { cl_log(LOG_WARNING, "cli_config_update() failed - forcing reconnect to CIB"); if (cib) { cib->cmds->signoff(cib); } } else { last_refresh = time(NULL); data_set->input = cib_copy; data_set->flags |= pe_flag_have_stonith_resource; cluster_status(data_set); compute_status(data_set); pe_reset_working_set(data_set); } return FALSE; } static void clean_up(int rc) { if (timer_id_reconnect > 0) { g_source_remove(timer_id_reconnect); timer_id_reconnect = 0; } if (timer_id_notify > 0) { g_source_remove(timer_id_notify); timer_id_notify = 0; } if (data_set != NULL) { pe_free_working_set(data_set); data_set = NULL; } if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } #if USE_PACEMAKERD_API if (pacemakerd_api != NULL) { pcmk_ipc_api_t *capi = pacemakerd_api; pacemakerd_api = NULL; // Ensure we can't free this twice pcmk_free_ipc_api(capi); } #endif if (rc >= 0) { exit(rc); } return; } int servant_pcmk(const char *diskname, int mode, const void* argp) { int exit_code = 0; crm_system_name = strdup("sbd:pcmk"); cl_log(LOG_NOTICE, "Monitoring Pacemaker 
health"); set_proc_title("sbd: watcher: Pacemaker"); setenv("PCMK_watchdog", "true", 1); if(debug == 0) { /* We don't want any noisy crm messages */ set_crm_log_level(LOG_CRIT); } if (data_set == NULL) { data_set = pe_new_working_set(); } if (data_set == NULL) { return -1; } #if USE_PACEMAKERD_API { int rc; rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); if (pacemakerd_api == NULL) { cl_log(LOG_ERR, "Could not connect to pacemakerd: %s\n", pcmk_rc_str(rc)); return -1; } pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL); do { rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main); if (rc != pcmk_rc_ok) { cl_log(LOG_DEBUG, "Could not connect to pacemakerd: %s\n", pcmk_rc_str(rc)); sleep(reconnect_msec / 1000); } } while (rc != pcmk_rc_ok); /* send a ping to pacemakerd to wake it up */ pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); /* cib should come up now as well so it's time * to have the inquisitor have a closer look */ notify_parent(); } #endif if (current_cib == NULL) { cib = cib_new(); do { exit_code = cib_connect(TRUE); if (exit_code != 0) { sleep(reconnect_msec / 1000); } } while (exit_code == -ENOTCONN); if (exit_code != 0) { clean_up(-exit_code); } } mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); clean_up(0); return 0; /* never reached */ } diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig.in similarity index 94% rename from src/sbd.sysconfig rename to src/sbd.sysconfig.in index b32e826..15cd66a 100644 --- a/src/sbd.sysconfig +++ b/src/sbd.sysconfig.in @@ -1,128 +1,128 @@ ## Type: string ## Default: "" # # SBD_DEVICE specifies the devices to use for exchanging sbd messages # and to monitor. If specifying more than one path, use ";" as # separator. 
# #SBD_DEVICE="" ## Type: yesno ## Default: yes # # Whether to enable the pacemaker integration. # SBD_PACEMAKER=yes ## Type: always / clean ## Default: always # # Specify the start mode for sbd. Setting this to "clean" will only # allow sbd to start if it was not previously fenced. See the -S option # in the man page. # SBD_STARTMODE=always ## Type: yesno / integer ## Default: no # # Whether to delay after starting sbd on boot for "msgwait" seconds. # This may be necessary if your cluster nodes reboot so fast that the # other nodes are still waiting in the fence acknowledgement phase. # This is an occasional issue with virtual machines. # # This can also be enabled by being set to a specific delay value, in # seconds. Sometimes a longer delay than the default, "msgwait", is # needed, for example in the cases where it's considered to be safer to # wait longer than: # corosync token timeout + consensus timeout + pcmk_delay_max + msgwait # # Be aware that the special value "1" means "yes" rather than "1s". # # Consider that you might have to adapt the startup-timeout accordingly # if the default isn't sufficient. (TimeoutStartSec for systemd) # # This option may be ignored at a later point, once pacemaker handles # this case better. # SBD_DELAY_START=no ## Type: string ## Default: /dev/watchdog # # Watchdog device to use. If set to /dev/null, no watchdog device will # be used. # SBD_WATCHDOG_DEV=/dev/watchdog ## Type: integer -## Default: 5 +## Default: @SBD_WATCHDOG_TIMEOUT_DEFAULT@ # # How long, in seconds, the watchdog will wait before panicking the # node if no-one tickles it. # # This depends mostly on your storage latency; the majority of devices # must be successfully read within this time, or else the node will # self-fence. # # If your sbd device(s) reside on a multipath setup or iSCSI, this # should be the time required to detect a path failure. # # Be aware that watchdog timeout set in the on-disk metadata takes # precedence. 
# -SBD_WATCHDOG_TIMEOUT=5 +SBD_WATCHDOG_TIMEOUT=@SBD_WATCHDOG_TIMEOUT_DEFAULT@ ## Type: string ## Default: "flush,reboot" # # Actions to be executed when the watchers don't timely report to the sbd # master process or one of the watchers detects that the master process # has died. # # Set timeout-action to comma-separated combination of # noflush|flush plus reboot|crashdump|off. # If just one of both is given the other stays at the default. # # This doesn't affect actions like off, crashdump, reboot explicitly # triggered via message slots. # And it does as well not configure the action a watchdog would # trigger should it run off (there is no generic interface). # SBD_TIMEOUT_ACTION=flush,reboot ## Type: yesno / auto ## Default: auto # # If CPUAccounting is enabled default is not to assign any RT-budget # to the system.slice which prevents sbd from running RR-scheduled. # # One way to escape that issue is to move sbd-processes from the # slice they were originally started to root-slice. # Of course starting sbd in a certain slice might be intentional. # Thus in auto-mode sbd will check if the slice has RT-budget assigned. # If that is the case sbd will stay in that slice while it will # be moved to root-slice otherwise. # SBD_MOVE_TO_ROOT_CGROUP=auto ## Type: yesno -## Default: no +## Default: @SBD_SYNC_RESOURCE_STARTUP_DEFAULT@ # # If resource startup syncing is enabled then pacemakerd is # gonna wait to be pinged via IPC before it starts resources. # On shutdown pacemakerd is going to wait in a state where it # has cleanly shutdown resources till sbd fetches that state. # # Default is 'no' to prevent pacemaker from waiting for a # ping that will never come when working together with an sbd # version that doesn't support the feature. 
# -SBD_SYNC_RESOURCE_STARTUP=no +SBD_SYNC_RESOURCE_STARTUP=@SBD_SYNC_RESOURCE_STARTUP_SYSCONFIG@ ## Type: string ## Default: "" # # Additional options for starting sbd # SBD_OPTS= diff --git a/tests/regressions.sh b/tests/regressions.sh index 7ab80be..17c817d 100755 --- a/tests/regressions.sh +++ b/tests/regressions.sh @@ -1,332 +1,339 @@ #!/bin/bash # # Copyright (C) 2013 Lars Marowsky-Bree # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # TODO: # - More tests # - Handle optional, long-running tests better # - Support for explicitly running a single test # - Verify output from commands # - Normalize uuids and device names so they are diffable # - Log to file, instead of syslog is needed # - How to test watch mode? # - Can the unit/service file be tested? or at least the wrapper? 
# Tunables; each one may be preset in the environment.
# SBD_PRELOAD uses "=" rather than ":=" on purpose: an explicitly empty
# value is kept, which disables the daemon tests selected further down.
: ${SBD_BINARY:="/usr/sbin/sbd"}
: ${SBD_PRELOAD="libsbdtestbed.so"}
: ${SBD_USE_DM:="yes"}

# Run the sbd binary under test in its own session (setsid) with the
# preload library active and ${SBD_PIDFILE} as pidfile.  All arguments are
# handed through unchanged.  No SBD_WATCHDOG_TIMEOUT is forced here.
sbd() {
	LD_PRELOAD=${SBD_PRELOAD} \
	SBD_DEVICE="${SBD_DEVICE}" \
	SBD_PRELOAD_LOG=${SBD_PRELOAD_LOG} \
	SBD_WATCHDOG_DEV=/dev/watchdog \
	setsid ${SBD_BINARY} -p ${SBD_PIDFILE} "$@"
}

# Overwrite the first 2048 dd blocks of device/file $1 with zeroes.
sbd_wipe_disk() {
	dd if=/dev/zero of=$1 count=2048 2>/dev/null
}

# Prepare the test environment:
#   F[n]  backing temp file           L[n]  loop device (DM mode only)
#   R[n]  random suffix of F[n]       D[n]  device path handed to sbd
# Also creates the pidfile and the preload log, registers sbd_teardown as
# EXIT trap, and reads the effective timeouts back from a fresh header.
sbd_setup() {
	trap sbd_teardown EXIT
	for N in $(seq 3) ; do
		F[$N]=$(mktemp /tmp/sbd.device.$N.XXXXXX)
		sbd_wipe_disk ${F[$N]}
		if [[ "${SBD_USE_DM}" == "yes" ]]; then
			# 4th dot-separated field of the temp name is the mktemp suffix
			R[$N]=$(echo ${F[$N]}|cut -f4 -d.)
			L[$N]=$(losetup -f)
			losetup ${L[$N]} ${F[$N]}
			D[$N]="/dev/mapper/sbd_${N}_${R[$N]}"
			dmsetup create sbd_${N}_${R[$N]} --table "0 2048 linear ${L[$N]} 0"
			dmsetup mknodes sbd_${N}_${R[$N]}
		else
			D[$N]=${F[$N]}
		fi
	done
	if [[ "${SBD_USE_DM}" != "yes" ]]; then
		SBD_DEVICE="${F[1]};${F[2]};${F[3]}"
	fi
	SBD_PIDFILE=$(mktemp /tmp/sbd.pidfile.XXXXXX)
	SBD_PRELOAD_LOG=$(mktemp /tmp/sbd.logfile.XXXXXX)
	# Create a header with the binary's built-in defaults and take the
	# timeouts from "dump" (text after the ':' on the matching line), so
	# the sleeps in the daemon tests follow whatever sbd was built with.
	sbd -d ${D[1]} create
	WATCHDOG_TIMEOUT=$(LD_PRELOAD=${SBD_PRELOAD} SBD_DEVICE="${D[1]}" ${SBD_BINARY} dump |grep watchdog|cut -f2 -d:)
	MSGWAIT_TIMEOUT=$(LD_PRELOAD=${SBD_PRELOAD} SBD_DEVICE="${D[1]}" ${SBD_BINARY} dump |grep msgwait|cut -f2 -d:)
}

# EXIT trap: undo everything sbd_setup created.
# The daemon is stopped exactly once and *before* the devices are removed;
# doing it per device cost three 5s sleeps and tried to remove the first
# device-mapper target while a watcher could still be running.
sbd_teardown() {
	sbd_daemon_cleanup
	for N in $(seq 3) ; do
		if [[ "${SBD_USE_DM}" == "yes" ]]; then
			dmsetup remove sbd_${N}_${R[$N]}
			losetup -d ${L[$N]}
		fi
		rm -f ${F[$N]}
	done
	rm -f ${SBD_PIDFILE}
	rm -f ${SBD_PRELOAD_LOG}
}

# Make device $1 fail: wipe the dm table in DM mode, otherwise point D[$1]
# at a different path (presumably one that does not exist).
sbd_dev_fail() {
	if [[ "${SBD_USE_DM}" == "yes" ]]; then
		dmsetup wipe_table sbd_${1}_${R[$1]}
	else
		D[$1]=/tmp/fail123456789
	fi
}

# Revert sbd_dev_fail for device $1.
sbd_dev_resume() {
	if [[ "${SBD_USE_DM}" == "yes" ]]; then
		dmsetup suspend sbd_${1}_${R[$1]}
		dmsetup load sbd_${1}_${R[$1]} --table "0 2048 linear ${L[$1]} 0"
		dmsetup resume sbd_${1}_${R[$1]}
	else
		D[$1]=${F[$1]}
	fi
}

# Empty the preload log and stop whatever the pidfile points at:
# TERM first, then after 5 seconds KILL for the process and its children.
# The pidfile is emptied afterwards.
sbd_daemon_cleanup() {
	echo > ${SBD_PRELOAD_LOG}
	pkill -TERM --pidfile ${SBD_PIDFILE} 2>/dev/null
	sleep 5
	pkill -KILL --pidfile ${SBD_PIDFILE} 2>/dev/null
	pkill -KILL --parent "$(cat ${SBD_PIDFILE} 2>/dev/null)" 2>/dev/null
	echo > ${SBD_PIDFILE}
}

# Run "$@" and expect success; on failure abort the whole script with the
# command's exit status.
_ok() {
	echo "-- $*"
	"$@"
	rc=$?
	if [ $rc -ne 0 ]; then
		echo "$* failed with $rc"
		exit $rc
	fi
}

# Run "$@" and expect failure; abort the whole script if it succeeds.
_no() {
	echo "-- $*"
	"$@"
	rc=$?
	if [ $rc -eq 0 ]; then
		echo "$* did NOT fail ($rc)"
		# rc is 0 here, so "exit $rc" would report the run as passed.
		exit 1
	fi
	return 0
}

# Require that grep (called with "$@") finds a match in the preload log;
# otherwise print the log, stop the daemon and abort with status 1.
_in_log() {
	grep "$@" ${SBD_PRELOAD_LOG} >/dev/null
	if [ $? -ne 0 ]; then
		echo "didn't find '$*' in log:"
		cat ${SBD_PRELOAD_LOG}
		sbd_daemon_cleanup
		exit 1
	fi
}

test_1() {
	echo "Creating three devices"
	_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} create
	_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} dump
}

test_2() {
	echo "Basic functionality"
	for S in `seq 2` ; do
		_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} allocate "test-$S"
	done
	_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-1 message test-2 reset
	_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} list
}

test_3() {
	echo "Start mode (expected not to start, because reset was written in test_2)"
	_no sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-2 -Z -Z -Z -S 1 watch
}

test_4() {
	echo "Deliver message with 1 failure"
	sbd_dev_fail 1
	_no sbd -d ${D[1]} -n test-1 message test-2 exit
	_no sbd -d ${D[1]} -d ${D[2]} -n test-1 message test-2 exit
	_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-1 message test-2 exit
	sbd_dev_resume 1
}

test_5() {
	echo "Deliver message with 2 failures"
	sbd_dev_fail 1
	sbd_dev_fail 2
	_no sbd -d ${D[1]} -d ${D[2]} -n test-1 message test-2 exit
	_no sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-1 message test-2 exit
	sbd_dev_resume 1
	sbd_dev_resume 2
}

test_6() {
	echo "Deliver message with 3 failures"
	sbd_dev_fail 1
	sbd_dev_fail 2
	sbd_dev_fail 3
	_no sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-1 message test-2 exit
	sbd_dev_resume 1
	sbd_dev_resume 2
	sbd_dev_resume 3
}

test_101() {
	echo "Creating one device"
	_ok sbd -d ${D[1]} create
}

test_102() {
	echo "Creating two devices"
	_ok sbd -d ${D[1]} -d ${D[2]} create
}

test_7() {
	echo "Allocate all slots plus 1"
	_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -2 0 create
	for S in `seq 255` ; do
		_ok sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} allocate "test-$S"
	done
	_no sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} allocate "test-256"
}

test_8() {
	echo "Non-existent device path"
	# NOTE(review): "-create" (with dash) looks like a typo for the
	# "create" command; kept as-is since the call must fail either way.
	_no sbd -d /dev/kfdifdifdfdlfd -create 2>/dev/null
}

test_9() {
	echo "Basic sbd invocation"
	_no sbd
	_ok sbd -h
}

test_watchdog() {
	echo "Basic watchdog test"
	echo > ${SBD_PRELOAD_LOG}
	sbd test-watchdog < /dev/null
	_in_log "watchdog fired"
}

test_stall_inquisitor() {
	echo "Stall inquisitor test"
	sbd_daemon_cleanup
	sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-1 watch
	sleep 10
	_ok kill -0 "$(cat ${SBD_PIDFILE})"
	# Freeze the process named in the pidfile for two watchdog periods.
	kill -STOP "$(cat ${SBD_PIDFILE})"
	sleep $((${WATCHDOG_TIMEOUT} * 2))
	kill -CONT "$(cat ${SBD_PIDFILE})" 2>/dev/null
	_in_log "watchdog fired"
}

test_wipe_slots1() {
	echo "Wipe slots test (with watchdog)"
	sbd_daemon_cleanup
	sbd -d ${D[1]} -n test-1 watch
	sleep 2
	sbd_wipe_disk ${D[1]}
	sleep $((${MSGWAIT_TIMEOUT} + ${WATCHDOG_TIMEOUT} * 2))
	_in_log "watchdog fired"
}

test_wipe_slots2() {
	echo "Wipe slots test (without watchdog)"
	sbd_daemon_cleanup
	sbd -d ${D[1]} create
	sbd -d ${D[1]} -w /dev/null -n test-1 watch
	sleep 2
	sbd_wipe_disk ${D[1]}
	sleep $((${MSGWAIT_TIMEOUT} + ${WATCHDOG_TIMEOUT} * 2))
	_in_log "sysrq-trigger ('b')"
	_in_log "reboot (reboot)"
}

test_message1() {
	echo "Message test (reset)"
	sbd_daemon_cleanup
	sbd -d ${D[1]} create
	sbd -d ${D[1]} -w /dev/null -n test-1 watch
	sleep 2
	sbd -d ${D[1]} message test-1 reset
	sleep 2
	_in_log "sysrq-trigger ('b')"
	_in_log "reboot (reboot)"
}

test_message2() {
	echo "Message test (off)"
	sbd_daemon_cleanup
	sbd -d ${D[1]} create
	sbd -d ${D[1]} -w /dev/null -n test-1 watch
	sleep 2
	sbd -d ${D[1]} message test-1 off
	sleep 2
	_in_log "sysrq-trigger ('o')"
	_in_log "reboot (poweroff)"
}

test_message3() {
	echo "Message test (crashdump)"
	sbd_daemon_cleanup
	sbd -d ${D[1]} create
	sbd -d ${D[1]} -w /dev/null -n test-1 watch
	sleep 2
	sbd -d ${D[1]} message test-1 crashdump
	sleep 2
	_in_log "sysrq-trigger ('c')"
}

test_timeout_action1() {
	echo "Timeout action test (off)"
	sbd_daemon_cleanup
	sbd -d ${D[1]} create
	SBD_TIMEOUT_ACTION=off sbd -d ${D[1]} -w /dev/null -n test-1 watch
	sleep 2
	sbd_wipe_disk ${D[1]}
	sleep $((${MSGWAIT_TIMEOUT} + ${WATCHDOG_TIMEOUT} * 2))
	_in_log "sysrq-trigger ('o')"
	_in_log "reboot (poweroff)"
}

test_timeout_action2() {
	echo "Timeout action test (crashdump)"
	sbd_daemon_cleanup
	sbd -d ${D[1]} create
	SBD_TIMEOUT_ACTION=crashdump sbd -d ${D[1]} -w /dev/null -n test-1 watch
	sleep 2
	sbd_wipe_disk ${D[1]}
	sleep $((${MSGWAIT_TIMEOUT} + ${WATCHDOG_TIMEOUT} * 2))
	_in_log "sysrq-trigger ('c')"
}

sbd_setup

# "-eq" against itself only succeeds for integers, so these two checks
# abort early if the timeouts could not be parsed from "sbd dump".
_ok test "${WATCHDOG_TIMEOUT}" -eq "${WATCHDOG_TIMEOUT}"
_ok test "${MSGWAIT_TIMEOUT}" -eq "${MSGWAIT_TIMEOUT}"
echo "running sbd-tests with WATCHDOG_TIMEOUT=${WATCHDOG_TIMEOUT}s MSGWAIT_TIMEOUT=${MSGWAIT_TIMEOUT}s"

# Daemon tests are only selected when a preload library is configured.
if [[ "${SBD_PRELOAD}" != "" ]]; then
	SBD_DAEMON_TESTS="watchdog stall_inquisitor wipe_slots1 wipe_slots2 message1 message2 message3 timeout_action1 timeout_action2"
fi

for T in 101 102 $(seq 9) ${SBD_DAEMON_TESTS}; do
	if ! test_$T ; then
		echo "FAILURE: Test $T"
		# Leave with a failing status instead of falling through to the
		# final success message; the EXIT trap still runs sbd_teardown.
		exit 1
	fi
	echo "SUCCESS: Test $T"
done

echo "SUCCESS: All tests completed"