Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3686801
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
83 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/sbd.spec b/sbd.spec
index e9ff7bc..b9e7f72 100644
--- a/sbd.spec
+++ b/sbd.spec
@@ -1,339 +1,339 @@
#
# spec file for package sbd
#
# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
# Copyright (c) 2013 Lars Marowsky-Bree
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via http://bugs.opensuse.org/
#
%global longcommit 7bcdf69597042c31ea0b4a523e732d4bbb99b3a0
%global shortcommit %(echo %{longcommit}|cut -c1-8)
%global modified %(echo %{longcommit}-|cut -f2 -d-)
%global github_owner Clusterlabs
%global commit_counter 0
%global build_counter 0
%global buildnum %(expr %{commit_counter} + %{build_counter})
%ifarch s390x s390
# minimum timeout on LPAR diag288 watchdog is 15s
%global watchdog_timeout_default 15
%else
%global watchdog_timeout_default 5
%endif
# Be careful with sync_resource_startup_default
# being enabled. This configuration has
# to be in sync with configuration in pacemaker
# where it is called sbd_sync - assure by e.g.
# mutual rpm dependencies.
%bcond_without sync_resource_startup_default
# Syncing enabled per default will lead to
# syncing enabled on upgrade without adaption
# of the config.
# Setting can still be overruled via sysconfig.
# The setting in the config-template packaged
# will follow the default if below is is left
# empty. But it is possible to have the setting
# in the config-template deviate from the default
# by setting below to an explicit 'yes' or 'no'.
%global sync_resource_startup_sysconfig ""
Name: sbd
Summary: Storage-based death
-License: GPLv2+
+License: GPL-2.0-or-later
Group: System Environment/Daemons
Version: 1.5.2
Release: 99.%{buildnum}.%{shortcommit}.%{modified}git%{?dist}
Url: https://github.com/%{github_owner}/%{name}
Source0: https://github.com/%{github_owner}/%{name}/archive/%{longcommit}/%{name}-%{longcommit}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-build
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: libuuid-devel
BuildRequires: glib2-devel
BuildRequires: libaio-devel
BuildRequires: corosync-devel
%if 0%{?suse_version}
%if 0%{?suse_version} > 1500
BuildRequires: libpacemaker3-devel
%else
BuildRequires: libpacemaker-devel
%endif
%else
BuildRequires: pacemaker-libs-devel
%endif
BuildRequires: libtool
BuildRequires: libuuid-devel
BuildRequires: libxml2-devel
BuildRequires: pkgconfig
BuildRequires: make
Conflicts: fence-agents-sbd < 4.5.0
%if 0%{?rhel} > 0
ExclusiveArch: i686 x86_64 s390x aarch64 ppc64le
%endif
%if %{defined systemd_requires}
%systemd_requires
%endif
%description
This package contains the storage-based death functionality.
Available rpmbuild rebuild options:
--with(out) : sync_resource_startup_default
%package tests
Summary: Storage-based death environment for regression tests
-License: GPLv2+
+License: GPL-2.0-or-later
Group: System Environment/Daemons
%description tests
This package provides an environment + testscripts for
regression-testing sbd.
%prep
###########################################################
# %setup -n sbd-%{version} -q
%setup -q -n %{name}-%{longcommit}
###########################################################
%build
./autogen.sh
export CFLAGS="$RPM_OPT_FLAGS -Wall -Werror"
%configure --with-watchdog-timeout-default=%{watchdog_timeout_default} \
--with-sync-resource-startup-default=%{?with_sync_resource_startup_default:yes}%{!?with_sync_resource_startup_default:no} \
--with-sync-resource-startup-sysconfig=%{sync_resource_startup_sysconfig} \
--with-runstatedir=%{_rundir}
make %{?_smp_mflags}
###########################################################
%install
###########################################################
make DESTDIR=$RPM_BUILD_ROOT LIBDIR=%{_libdir} install
rm -rf ${RPM_BUILD_ROOT}%{_libdir}/stonith
install -D -m 0755 src/sbd.sh $RPM_BUILD_ROOT/usr/share/sbd/sbd.sh
install -D -m 0755 tests/regressions.sh $RPM_BUILD_ROOT/usr/share/sbd/regressions.sh
%if %{defined _unitdir}
install -D -m 0644 src/sbd.service $RPM_BUILD_ROOT/%{_unitdir}/sbd.service
install -D -m 0644 src/sbd_remote.service $RPM_BUILD_ROOT/%{_unitdir}/sbd_remote.service
%endif
mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig
install -m 644 src/sbd.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/sbd
# Don't package static libs
find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f
find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f
%clean
rm -rf %{buildroot}
%if %{defined _unitdir}
%post
%systemd_post sbd.service
%systemd_post sbd_remote.service
%preun
%systemd_preun sbd.service
%systemd_preun sbd_remote.service
%postun
%systemd_postun sbd.service
%systemd_postun sbd_remote.service
%endif
%files
###########################################################
%defattr(-,root,root)
%config(noreplace) %{_sysconfdir}/sysconfig/sbd
%{_sbindir}/sbd
%{_datadir}/sbd
%{_datadir}/pkgconfig/sbd.pc
%exclude %{_datadir}/sbd/regressions.sh
%doc %{_mandir}/man8/sbd*
%if %{defined _unitdir}
%{_unitdir}/sbd.service
%{_unitdir}/sbd_remote.service
%endif
%doc COPYING
%files tests
%defattr(-,root,root)
%dir %{_datadir}/sbd
%{_datadir}/sbd/regressions.sh
%{_libdir}/libsbdtestbed*
%changelog
* Thu Jan 5 2023 <klaus.wenninger@aon.at> - 1.5.2-99.0.8ec8e011.git
- fail startup if pacemaker integration is disabled while
SBD_SYNC_RESOURCE_STARTUP is conflicting (+ hint to overcome)
- improve logs
- when logging state of SBD_PACEMAKER tell it is just that as
this might still be overridden via cmdline options
- log a warning if SBD_PACEMAKER is overridden by -P or -PP option
- do not warn about startup syncing with pacemaker integration disabled
- when watchdog-device is busy give a hint on who is hogging it
- improve build environment
- have --with-runstatedir overrule --runstatedir
- use new package name for pacemaker devel on opensuse
- make config location configurable for man-page-creation
- reverse alloc/de-alloc order to make gcc-12 static analysis happy
- improve test environment
- have image-files in /dev/shm to assure they are in memory and
sbd opening the files with O_SYNC doesn't trigger unnecessary
syncs on a heavily loaded test-machine
fallback to /tmp if /dev/shm doesn't exist
- wrapping away libaio and usage of device-mapper for block-device
simulation can now be passed into make via
SBD_USE_DM & SBD_TRANSLATE_AIO
- have variables that configure test-environment be printed
out prior to running tests
- finally assure we clean environment when interrupted by a
signal (bash should have done it with just setting EXIT handler -
but avoiding bashism might come handy one day)
* Mon Nov 15 2021 <klaus.wenninger@aon.at> - 1.5.1-99.0.7bcdf695.git
- improve/fix cmdline handling
- tell the actual watchdog device specified with -w
- tolerate and strip any leading spaces of commandline option values
- Sanitize numeric arguments
- if start-delay enabled, not explicitly given and msgwait can't be
read from disk (diskless) use 2 * watchdog-timeout
- avoid using deprecated valloc for disk-io-buffers
- avoid frequent alloc/free of aligned buffers to prevent fragmentation
- fix memory-leak in one-time-allocations of sector-buffers
- fix AIO-API usage: properly destroy io-context
- improve/fix build environment
- validate configure options for paths
- remove unneeded complexity of configure.ac hierarchy
- correctly derive package version from git (regression since 1.5.0)
- make runstatedir configurable and derive from distribution
* Tue Jun 8 2021 <klaus.wenninger@aon.at> - 1.5.0-99.0.2a00ac70.git
- default to resource-syncing with pacemaker in spec-file and configure.ac
This default has to match between sbd and pacemaker and
thus qualifies this release for a minor-version-bump
- fix some regressions introduced by adding configurability previously
- adapt description of startup/shutdown sync with pacemaker
- make watchdog warning messages more understandable
* Wed Dec 2 2020 <klaus.wenninger@aon.at> - 1.4.2-99.1.bfeee963.git
- improve build/CI-friendlyness
- * travis: switch to F32 as build-host
- switch to F32 & leap-15.2
- changes for mock-2.0
- turn off loop-devices & device-mapper on x86_64 targets because
- of changes in GCE
- * regressions.sh: get timeouts from disk-header to go with proper defaults
- for architecture
- * use configure for watchdog-default-timeout & others
- * ship sbd.pc with basic sbd build information for downstream packages
- to use
- * add number of commits since version-tag to build-counter
- add robustness against misconfiguration / improve documentation
- * add environment section to man-page previously just available in
- template-config
- * inform the user to restart the sbd service after disk-initialization
- * refuse to start if any of the configured device names is invalid
- * add handshake to sync startup/shutdown with pacemakerd
- Previously sbd just waited for the cib-connnection to show up/go away
- which isn't robust at all.
- The new feature needs new pacemakerd-api as counterpart.
- Thus build checks for presence of pacemakerd-api.
- To simplify downstream adoption behavior is configurable at runtime
- via configure-file with a build-time-configurable default.
- * refuse to start if qdevice-sync_timeout doesn't match watchdog-timeout
- Needed in particular as qdevice-sync_timeout delays quorum-state-update
- and has a default of 30s that doesn't match the 5s watchdog-timeout
- default.
- Fix: sbd-pacemaker: handle new no_quorum_demote + robustness against new
- policies added
- Fix: agent: correctly compare string values when calculating timeout
- Fix: scheduling: overhaul the whole thing
- * prevent possible lockup when format in proc changes
- * properly get and handle scheduler policy & prio
- * on SCHED_RR failing push to the max with SCHED_OTHER
* Tue Nov 19 2019 <klaus.wenninger@aon.at> - 1.4.1-99.1.aca7907c.git
- improvements/clarifications in documentation
- properly finalize cmap connection when disconnected from cluster
- make handling of cib-connection loss more robust
- silence some coverity findings
- overhaul log for reasonable prios and details
- if current slice doesn't have rt-budget move to root-slice
- periodically ping corosync daemon for liveness
- actually use crashdump timeout if configured
- avoid deprecated names for g_main-loop-funcitons
- conflict with fence-agents-sbd < 4.5.0
- rather require corosync-devel provided by most distributions
- make devices on cmdline overrule those coming via SBD_DEVICE
- make 15s timeout on s390 be used consistently
- improve build/test for CI-friendlyness
- * add autogen.sh
- * enable/improve out-of-tree-building
- * make tar generation smarter
- * don't modify sbd.spec
- * make distcheck-target work
- * Add tests/regressions.sh to check-target
- * use unique devmapper names for multiple tests in parallel
- * consistently use serial test-harness for visible progress
- * package tests into separate package (not packaged before)
- * add preload-library to intercept reboots while testing
- * add tests for sbd in daemon-mode & watchdog-dev-handling
- * make tests work in non-privileged containers
* Mon Jan 14 2019 <klaus.wenninger@aon.at> - 1.4.0-0.1.2d595fdd.git
- updated travis-CI (ppc64le-build, fedora29, remove need for
alectolytic-build-container)
- make watchdog-device-query easier to be handled by an SELinux-policy
- configurable delay value for SBD_DELAY_START
- use pacemaker's new pe api with constructors/destructors
- make timeout-action executed by sbd configurable
- init script for sysv systems
- version bump to v1.4.0 to denote Pacemaker 2.0.0 compatibility
* Fri Jun 29 2018 <klaus.wenninger@aon.at> - 1.3.1-0.1.e102d9ed.git
- removed unneeded python-devel build-requirement
- changed legacy corosync-devel to corosynclib-devel
* Fri Nov 3 2017 <klaus.wenninger@aon.at> - 1.3.1-0.1.a180176c.git
- Add commands to test/query watchdogs
- Allow 2-node-operation with a single shared-disk
- Overhaul of the command-line options & config-file
- Proper handling of off instead of reboot
- Refactored disk-servant for more robust communication with parent
- Fix config for Debian + configurable location of config
- Fixes in sbd.sh - multiple SBD devices and others
* Sun Mar 27 2016 <klaus.wenninger@aon.at> - 1.3.0-0.1.4ee36fa3.git
- Changes since v1.2.0 like adding the possibility to have a
watchdog-only setup without shared-block-devices
legitimate a bump to v1.3.0.
* Mon Oct 13 2014 <andrew@beekhof.net> - 1.2.1-0.4.3de531ed.git
- Fixes for suitability to the el7 environment
* Tue Sep 30 2014 <andrew@beekhof.net> - 1.2.1-0.3.8f912945.git
- Only build on archs supported by the HA Add-on
* Fri Aug 29 2014 <andrew@beekhof.net> - 1.2.1-0.2.8f912945.git
- Remove some additional SUSE-isms
* Fri Aug 29 2014 <andrew@beekhof.net> - 1.2.1-0.1.8f912945.git
- Prepare for package review
Resolves: rhbz#1134245
diff --git a/src/sbd-common.c b/src/sbd-common.c
index f3f226a..3abf75f 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -1,1330 +1,1355 @@
/*
* Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "sbd.h"
#include <sys/reboot.h>
#include <sys/types.h>
#ifdef __GLIBC__
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <pwd.h>
#include <unistd.h>
#include <dirent.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <limits.h>
#ifdef _POSIX_MEMLOCK
# include <sys/mman.h>
#endif
/* Tunable defaults: */
unsigned long timeout_watchdog = SBD_WATCHDOG_TIMEOUT_DEFAULT;
int timeout_msgwait = 2 * SBD_WATCHDOG_TIMEOUT_DEFAULT;
unsigned long timeout_watchdog_warn = calculate_timeout_watchdog_warn(SBD_WATCHDOG_TIMEOUT_DEFAULT);
bool do_calculate_timeout_watchdog_warn = true;
int timeout_allocate = 2;
int timeout_loop = 1;
int timeout_io = 3;
int timeout_startup = 120;
int watchdog_use = 1;
int watchdog_set_timeout = 1;
unsigned long timeout_watchdog_crashdump = 0;
int skip_rt = 0;
int debug = 0;
int debug_mode = 0;
char *watchdogdev = NULL;
bool watchdogdev_is_default = false;
char * local_uname;
/* Global, non-tunable variables: */
int sector_size = 0;
int watchdogfd = -1;
int servant_health = 0;
/*const char *devname;*/
const char *cmdname;
void
usage(void)
{
fprintf(stderr,
"Shared storage fencing tool.\n"
"Syntax:\n"
" %s <options> <command> <cmdarguments>\n"
"Options:\n"
"-d <devname> Block device to use (mandatory; can be specified up to 3 times)\n"
"-h Display this help.\n"
"-n <node> Set local node name; defaults to uname -n (optional)\n"
"\n"
"-R Do NOT enable realtime priority (debugging only)\n"
"-W Use watchdog (recommended) (watch only)\n"
"-w <dev> Specify watchdog device (optional) (watch only)\n"
"-T Do NOT initialize the watchdog timeout (watch only)\n"
"-S <0|1> Set start mode if the node was previously fenced (watch only)\n"
"-p <path> Write pidfile to the specified path (watch only)\n"
"-v|-vv|-vvv Enable verbose|debug|debug-library logging (optional)\n"
"\n"
"-1 <N> Set watchdog timeout to N seconds (optional, create only)\n"
"-2 <N> Set slot allocation timeout to N seconds (optional, create only)\n"
"-3 <N> Set daemon loop timeout to N seconds (optional, create only)\n"
"-4 <N> Set msgwait timeout to N seconds (optional, create only)\n"
"-5 <N> Warn if loop latency exceeds threshold (optional, watch only)\n"
" (default is 3, set to 0 to disable)\n"
"-C <N> Watchdog timeout to set before crashdumping\n"
" (def: 0s = disable gracefully, optional)\n"
"-I <N> Async IO read timeout (defaults to 3 * loop timeout, optional)\n"
"-s <N> Timeout to wait for devices to become available (def: 120s)\n"
"-t <N> Dampening delay before faulty servants are restarted (optional)\n"
" (default is 5, set to 0 to disable)\n"
"-F <N> # of failures before a servant is considered faulty (optional)\n"
" (default is 1, set to 0 to disable)\n"
"-P Check Pacemaker quorum and node health (optional, watch only)\n"
"-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
"-r Set timeout-action to comma-separated combination of\n"
" noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n"
"Commands:\n"
#if SUPPORT_SHARED_DISK
"create initialize N slots on <dev> - OVERWRITES DEVICE!\n"
"list List all allocated slots on device, and messages.\n"
"dump Dump meta-data header from device.\n"
"allocate <node>\n"
" Allocate a slot for node (optional)\n"
"message <node> (test|reset|off|crashdump|clear|exit)\n"
" Writes the specified message to node's slot.\n"
#endif
"watch Loop forever, monitoring own slot\n"
"query-watchdog Check for available watchdog-devices and print some info\n"
"test-watchdog Test the watchdog-device selected.\n"
" Attention: This will arm the watchdog and have your system reset\n"
" in case your watchdog is working properly!\n"
, cmdname);
}
#define MAX_WATCHDOGS 64
#define SYS_CLASS_WATCHDOG "/sys/class/watchdog"
#define SYS_CHAR_DEV_DIR "/sys/dev/char"
#define WATCHDOG_NODEDIR "/dev/"
static bool
is_watchdog(dev_t device)
{
static int num_watchdog_devs = 0;
static dev_t watchdog_devs[MAX_WATCHDOGS];
struct dirent *entry;
int i;
/* populate on first call */
if (num_watchdog_devs == 0) {
DIR *dp;
watchdog_devs[0] = makedev(10,130);
num_watchdog_devs = 1;
/* get additional devices from /sys/class/watchdog */
dp = opendir(SYS_CLASS_WATCHDOG);
if (dp) {
while ((entry = readdir(dp))) {
if (entry->d_type == DT_LNK) {
FILE *file;
char entry_name[NAME_MAX+sizeof(SYS_CLASS_WATCHDOG)+5];
snprintf(entry_name, sizeof(entry_name),
SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name);
file = fopen(entry_name, "r");
if (file) {
int major, minor;
if (fscanf(file, "%d:%d", &major, &minor) == 2) {
watchdog_devs[num_watchdog_devs++] = makedev(major, minor);
}
fclose(file);
if (num_watchdog_devs == MAX_WATCHDOGS) {
break;
}
}
}
}
closedir(dp);
}
}
for (i=0; i < num_watchdog_devs; i++) {
if (device == watchdog_devs[i]) {
return true;
}
}
return false;
}
static int
watchdog_init_interval_fd(int wdfd, int timeout)
{
if (ioctl(wdfd, WDIOC_SETTIMEOUT, &timeout) < 0) {
cl_perror( "WDIOC_SETTIMEOUT"
": Failed to set watchdog timer to %u seconds.",
timeout);
cl_log(LOG_CRIT, "Please validate your watchdog configuration!");
cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to skip this if you are completely sure.");
return -1;
}
return 0;
}
int
watchdog_init_interval(void)
{
if (watchdogfd < 0) {
return 0;
}
if (watchdog_set_timeout == 0) {
cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!");
return 0;
}
if (watchdog_init_interval_fd(watchdogfd, timeout_watchdog) < 0) {
return -1;
}
cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.", (int) timeout_watchdog);
return 0;
}
static int
watchdog_tickle_fd(int wdfd, char *wddev)
{
if (write(wdfd, "", 1) != 1) {
cl_perror("Watchdog write failure: %s!", wddev);
return -1;
}
return 0;
}
int
watchdog_tickle(void)
{
if (watchdogfd >= 0) {
return watchdog_tickle_fd(watchdogfd, watchdogdev);
}
return 0;
}
static int
watchdog_init_fd(char *wddev, int timeout)
{
int wdfd;
wdfd = open(wddev, O_WRONLY);
if (wdfd >= 0) {
if (((timeout >= 0) && (watchdog_init_interval_fd(wdfd, timeout) < 0)) ||
(watchdog_tickle_fd(wdfd, wddev) < 0)) {
close(wdfd);
return -1;
}
} else {
struct stat statbuf;
if(!stat(wddev, &statbuf) && S_ISCHR(statbuf.st_mode) &&
is_watchdog(statbuf.st_rdev)) {
cl_perror("Cannot open watchdog device '%s'", wddev);
} else {
cl_perror("Seems as if '%s' isn't a valid watchdog-device", wddev);
}
return -1;
}
return wdfd;
}
int
watchdog_init(void)
{
if (watchdogfd < 0 && watchdogdev != NULL) {
int timeout = timeout_watchdog;
if (watchdog_set_timeout == 0) {
cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!");
timeout = -1;
}
watchdogfd = watchdog_init_fd(watchdogdev, timeout);
if (watchdogfd >= 0) {
cl_log(LOG_NOTICE, "Using watchdog device '%s'", watchdogdev);
if (watchdog_set_timeout) {
cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.", (int) timeout_watchdog);
}
} else {
return -1;
}
}
return 0;
}
static void
watchdog_close_fd(int wdfd, char *wddev, bool disarm)
{
if (disarm) {
int r;
int flags = WDIOS_DISABLECARD;;
/* Explicitly disarm it */
r = ioctl(wdfd, WDIOC_SETOPTIONS, &flags);
if (r < 0) {
cl_perror("Failed to disable hardware watchdog %s", wddev);
}
/* To be sure, use magic close logic, too */
for (;;) {
if (write(wdfd, "V", 1) > 0) {
break;
}
cl_perror("Cannot disable watchdog device %s", wddev);
}
}
if (close(wdfd) < 0) {
cl_perror("Watchdog close(%d) failed", wdfd);
}
}
void
watchdog_close(bool disarm)
{
if (watchdogfd < 0) {
return;
}
watchdog_close_fd(watchdogfd, watchdogdev, disarm);
watchdogfd = -1;
}
struct watchdog_list_item {
dev_t dev;
char *dev_node;
char *dev_ident;
char *dev_driver;
pid_t busy_pid;
char *busy_name;
struct watchdog_list_item *next;
};
struct link_list_item {
char *dev_node;
char *link_name;
struct link_list_item *next;
};
static struct watchdog_list_item *watchdog_list = NULL;
static int watchdog_list_items = 0;
static void
watchdog_populate_list(void)
{
struct dirent *entry;
char entry_name[sizeof(WATCHDOG_NODEDIR)+NAME_MAX];
DIR *dp;
char buf[NAME_MAX+sizeof(WATCHDOG_NODEDIR)] = "";
struct link_list_item *link_list = NULL;
if (watchdog_list != NULL) {
return;
}
/* search for watchdog nodes in /dev */
dp = opendir(WATCHDOG_NODEDIR);
if (dp) {
/* first go for links and memorize them */
while ((entry = readdir(dp))) {
if (entry->d_type == DT_LNK) {
int len;
snprintf(entry_name, sizeof(entry_name),
WATCHDOG_NODEDIR "%s", entry->d_name);
/* realpath(entry_name, buf) unfortunately does a stat on
* target so we can't really use it to check if links stay
* within /dev without triggering e.g. AVC-logs (with
* SELinux policy that just allows stat within /dev).
* Without canonicalization that doesn't actually touch the
* filesystem easily available introduce some limitations
* for simplicity:
* - just simple path without '..'
* - just one level of symlinks (avoid e.g. loop-checking)
*/
len = readlink(entry_name, buf, sizeof(buf) - 1);
if ((len < 1) ||
(len > sizeof(buf) - sizeof(WATCHDOG_NODEDIR) -1 - 1)) {
continue;
}
buf[len] = '\0';
if (buf[0] != '/') {
memmove(&buf[sizeof(WATCHDOG_NODEDIR)-1], buf, len+1);
memcpy(buf, WATCHDOG_NODEDIR, sizeof(WATCHDOG_NODEDIR)-1);
len += sizeof(WATCHDOG_NODEDIR)-1;
}
if (strstr(buf, "/../") ||
strncmp(WATCHDOG_NODEDIR, buf, sizeof(WATCHDOG_NODEDIR)-1)) {
continue;
} else {
/* just memorize to avoid statting the target - SELinux */
struct link_list_item *lli =
calloc(1, sizeof(struct link_list_item));
+ if (lli == NULL) {
+ break;
+ }
lli->dev_node = strdup(buf);
lli->link_name = strdup(entry_name);
+ if ((lli->dev_node == NULL) || (lli->link_name == NULL)) {
+ free(lli->dev_node);
+ free(lli->link_name);
+ free(lli);
+ break;
+ }
lli->next = link_list;
link_list = lli;
}
}
}
rewinddir(dp);
while ((entry = readdir(dp))) {
if (entry->d_type == DT_CHR) {
struct stat statbuf;
snprintf(entry_name, sizeof(entry_name),
WATCHDOG_NODEDIR "%s", entry->d_name);
if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode) &&
is_watchdog(statbuf.st_rdev)) {
- int wdfd = watchdog_init_fd(entry_name, -1);
+ int wdfd;
struct watchdog_list_item *wdg =
calloc(1, sizeof(struct watchdog_list_item));
int len;
struct link_list_item *tmp_list = NULL;
+ if (wdg == NULL) {
+ break;
+ }
+
wdg->dev = statbuf.st_rdev;
wdg->dev_node = strdup(entry_name);
+ if (wdg->dev_node == NULL) {
+ free(wdg);
+ break;
+ }
wdg->next = watchdog_list;
watchdog_list = wdg;
watchdog_list_items++;
+ wdfd = watchdog_init_fd(entry_name, -1);
if (wdfd >= 0) {
struct watchdog_info ident;
ident.identity[0] = '\0';
ioctl(wdfd, WDIOC_GETSUPPORT, &ident);
watchdog_close_fd(wdfd, entry_name, true);
if (ident.identity[0]) {
wdg->dev_ident = strdup((char *) ident.identity);
}
}
snprintf(entry_name, sizeof(entry_name),
SYS_CHAR_DEV_DIR "/%d:%d/device/driver",
major(wdg->dev), minor(wdg->dev));
len = readlink(entry_name, buf, sizeof(buf) - 1);
if (len > 0) {
buf[len] = '\0';
wdg->dev_driver = strdup(basename(buf));
} else if ((wdg->dev_ident) &&
(strcmp(wdg->dev_ident,
"Software Watchdog") == 0)) {
wdg->dev_driver = strdup("softdog");
}
/* create dupes if we have memorized links
* to this node
*/
for (tmp_list = link_list; tmp_list;
tmp_list = tmp_list->next) {
if (!strcmp(tmp_list->dev_node,
wdg->dev_node)) {
struct watchdog_list_item *dupe_wdg =
calloc(1, sizeof(struct watchdog_list_item));
+ if (dupe_wdg == NULL) {
+ break;
+ }
/* as long as we never purge watchdog_list
* there is no need to dupe strings
*/
*dupe_wdg = *wdg;
dupe_wdg->dev_node = strdup(tmp_list->link_name);
+ if (dupe_wdg->dev_node == NULL) {
+ free(dupe_wdg);
+ break;
+ }
dupe_wdg->next = watchdog_list;
watchdog_list = dupe_wdg;
watchdog_list_items++;
}
/* for performance reasons we could remove
* the link_list entry
*/
}
}
}
}
closedir(dp);
}
/* cleanup link list */
while (link_list) {
struct link_list_item *tmp_list = link_list;
link_list = link_list->next;
free(tmp_list->dev_node);
free(tmp_list->link_name);
free(tmp_list);
}
}
static void
watchdog_checkbusy()
{
DIR *dproc;
struct dirent *entry;
dproc = opendir("/proc");
if (!dproc) {
/* no proc directory to search through */
return;
}
while ((entry = readdir(dproc)) != NULL) {
pid_t local_pid;
char *leftover;
DIR *dpid;
char procpath[NAME_MAX+10] = { 0 };
if (entry->d_name[0] == '.') {
continue;
}
local_pid = strtol(entry->d_name, &leftover, 10);
if (leftover[0] != '\0')
continue;
snprintf(procpath, sizeof(procpath), "/proc/%s/fd", entry->d_name);
dpid = opendir(procpath);
if (!dpid) {
/* silently continue - might be just a race */
continue;
}
while ((entry = readdir(dpid)) != NULL) {
struct watchdog_list_item *wdg;
char entry_name[sizeof(procpath)+NAME_MAX+1] = { 0 };
char buf[NAME_MAX+1] = { 0 };
int len;
if (entry->d_type != DT_LNK) {
continue;
}
snprintf(entry_name, sizeof(entry_name),
"%s/%s", procpath, entry->d_name);
len = readlink(entry_name, buf, sizeof(buf) - 1);
if (len < 1) {
continue;
}
buf[len] = '\0';
for (wdg = watchdog_list; wdg != NULL; wdg = wdg->next) {
if (!strcmp(buf, wdg->dev_node)) {
char name[16];
FILE *file;
wdg->busy_pid = local_pid;
snprintf(procpath, sizeof(procpath), "/proc/%d/status", local_pid);
file = fopen(procpath, "r");
if (file) {
if (fscanf(file, "Name:\t%15[a-zA-Z0-9 _-]", name) == 1) {
wdg->busy_name = strdup(name);
}
fclose(file);
}
}
}
}
closedir(dpid);
}
closedir(dproc);
return;
}
int watchdog_info(void)
{
struct watchdog_list_item *wdg;
int wdg_cnt = 0;
watchdog_populate_list();
watchdog_checkbusy();
printf("\nDiscovered %d watchdog devices:\n", watchdog_list_items);
for (wdg = watchdog_list; wdg != NULL; wdg = wdg->next) {
wdg_cnt++;
if (wdg->busy_pid) {
printf("\n[%d] %s\nIdentity: Busy: PID %d (%s)\nDriver: %s\n",
wdg_cnt, wdg->dev_node,
wdg->busy_pid,
wdg->busy_name?wdg->busy_name:"<unknown>",
wdg->dev_driver?wdg->dev_driver:"<unknown>");
} else {
printf("\n[%d] %s\nIdentity: %s\nDriver: %s\n",
wdg_cnt, wdg->dev_node,
wdg->dev_ident?wdg->dev_ident:
"Error: device hogged via alias major/minor?",
wdg->dev_driver?wdg->dev_driver:"<unknown>");
}
if ((wdg->dev_driver) && (strcmp(wdg->dev_driver, "softdog") == 0)) {
printf("CAUTION: Not recommended for use with sbd.\n");
}
}
return 0;
}
int watchdog_test(void)
{
int i;
if ((watchdog_set_timeout == 0) || !watchdog_use) {
printf("\nWatchdog is disabled - aborting test!!!\n");
return 0;
}
if (watchdogdev_is_default) {
watchdog_populate_list();
if (watchdog_list_items > 1) {
printf("\nError: Multiple watchdog devices discovered.\n"
" Use -w <watchdog> or SBD_WATCHDOG_DEV to specify\n"
" which device to reset the system with\n");
watchdog_info();
return -1;
}
}
if ((isatty(fileno(stdin)))) {
char buffer[16];
printf("\nWARNING: This operation is expected to force-reboot this system\n"
" without following any shutdown procedures.\n\n"
"Proceed? [NO/Proceed] ");
if ((fgets(buffer, 16, stdin) == NULL) ||
strcmp(buffer, "Proceed\n")) {
printf("\nAborting watchdog test!!!\n");
return 0;
}
printf("\n");
}
printf("Initializing %s with a reset countdown of %d seconds ...\n",
watchdogdev, (int) timeout_watchdog);
if ((watchdog_init() < 0) || (watchdog_init_interval() < 0)) {
printf("Failed to initialize watchdog!!!\n");
watchdog_info();
return -1;
}
printf("\n");
printf("NOTICE: The watchdog device is expected to reset the system\n"
" in %d seconds. If system remains active beyond that time,\n"
" watchdog may not be functional.\n\n", (int) timeout_watchdog);
for (i=timeout_watchdog; i>1; i--) {
printf("Reset countdown ... %d seconds\n", i);
sleep(1);
}
for (i=2; i>0; i--) {
printf("System expected to reset any moment ...\n");
sleep(1);
}
for (i=5; i>0; i--) {
printf("System should have reset ...\n");
sleep(1);
}
printf("Error: The watchdog device has failed to reboot the system,\n"
" and it may not be suitable for usage with sbd.\n");
/* test should trigger a reboot thus returning is actually bad */
return -1;
}
/* This duplicates some code from linux/ioprio.h since these are not included
* even in linux-kernel-headers. Sucks. See also
* /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */
extern int sys_ioprio_set(int, int, int);
int ioprio_set(int which, int who, int ioprio);
inline int ioprio_set(int which, int who, int ioprio)
{
return syscall(__NR_ioprio_set, which, who, ioprio);
}
enum {
IOPRIO_CLASS_NONE,
IOPRIO_CLASS_RT,
IOPRIO_CLASS_BE,
IOPRIO_CLASS_IDLE,
};
enum {
IOPRIO_WHO_PROCESS = 1,
IOPRIO_WHO_PGRP,
IOPRIO_WHO_USER,
};
#define IOPRIO_BITS (16)
#define IOPRIO_CLASS_SHIFT (13)
#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
static void
sbd_stack_hogger(unsigned char * inbuf, int kbytes)
{
unsigned char buf[1024];
if(kbytes <= 0) {
return;
}
if (inbuf == NULL) {
memset(buf, HOG_CHAR, sizeof(buf));
} else {
memcpy(buf, inbuf, sizeof(buf));
}
if (kbytes > 0) {
sbd_stack_hogger(buf, kbytes-1);
}
return;
}
static void
sbd_malloc_hogger(int kbytes)
{
int j;
void**chunks;
int chunksize = 1024;
if(kbytes <= 0) {
return;
}
/*
* We could call mallopt(M_MMAP_MAX, 0) to disable it completely,
* but we've already called mlockall()
*
* We could also call mallopt(M_TRIM_THRESHOLD, -1) to prevent malloc
* from giving memory back to the system, but we've already called
* mlockall(MCL_FUTURE), so there's no need.
*/
chunks = malloc(kbytes * sizeof(void *));
if (chunks == NULL) {
cl_log(LOG_WARNING, "Could not preallocate chunk array");
return;
}
for (j=0; j < kbytes; ++j) {
chunks[j] = malloc(chunksize);
if (chunks[j] == NULL) {
cl_log(LOG_WARNING, "Could not preallocate block %d", j);
} else {
memset(chunks[j], 0, chunksize);
}
}
for (j=0; j < kbytes; ++j) {
free(chunks[j]);
}
free(chunks);
}
static void sbd_memlock(int stackgrowK, int heapgrowK)
{
#ifdef _POSIX_MEMLOCK
/*
* We could call setrlimit(RLIMIT_MEMLOCK,...) with a large
* number, but the mcp runs as root and mlock(2) says:
*
* Since Linux 2.6.9, no limits are placed on the amount of memory
* that a privileged process may lock, and this limit instead
* governs the amount of memory that an unprivileged process may
* lock.
*/
if (mlockall(MCL_CURRENT|MCL_FUTURE) >= 0) {
cl_log(LOG_INFO, "Locked ourselves in memory");
/* Now allocate some extra pages (MCL_FUTURE will ensure they stay around) */
sbd_malloc_hogger(heapgrowK);
sbd_stack_hogger(NULL, stackgrowK);
} else {
cl_perror("Unable to lock ourselves into memory");
}
#else
cl_log(LOG_ERR, "Unable to lock ourselves into memory");
#endif
}
static int get_realtime_budget(void)
{
FILE *f;
char fname[PATH_MAX];
int res = -1, lnum = 0, num;
char *cgroup = NULL, *namespecs = NULL;
snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid());
f = fopen(fname, "rt");
if (f == NULL) {
cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd",
(intmax_t)getpid());
goto exit_res;
}
while( (num = fscanf(f, "%d:%m[^:]:%m[^\n]\n", &lnum,
&namespecs, &cgroup)) !=EOF ) {
if (namespecs && strstr(namespecs, "cpuacct")) {
free(namespecs);
break;
}
if (cgroup) {
free(cgroup);
cgroup = NULL;
}
if (namespecs) {
free(namespecs);
namespecs = NULL;
}
/* not to get stuck if format changes */
if ((num < 3) && ((fscanf(f, "%*[^\n]") == EOF) ||
(fscanf(f, "\n") == EOF))) {
break;
}
}
fclose(f);
if (cgroup == NULL) {
cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd",
(intmax_t)getpid());
goto exit_res;
}
snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us",
cgroup);
f = fopen(fname, "rt");
if (f == NULL) {
cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but "
"doesn't for '%s'", cgroup);
goto exit_res;
}
if (fscanf(f, "%d", &res) != 1) {
cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname);
} else {
cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res);
}
fclose(f);
exit_res:
if (cgroup) {
free(cgroup);
}
return res;
}
/* stolen from corosync */
static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
FILE *f;
int res = -1;
/*
* /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
* using systemd and systemd uses hardcoded path of cgroup mount point.
*
* This feature is expected to be removed as soon as systemd gets support
* for managing RT configuration.
*/
f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
if (f == NULL) {
cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
"system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
res = 0;
goto exit_res;
}
fclose(f);
if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are "
"-> skip moving to root-slice");
res = 0;
goto exit_res;
}
f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
if (f == NULL) {
cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
goto exit_res;
}
if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
goto close_and_exit_res;
}
close_and_exit_res:
if (fclose(f) != 0) {
cl_log(LOG_WARNING, "Can't close cgroups tasks file");
goto exit_res;
}
exit_res:
return (res);
}
void
sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
{
if(priority < 0) {
return;
}
do {
#ifdef SCHED_RR
if (move_to_root_cgroup) {
sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup);
}
{
int pmin = sched_get_priority_min(SCHED_RR);
int pmax = sched_get_priority_max(SCHED_RR);
struct sched_param sp;
int pcurrent;
if (priority == 0) {
priority = pmax;
} else if (priority < pmin) {
priority = pmin;
} else if (priority > pmax) {
priority = pmax;
}
if (sched_getparam(0, &sp) < 0) {
cl_perror("Unable to get scheduler priority");
} else if ((pcurrent = sched_getscheduler(0)) < 0) {
cl_perror("Unable to get scheduler policy");
} else if ((pcurrent == SCHED_RR) &&
(sp.sched_priority >= priority)) {
cl_log(LOG_INFO,
"Stay with priority (%d) for policy SCHED_RR",
sp.sched_priority);
break;
} else {
memset(&sp, 0, sizeof(sp));
sp.sched_priority = priority;
if (sched_setscheduler(0, SCHED_RR, &sp) < 0) {
cl_perror(
"Unable to set scheduler policy to SCHED_RR priority %d",
priority);
} else {
cl_log(LOG_INFO,
"Scheduler policy is now SCHED_RR priority %d",
priority);
break;
}
}
}
#else
cl_log(LOG_ERR, "System does not support updating the scheduler policy");
#endif
#ifdef PRIO_PGRP
if (setpriority(PRIO_PGRP, 0, INT_MIN) < 0) {
cl_perror("Unable to raise the scheduler priority");
} else {
cl_log(LOG_INFO, "Scheduler priority raised to the maximum");
}
#else
cl_perror("System does not support setting the scheduler priority");
#endif
} while (0);
sbd_memlock(heapgrowK, stackgrowK);
}
void
maximize_priority(void)
{
if (skip_rt) {
cl_log(LOG_INFO, "Not elevating to realtime (-R specified).");
return;
}
sbd_make_realtime(0, 256, 256);
if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(),
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) {
cl_perror("ioprio_set() call failed.");
}
}
void
sysrq_init(void)
{
FILE* procf;
int c;
procf = fopen("/proc/sys/kernel/sysrq", "r");
if (!procf) {
cl_perror("cannot open /proc/sys/kernel/sysrq for read.");
return;
}
if (fscanf(procf, "%d", &c) != 1) {
cl_perror("Parsing sysrq failed");
c = 0;
}
fclose(procf);
if (c == 1)
return;
/* 8 for debugging dumps of processes,
128 for reboot/poweroff */
c |= 136;
procf = fopen("/proc/sys/kernel/sysrq", "w");
if (!procf) {
cl_perror("cannot open /proc/sys/kernel/sysrq for writing");
return;
}
fprintf(procf, "%d", c);
fclose(procf);
return;
}
void
sysrq_trigger(char t)
{
FILE *procf;
procf = fopen("/proc/sysrq-trigger", "a");
if (!procf) {
cl_perror("Opening sysrq-trigger failed.");
return;
}
cl_log(LOG_INFO, "sysrq-trigger: %c\n", t);
fprintf(procf, "%c\n", t);
fclose(procf);
return;
}
static void
do_exit(char kind, bool do_flush)
{
/* TODO: Turn debug_mode into a bit field? Delay + kdump for example */
const char *reason = NULL;
if (kind == 'c') {
cl_log(LOG_NOTICE, "Initiating kdump");
} else if (debug_mode == 1) {
cl_log(LOG_WARNING, "Initiating kdump instead of panicking the node (debug mode)");
kind = 'c';
}
if (debug_mode == 2) {
cl_log(LOG_WARNING, "Shutting down SBD instead of panicking the node (debug mode)");
watchdog_close(true);
exit(0);
}
if (debug_mode == 3) {
/* Give the system some time to flush logs to disk before rebooting. */
cl_log(LOG_WARNING, "Delaying node panic by 10s (debug mode)");
watchdog_close(true);
sync();
sleep(10);
}
switch(kind) {
case 'b':
reason = "reboot";
break;
case 'c':
reason = "crashdump";
break;
case 'o':
reason = "off";
break;
default:
reason = "unknown";
break;
}
cl_log(LOG_EMERG, "Rebooting system: %s", reason);
if (do_flush) {
sync();
}
if (kind == 'c') {
if (timeout_watchdog_crashdump) {
if (timeout_watchdog != timeout_watchdog_crashdump) {
timeout_watchdog = timeout_watchdog_crashdump;
watchdog_init_interval();
}
watchdog_close(false);
} else {
watchdog_close(true);
}
sysrq_trigger(kind);
} else {
watchdog_close(false);
sysrq_trigger(kind);
if (reboot((kind == 'o')?RB_POWER_OFF:RB_AUTOBOOT) < 0) {
cl_perror("%s failed", (kind == 'o')?"Poweroff":"Reboot");
}
}
exit(1);
}
void
do_crashdump(void)
{
do_exit('c', true);
}
void
do_reset(void)
{
do_exit('b', true);
}
void
do_off(void)
{
do_exit('o', true);
}
void
do_timeout_action(void)
{
do_exit(timeout_sysrq_char, do_flush);
}
/*
* Change directory to the directory our core file needs to go in
* Call after you establish the userid you're running under.
*/
int
sbd_cdtocoredir(void)
{
int rc;
static const char *dir = NULL;
if (dir == NULL) {
dir = CRM_CORE_DIR;
}
if ((rc=chdir(dir)) < 0) {
int errsave = errno;
cl_perror("Cannot chdir to [%s]", dir);
errno = errsave;
}
return rc;
}
pid_t
make_daemon(void)
{
pid_t pid;
const char * devnull = "/dev/null";
pid = fork();
if (pid < 0) {
cl_log(LOG_ERR, "%s: could not start daemon\n",
cmdname);
cl_perror("fork");
exit(1);
}else if (pid > 0) {
return pid;
}
qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE);
/* This is the child; ensure privileges have not been lost. */
maximize_priority();
sysrq_init();
umask(022);
close(0);
(void)open(devnull, O_RDONLY);
close(1);
(void)open(devnull, O_WRONLY);
close(2);
(void)open(devnull, O_WRONLY);
sbd_cdtocoredir();
return 0;
}
void
sbd_get_uname(void)
{
struct utsname uname_buf;
int i;
if (uname(&uname_buf) < 0) {
cl_perror("uname() failed?");
exit(1);
}
local_uname = strdup(uname_buf.nodename);
for (i = 0; i < strlen(local_uname); i++)
local_uname[i] = tolower(local_uname[i]);
}
#define FMT_MAX 256
void
sbd_set_format_string(int method, const char *daemon)
{
int offset = 0;
char fmt[FMT_MAX];
struct utsname res;
switch(method) {
case QB_LOG_STDERR:
break;
case QB_LOG_SYSLOG:
if(daemon && strcmp(daemon, "sbd") != 0) {
offset += snprintf(fmt + offset, FMT_MAX - offset, "%10s: ", daemon);
}
break;
default:
/* When logging to a file */
if (uname(&res) == 0) {
offset +=
snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %s %10s: ", getpid(),
res.nodename, daemon);
} else {
offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %10s: ", getpid(), daemon);
}
}
if (debug && method >= QB_LOG_STDERR) {
offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n: ");
} else {
offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n: ");
}
if (method == QB_LOG_SYSLOG) {
offset += snprintf(fmt + offset, FMT_MAX - offset, "%%b");
} else {
offset += snprintf(fmt + offset, FMT_MAX - offset, "\t%%b");
}
if(offset > 0) {
qb_log_format_set(method, fmt);
}
}
void
notify_parent(void)
{
pid_t ppid;
union sigval signal_value;
memset(&signal_value, 0, sizeof(signal_value));
ppid = getppid();
if (ppid == 1) {
/* Our parent died unexpectedly. Triggering
* self-fence. */
cl_log(LOG_WARNING, "Our parent is dead.");
do_timeout_action();
}
switch (servant_health) {
case pcmk_health_pending:
case pcmk_health_shutdown:
case pcmk_health_transient:
DBGLOG(LOG_DEBUG, "Not notifying parent: state transient (%d)", servant_health);
break;
case pcmk_health_unknown:
case pcmk_health_unclean:
case pcmk_health_noquorum:
DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY (%d)", servant_health);
sigqueue(ppid, SIG_PCMK_UNHEALTHY, signal_value);
break;
case pcmk_health_online:
DBGLOG(LOG_DEBUG, "Notifying parent: healthy");
sigqueue(ppid, SIG_LIVENESS, signal_value);
break;
default:
DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY %d", servant_health);
sigqueue(ppid, SIG_PCMK_UNHEALTHY, signal_value);
break;
}
}
void
set_servant_health(enum pcmk_health state, int level, char const *format, ...)
{
if (servant_health != state) {
va_list ap;
int len = 0;
char *string = NULL;
servant_health = state;
va_start(ap, format);
len = vasprintf (&string, format, ap);
if(len > 0) {
cl_log(level, "%s", string);
}
va_end(ap);
free(string);
}
}
bool
sbd_is_disk(struct servants_list_item *servant)
{
if ((servant != NULL) &&
(servant->devname != NULL) &&
(servant->devname[0] == '/')) {
return true;
}
return false;
}
bool
sbd_is_cluster(struct servants_list_item *servant)
{
if ((servant != NULL) &&
(servant->devname != NULL) &&
(strcmp("cluster", servant->devname) == 0)) {
return true;
}
return false;
}
bool
sbd_is_pcmk(struct servants_list_item *servant)
{
if ((servant != NULL) &&
(servant->devname != NULL) &&
(strcmp("pcmk", servant->devname) == 0)) {
return true;
}
return false;
}
diff --git a/src/sbd-md.c b/src/sbd-md.c
index 7a37522..2a237ad 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -1,1289 +1,1288 @@
/*
* Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "sbd.h"
#define SBD_MSG_EMPTY 0x00
#define SBD_MSG_TEST 0x01
#define SBD_MSG_RESET 0x02
#define SBD_MSG_OFF 0x03
#define SBD_MSG_EXIT 0x04
#define SBD_MSG_CRASHDUMP 0x05
#define SLOT_TO_SECTOR(slot) (1+slot*2)
#define MBOX_TO_SECTOR(mbox) (2+mbox*2)
extern int disk_count;
/* These have to match the values in the header of the partition */
static char sbd_magic[8] = "SBD_SBD_";
static char sbd_version = 0x02;
struct slot_msg_arg_t {
const char* name;
const char* msg;
};
static signed char
cmd2char(const char *cmd)
{
if (strcmp("clear", cmd) == 0) {
return SBD_MSG_EMPTY;
} else if (strcmp("test", cmd) == 0) {
return SBD_MSG_TEST;
} else if (strcmp("reset", cmd) == 0) {
return SBD_MSG_RESET;
} else if (strcmp("off", cmd) == 0) {
return SBD_MSG_OFF;
} else if (strcmp("exit", cmd) == 0) {
return SBD_MSG_EXIT;
} else if (strcmp("crashdump", cmd) == 0) {
return SBD_MSG_CRASHDUMP;
}
return -1;
}
static const char*
char2cmd(const char cmd)
{
switch (cmd) {
case SBD_MSG_EMPTY:
return "clear";
break;
case SBD_MSG_TEST:
return "test";
break;
case SBD_MSG_RESET:
return "reset";
break;
case SBD_MSG_OFF:
return "off";
break;
case SBD_MSG_EXIT:
return "exit";
break;
case SBD_MSG_CRASHDUMP:
return "crashdump";
break;
default:
return "undefined";
break;
}
}
static void
close_device(struct sbd_context *st)
{
if (!st) {
return;
}
if (st->ioctx) {
io_destroy(st->ioctx);
}
if (st->devfd >= 0) {
close(st->devfd);
}
free(st->buffer);
free(st);
}
static struct sbd_context *
open_device(const char* devname, int loglevel)
{
struct sbd_context *st;
if (!devname)
return NULL;
st = calloc(1, sizeof(struct sbd_context));
if (!st) {
return NULL;
}
st->devfd = -1;
if (io_setup(1, &st->ioctx) != 0) {
cl_perror("io_setup failed");
goto out;
}
st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT);
if (st->devfd == -1) {
if (loglevel == LOG_DEBUG) {
DBGLOG(loglevel, "Opening device %s failed.", devname);
} else {
cl_log(loglevel, "Opening device %s failed.", devname);
}
goto out;
}
ioctl(st->devfd, BLKSSZGET, §or_size);
if (sector_size == 0) {
cl_perror("Get sector size failed.\n");
goto out;
}
if (posix_memalign(&st->buffer, sector_size, sector_size)) {
cl_perror("Couldn't allocate sector-buffer.");
goto out;
}
return st;
out:
close_device(st);
return NULL;
}
static void *
sector_alloc(void)
{
void *x;
x = calloc(1, sector_size);
if (!x) {
exit(1);
}
return x;
}
static int
sector_io(struct sbd_context *st, int sector, void *data, int rw)
{
struct timespec timeout;
struct io_event event;
struct iocb *ios[1] = { &st->io };
long r;
timeout.tv_sec = timeout_io;
timeout.tv_nsec = 0;
memset(&st->io, 0, sizeof(struct iocb));
if (rw) {
memcpy(st->buffer, data, sector_size);
io_prep_pwrite(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector);
} else {
memset(st->buffer, 0, sector_size);
io_prep_pread(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector);
}
if (io_submit(st->ioctx, 1, ios) != 1) {
cl_log(LOG_ERR, "Failed to submit IO request! (rw=%d)", rw);
return -1;
}
errno = 0;
r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout);
if (r < 0 ) {
cl_log(LOG_ERR, "Failed to retrieve IO events (rw=%d)", rw);
return -1;
} else if (r < 1L) {
cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d, r=%ld)", rw, r);
r = io_cancel(st->ioctx, ios[0], &event);
if (r) {
DBGLOG(LOG_INFO, "Could not cancel IO request (rw=%d)", rw);
/* Doesn't really matter, debugging information.
*/
}
return -1;
} else if (r > 1L) {
cl_log(LOG_ERR, "More than one IO was returned (r=%ld)", r);
return -1;
}
/* IO is happy */
if (event.res == sector_size) {
if (!rw) {
memcpy(data, st->buffer, sector_size);
}
return 0;
} else {
cl_log(LOG_ERR, "Short IO (rw=%d, res=%lu, sector_size=%d)",
rw, event.res, sector_size);
return -1;
}
}
static int
sector_write(struct sbd_context *st, int sector, void *data)
{
return sector_io(st, sector, data, 1);
}
static int
sector_read(struct sbd_context *st, int sector, void *data)
{
return sector_io(st, sector, data, 0);
}
static int
slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node)
{
return sector_read(st, SLOT_TO_SECTOR(slot), s_node);
}
static int
slot_write(struct sbd_context *st, int slot, struct sector_node_s *s_node)
{
return sector_write(st, SLOT_TO_SECTOR(slot), s_node);
}
static int
mbox_write(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
return sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox);
}
static int
mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
return sector_read(st, MBOX_TO_SECTOR(mbox), s_mbox);
}
static int
mbox_write_verify(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
void *data;
int rc = 0;
if (sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox) < 0)
return -1;
data = sector_alloc();
if (sector_read(st, MBOX_TO_SECTOR(mbox), data) < 0) {
rc = -1;
goto out;
}
if (memcmp(s_mbox, data, sector_size) != 0) {
cl_log(LOG_ERR, "Write verification failed!");
rc = -1;
goto out;
}
rc = 0;
out:
free(data);
return rc;
}
static int header_write(struct sbd_context *st, struct sector_header_s *s_header)
{
s_header->sector_size = htonl(s_header->sector_size);
s_header->timeout_watchdog = htonl(s_header->timeout_watchdog);
s_header->timeout_allocate = htonl(s_header->timeout_allocate);
s_header->timeout_loop = htonl(s_header->timeout_loop);
s_header->timeout_msgwait = htonl(s_header->timeout_msgwait);
return sector_write(st, 0, s_header);
}
static int
header_read(struct sbd_context *st, struct sector_header_s *s_header)
{
if (sector_read(st, 0, s_header) < 0)
return -1;
s_header->sector_size = ntohl(s_header->sector_size);
s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog);
s_header->timeout_allocate = ntohl(s_header->timeout_allocate);
s_header->timeout_loop = ntohl(s_header->timeout_loop);
s_header->timeout_msgwait = ntohl(s_header->timeout_msgwait);
/* This sets the global defaults: */
timeout_watchdog = s_header->timeout_watchdog;
timeout_allocate = s_header->timeout_allocate;
timeout_loop = s_header->timeout_loop;
timeout_msgwait = s_header->timeout_msgwait;
return 0;
}
static int
valid_header(const struct sector_header_s *s_header)
{
if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) {
cl_log(LOG_ERR, "Header magic does not match.");
return -1;
}
if (s_header->version != sbd_version) {
cl_log(LOG_ERR, "Header version does not match.");
return -1;
}
if (s_header->sector_size != sector_size) {
cl_log(LOG_ERR, "Header sector size does not match.");
return -1;
}
return 0;
}
static struct sector_header_s *
header_get(struct sbd_context *st)
{
struct sector_header_s *s_header;
s_header = sector_alloc();
if (header_read(st, s_header) < 0) {
cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd);
free(s_header);
return NULL;
}
if (valid_header(s_header) < 0) {
cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd);
free(s_header);
return NULL;
}
/* cl_log(LOG_INFO, "Found version %d header with %d slots",
s_header->version, s_header->slots); */
return s_header;
}
static int
header_dump(struct sbd_context *st)
{
struct sector_header_s *s_header;
char uuid[37];
s_header = header_get(st);
if (s_header == NULL)
return -1;
printf("Header version : %u.%u\n", s_header->version,
s_header->minor_version);
if (s_header->minor_version > 0) {
uuid_unparse_lower(s_header->uuid, uuid);
printf("UUID : %s\n", uuid);
}
printf("Number of slots : %u\n", s_header->slots);
printf("Sector size : %lu\n",
(unsigned long)s_header->sector_size);
printf("Timeout (watchdog) : %lu\n",
(unsigned long)s_header->timeout_watchdog);
printf("Timeout (allocate) : %lu\n",
(unsigned long)s_header->timeout_allocate);
printf("Timeout (loop) : %lu\n",
(unsigned long)s_header->timeout_loop);
printf("Timeout (msgwait) : %lu\n",
(unsigned long)s_header->timeout_msgwait);
free(s_header);
return 0;
}
static int
init_device(struct sbd_context *st)
{
struct sector_header_s *s_header;
struct sector_node_s *s_node;
struct sector_mbox_s *s_mbox;
char uuid[37];
int i;
int rc = 0;
s_header = sector_alloc();
s_node = sector_alloc();
s_mbox = sector_alloc();
memcpy(s_header->magic, sbd_magic, sizeof(s_header->magic));
s_header->version = sbd_version;
s_header->slots = 255;
s_header->sector_size = sector_size;
s_header->timeout_watchdog = timeout_watchdog;
s_header->timeout_allocate = timeout_allocate;
s_header->timeout_loop = timeout_loop;
s_header->timeout_msgwait = timeout_msgwait;
s_header->minor_version = 1;
uuid_generate(s_header->uuid);
uuid_unparse_lower(s_header->uuid, uuid);
cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)",
s_header->version, s_header->minor_version,
st->devfd, uuid);
fprintf(stdout, "Creating version %d.%d header on device %d (uuid: %s)\n",
s_header->version, s_header->minor_version,
st->devfd, uuid);
if (header_write(st, s_header) < 0) {
rc = -1; goto out;
}
cl_log(LOG_INFO, "Initializing %d slots on device %d",
s_header->slots,
st->devfd);
fprintf(stdout, "Initializing %d slots on device %d\n",
s_header->slots,
st->devfd);
for (i=0;i < s_header->slots;i++) {
if (slot_write(st, i, s_node) < 0) {
rc = -1; goto out;
}
if (mbox_write(st, i, s_mbox) < 0) {
rc = -1; goto out;
}
}
-out: free(s_node);
+out: free(s_mbox);
+ free(s_node);
free(s_header);
- free(s_mbox);
return(rc);
}
/* Check if there already is a slot allocated to said name; returns the
* slot number. If not found, returns -1.
* This is necessary because slots might not be continuous. */
static int
slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name)
{
struct sector_node_s *s_node = NULL;
int i;
int rc = -1;
if (!name) {
cl_log(LOG_ERR, "slot_lookup(): No name specified.\n");
goto out;
}
s_node = sector_alloc();
for (i=0; i < s_header->slots; i++) {
if (slot_read(st, i, s_node) < 0) {
rc = -2; goto out;
}
if (s_node->in_use != 0) {
if (strncasecmp(s_node->name, name,
SECTOR_NAME_MAX) == 0) {
DBGLOG(LOG_INFO, "%s owns slot %d", name, i);
rc = i; goto out;
}
}
}
out: free(s_node);
return rc;
}
static int
slot_unused(struct sbd_context *st, const struct sector_header_s *s_header)
{
struct sector_node_s *s_node;
int i;
int rc = -1;
s_node = sector_alloc();
for (i=0; i < s_header->slots; i++) {
if (slot_read(st, i, s_node) < 0) {
rc = -1; goto out;
}
if (s_node->in_use == 0) {
rc = i; goto out;
}
}
out: free(s_node);
return rc;
}
static int
slot_allocate(struct sbd_context *st, const char *name)
{
struct sector_header_s *s_header = NULL;
struct sector_node_s *s_node = NULL;
struct sector_mbox_s *s_mbox = NULL;
int i;
int rc = 0;
if (!name) {
cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
fprintf(stderr, "slot_allocate(): No name specified.\n");
rc = -1; goto out;
}
s_header = header_get(st);
if (!s_header) {
rc = -1; goto out;
}
s_node = sector_alloc();
s_mbox = sector_alloc();
while (1) {
i = slot_lookup(st, s_header, name);
if ((i >= 0) || (i == -2)) {
/* -1 is "no slot found", in which case we
* proceed to allocate a new one.
* -2 is "read error during lookup", in which
* case we error out too
* >= 0 is "slot already allocated" */
rc = i; goto out;
}
i = slot_unused(st, s_header);
if (i >= 0) {
cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
fprintf(stdout, "slot %d is unused - trying to own\n", i);
memset(s_node, 0, sizeof(*s_node));
s_node->in_use = 1;
strncpy(s_node->name, name, SECTOR_NAME_MAX);
if (slot_write(st, i, s_node) < 0) {
rc = -1; goto out;
}
sleep(timeout_allocate);
} else {
cl_log(LOG_ERR, "No more free slots.");
fprintf(stderr, "No more free slots.\n");
rc = -1; goto out;
}
}
-out: free(s_node);
+out: free(s_mbox);
+ free(s_node);
free(s_header);
- free(s_mbox);
return(rc);
}
static int
slot_list(struct sbd_context *st)
{
struct sector_header_s *s_header = NULL;
struct sector_node_s *s_node = NULL;
struct sector_mbox_s *s_mbox = NULL;
int i;
int rc = 0;
s_header = header_get(st);
if (!s_header) {
rc = -1; goto out;
}
s_node = sector_alloc();
s_mbox = sector_alloc();
for (i=0; i < s_header->slots; i++) {
if (slot_read(st, i, s_node) < 0) {
rc = -1; goto out;
}
if (s_node->in_use > 0) {
if (mbox_read(st, i, s_mbox) < 0) {
rc = -1; goto out;
}
printf("%d\t%s\t%s\t%s\n",
i, s_node->name, char2cmd(s_mbox->cmd),
s_mbox->from);
}
}
out: free(s_mbox);
free(s_node);
free(s_header);
return rc;
}
static int
slot_msg(struct sbd_context *st, const char *name, const char *cmd)
{
struct sector_header_s *s_header = NULL;
struct sector_mbox_s *s_mbox = NULL;
int mbox;
int rc = 0;
char uuid[37];
if (!name || !cmd) {
cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n");
rc = -1; goto out;
}
s_header = header_get(st);
if (!s_header) {
rc = -1; goto out;
}
if (strcmp(name, "LOCAL") == 0) {
name = local_uname;
}
if (s_header->minor_version > 0) {
uuid_unparse_lower(s_header->uuid, uuid);
cl_log(LOG_INFO, "Device UUID: %s", uuid);
}
mbox = slot_lookup(st, s_header, name);
if (mbox < 0) {
cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
rc = -1; goto out;
}
s_mbox = sector_alloc();
s_mbox->cmd = cmd2char(cmd);
if (s_mbox->cmd < 0) {
cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
rc = -1; goto out;
}
strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX);
cl_log(LOG_INFO, "Writing %s to node slot %s",
cmd, name);
if (mbox_write_verify(st, mbox, s_mbox) < -1) {
rc = -1; goto out;
}
if (strcasecmp(cmd, "exit") != 0) {
cl_log(LOG_INFO, "Messaging delay: %d",
(int)timeout_msgwait);
sleep(timeout_msgwait);
}
cl_log(LOG_INFO, "%s successfully delivered to %s",
cmd, name);
out: free(s_mbox);
free(s_header);
return rc;
}
static int
slot_ping(struct sbd_context *st, const char *name)
{
struct sector_header_s *s_header = NULL;
struct sector_mbox_s *s_mbox = NULL;
int mbox;
int waited = 0;
int rc = 0;
if (!name) {
cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n");
rc = -1; goto out;
}
s_header = header_get(st);
if (!s_header) {
rc = -1; goto out;
}
if (strcmp(name, "LOCAL") == 0) {
name = local_uname;
}
mbox = slot_lookup(st, s_header, name);
if (mbox < 0) {
cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
rc = -1; goto out;
}
s_mbox = sector_alloc();
s_mbox->cmd = SBD_MSG_TEST;
strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX);
DBGLOG(LOG_DEBUG, "Pinging node %s", name);
if (mbox_write(st, mbox, s_mbox) < -1) {
rc = -1; goto out;
}
rc = -1;
while (waited <= timeout_msgwait) {
if (mbox_read(st, mbox, s_mbox) < 0)
break;
if (s_mbox->cmd != SBD_MSG_TEST) {
rc = 0;
break;
}
sleep(1);
waited++;
}
if (rc == 0) {
cl_log(LOG_DEBUG, "%s successfully pinged.", name);
} else {
cl_log(LOG_ERR, "%s failed to ping.", name);
}
out: free(s_mbox);
free(s_header);
return rc;
}
int init_devices(struct servants_list_item *servants)
{
int rc = 0;
struct sbd_context *st;
struct servants_list_item *s;
for (s = servants; s; s = s->next) {
fprintf(stdout, "Initializing device %s\n",
s->devname);
st = open_device(s->devname, LOG_ERR);
if (!st) {
return -1;
}
rc = init_device(st);
close_device(st);
if (rc == -1) {
fprintf(stderr, "Failed to init device %s\n", s->devname);
return rc;
}
fprintf(stdout, "Device %s is initialized.\n", s->devname);
}
fprintf(stdout, "Did you check sbd service down on all nodes before? If not do so now and restart afterwards.\n");
return 0;
}
static int slot_msg_wrapper(const char* devname, int mode, const void* argp)
{
int rc = 0;
struct sbd_context *st;
const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp;
st = open_device(devname, LOG_WARNING);
if (!st)
return -1;
cl_log(LOG_INFO, "Delivery process handling %s",
devname);
rc = slot_msg(st, arg->name, arg->msg);
close_device(st);
return rc;
}
static int slot_ping_wrapper(const char* devname, int mode, const void* argp)
{
int rc = 0;
const char* name = (const char*)argp;
struct sbd_context *st;
st = open_device(devname, LOG_WARNING);
if (!st)
return -1;
rc = slot_ping(st, name);
close_device(st);
return rc;
}
int allocate_slots(const char *name, struct servants_list_item *servants)
{
int rc = 0;
struct sbd_context *st;
struct servants_list_item *s;
for (s = servants; s; s = s->next) {
fprintf(stdout, "Trying to allocate slot for %s on device %s.\n",
name,
s->devname);
st = open_device(s->devname, LOG_WARNING);
if (!st) {
return -1;
}
rc = slot_allocate(st, name);
close_device(st);
if (rc < 0)
return rc;
fprintf(stdout, "Slot for %s has been allocated on %s.\n",
name,
s->devname);
}
return 0;
}
int list_slots(struct servants_list_item *servants)
{
int rc = 0;
struct servants_list_item *s;
struct sbd_context *st;
for (s = servants; s; s = s->next) {
int rv = 0;
st = open_device(s->devname, LOG_WARNING);
if (!st) {
rc = -1;
fprintf(stderr, "== disk %s unreadable!\n", s->devname);
continue;
}
rv = slot_list(st);
close_device(st);
if (rv == -1) {
rc = -1;
fprintf(stderr, "== Slots on disk %s NOT dumped\n", s->devname);
}
}
return rc;
}
int ping_via_slots(const char *name, struct servants_list_item *servants)
{
int sig = 0;
pid_t pid = 0;
int status = 0;
int servants_finished = 0;
sigset_t procmask;
siginfo_t sinfo;
struct servants_list_item *s;
sigemptyset(&procmask);
sigaddset(&procmask, SIGCHLD);
sigprocmask(SIG_BLOCK, &procmask, NULL);
for (s = servants; s; s = s->next) {
if(sbd_is_disk(s)) {
s->pid = assign_servant(s->devname, &slot_ping_wrapper, 0, (const void*)name);
}
}
while (servants_finished < disk_count) {
sig = sigwaitinfo(&procmask, &sinfo);
if (sig == SIGCHLD) {
while ((pid = wait(&status))) {
if (pid == -1 && errno == ECHILD) {
break;
} else {
s = lookup_servant_by_pid(pid);
if (sbd_is_disk(s)) {
servants_finished++;
}
}
}
}
}
return 0;
}
int quorum_write(int good_servants)
{
return (good_servants > disk_count/2);
}
int messenger(const char *name, const char *msg, struct servants_list_item *servants)
{
int sig = 0;
pid_t pid = 0;
int status = 0;
int servants_finished = 0;
int successful_delivery = 0;
sigset_t procmask;
siginfo_t sinfo;
struct servants_list_item *s;
struct slot_msg_arg_t slot_msg_arg = {name, msg};
sigemptyset(&procmask);
sigaddset(&procmask, SIGCHLD);
sigprocmask(SIG_BLOCK, &procmask, NULL);
for (s = servants; s; s = s->next) {
s->pid = assign_servant(s->devname, &slot_msg_wrapper, 0, &slot_msg_arg);
}
while (!(quorum_write(successful_delivery) ||
(servants_finished == disk_count))) {
sig = sigwaitinfo(&procmask, &sinfo);
if (sig == SIGCHLD) {
while ((pid = waitpid(-1, &status, WNOHANG))) {
if (pid == -1 && errno == ECHILD) {
break;
} else {
servants_finished++;
if (WIFEXITED(status)
&& WEXITSTATUS(status) == 0) {
DBGLOG(LOG_INFO, "Process %d succeeded.",
(int)pid);
successful_delivery++;
} else {
cl_log(LOG_WARNING, "Process %d failed to deliver!",
(int)pid);
}
}
}
}
}
if (quorum_write(successful_delivery)) {
cl_log(LOG_INFO, "Message successfully delivered.");
return 0;
} else {
cl_log(LOG_ERR, "Message is not delivered via more then a half of devices");
return -1;
}
}
unsigned long
get_first_msgwait(struct servants_list_item *servants)
{
unsigned long msgwait = 0;
struct servants_list_item *s = servants;
for (s = servants; s; s = s->next) {
struct sbd_context *st;
struct sector_header_s *s_header;
st = open_device(s->devname, LOG_WARNING);
if (!st) {
continue;
}
s_header = header_get(st);
if (s_header != NULL) {
msgwait = (unsigned long)s_header->timeout_msgwait;
close_device(st);
free(s_header);
return msgwait;
}
close_device(st);
}
return msgwait;
}
int dump_headers(struct servants_list_item *servants)
{
int rc = 0;
struct servants_list_item *s = servants;
struct sbd_context *st;
for (s = servants; s; s = s->next) {
int rv;
fprintf(stdout, "==Dumping header on disk %s\n", s->devname);
st = open_device(s->devname, LOG_WARNING);
if (st) {
rv = header_dump(st);
close_device(st);
} else {
fprintf(stderr, "== disk %s unreadable!\n", s->devname);
rv = -1;
}
if (rv == -1) {
rc = -1;
fprintf(stderr, "==Header on disk %s NOT dumped\n", s->devname);
} else {
fprintf(stdout, "==Header on disk %s is dumped\n", s->devname);
}
}
return rc;
}
void open_any_device(struct servants_list_item *servants)
{
struct sector_header_s *hdr_cur = NULL;
struct timespec t_0;
int t_wait = 0;
bool logged_once = false;
clock_gettime(CLOCK_MONOTONIC, &t_0);
while (!hdr_cur && t_wait < timeout_startup) {
struct timespec t_now;
struct servants_list_item* s;
for (s = servants; s; s = s->next) {
struct sbd_context *st = open_device(s->devname, LOG_DEBUG);
if (!st) {
if (logged_once == false) {
cl_log(LOG_WARNING, "Failed to open %s. "
"Trying any other configured devices, "
"otherwise retrying every %ds within %ds",
s->devname, timeout_loop, timeout_startup);
logged_once = true;
}
continue;
}
hdr_cur = header_get(st);
close_device(st);
if (hdr_cur) {
break;
} else {
if (logged_once == false) {
cl_log(LOG_WARNING, "Failed to read header from %s. "
"Trying any other configured devices, "
"otherwise retrying every %ds within %ds",
s->devname, timeout_loop, timeout_startup);
logged_once = true;
}
}
}
clock_gettime(CLOCK_MONOTONIC, &t_now);
t_wait = t_now.tv_sec - t_0.tv_sec;
if (!hdr_cur) {
sleep(timeout_loop);
}
}
if (hdr_cur) {
timeout_watchdog = hdr_cur->timeout_watchdog;
timeout_allocate = hdr_cur->timeout_allocate;
timeout_loop = hdr_cur->timeout_loop;
timeout_msgwait = hdr_cur->timeout_msgwait;
} else {
cl_log(LOG_ERR, "No devices were available at start-up within %i seconds.",
timeout_startup);
exit(1);
}
free(hdr_cur);
return;
}
/*
::-::-::-::-::-::-::-::-::-::-::-::-::
Begin disk based servant code
::-::-::-::-::-::-::-::-::-::-::-::-::
*/
static int servant_check_timeout_inconsistent(struct sector_header_s *hdr)
{
if (timeout_watchdog != hdr->timeout_watchdog) {
cl_log(LOG_WARNING, "watchdog timeout: %d versus %d on this device",
(int)timeout_watchdog, (int)hdr->timeout_watchdog);
return -1;
}
if (timeout_allocate != hdr->timeout_allocate) {
cl_log(LOG_WARNING, "allocate timeout: %d versus %d on this device",
(int)timeout_allocate, (int)hdr->timeout_allocate);
return -1;
}
if (timeout_loop != hdr->timeout_loop) {
cl_log(LOG_WARNING, "loop timeout: %d versus %d on this device",
(int)timeout_loop, (int)hdr->timeout_loop);
return -1;
}
if (timeout_msgwait != hdr->timeout_msgwait) {
cl_log(LOG_WARNING, "msgwait timeout: %d versus %d on this device",
(int)timeout_msgwait, (int)hdr->timeout_msgwait);
return -1;
}
return 0;
}
int servant_md(const char *diskname, int mode, const void* argp)
{
struct sector_mbox_s *s_mbox = NULL;
struct sector_node_s *s_node = NULL;
struct sector_header_s *s_header = NULL;
int mbox;
int rc = 0;
time_t t0, t1, latency;
union sigval signal_value;
sigset_t servant_masks;
struct sbd_context *st;
pid_t ppid;
char uuid[37];
const struct servants_list_item *s = argp;
cl_log(LOG_INFO, "Servant starting for device %s", diskname);
/* Block most of the signals */
sigfillset(&servant_masks);
sigdelset(&servant_masks, SIGKILL);
sigdelset(&servant_masks, SIGFPE);
sigdelset(&servant_masks, SIGILL);
sigdelset(&servant_masks, SIGSEGV);
sigdelset(&servant_masks, SIGBUS);
sigdelset(&servant_masks, SIGALRM);
/* FIXME: check error */
sigprocmask(SIG_SETMASK, &servant_masks, NULL);
st = open_device(diskname, LOG_WARNING);
if (!st) {
exit(EXIT_MD_SERVANT_IO_FAIL);
}
s_header = header_get(st);
if (!s_header) {
cl_log(LOG_ERR, "Not a valid header on %s", diskname);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (servant_check_timeout_inconsistent(s_header) < 0) {
cl_log(LOG_ERR, "Timeouts on %s do not match first device",
diskname);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (s_header->minor_version > 0) {
uuid_unparse_lower(s_header->uuid, uuid);
cl_log(LOG_INFO, "Device %s uuid: %s", diskname, uuid);
}
mbox = slot_allocate(st, local_uname);
if (mbox < 0) {
cl_log(LOG_ERR,
"No slot allocated, and automatic allocation failed for disk %s.",
diskname);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
s_node = sector_alloc();
if (slot_read(st, mbox, s_node) < 0) {
cl_log(LOG_ERR, "Unable to read node entry on %s",
diskname);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname);
if (s_header->minor_version == 0) {
set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
} else {
set_proc_title("sbd: watcher: %s - slot: %d - uuid: %s",
diskname, mbox, uuid);
}
s_mbox = sector_alloc();
if (s->first_start) {
if (mode > 0) {
if (mbox_read(st, mbox, s_mbox) < 0) {
cl_log(LOG_ERR, "mbox read failed during start-up in servant.");
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (s_mbox->cmd != SBD_MSG_EXIT &&
s_mbox->cmd != SBD_MSG_EMPTY) {
/* Not a clean stop. Abort start-up */
cl_log(LOG_WARNING, "Found fencing message - aborting start-up. Manual intervention required!");
ppid = getppid();
sigqueue(ppid, SIG_EXITREQ, signal_value);
rc = 0;
goto out;
}
}
DBGLOG(LOG_INFO, "First servant start - zeroing inbox");
memset(s_mbox, 0, sizeof(*s_mbox));
if (mbox_write(st, mbox, s_mbox) < 0) {
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
}
memset(&signal_value, 0, sizeof(signal_value));
while (1) {
struct sector_header_s *s_header_retry = NULL;
struct sector_node_s *s_node_retry = NULL;
t0 = time(NULL);
sleep(timeout_loop);
ppid = getppid();
if (ppid == 1) {
/* Our parent died unexpectedly. Triggering
* self-fence. */
do_timeout_action();
}
/* These attempts are, by definition, somewhat racy. If
* the device is wiped out or corrupted between here and
* us reading our mbox, there is nothing we can do about
* that. But at least we tried. */
s_header_retry = header_get(st);
if (!s_header_retry) {
cl_log(LOG_ERR, "No longer found a valid header on %s", diskname);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) {
cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname);
free(s_header_retry);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
free(s_header_retry);
s_node_retry = sector_alloc();
if (slot_read(st, mbox, s_node_retry) < 0) {
cl_log(LOG_ERR, "slot read failed in servant.");
free(s_node_retry);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) {
cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname);
free(s_node_retry);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
free(s_node_retry);
if (mbox_read(st, mbox, s_mbox) < 0) {
cl_log(LOG_ERR, "mbox read failed in servant.");
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (s_mbox->cmd > 0) {
cl_log(LOG_NOTICE,
"Received command %s from %s on disk %s",
char2cmd(s_mbox->cmd), s_mbox->from, diskname);
switch (s_mbox->cmd) {
case SBD_MSG_TEST:
memset(s_mbox, 0, sizeof(*s_mbox));
mbox_write(st, mbox, s_mbox);
sigqueue(ppid, SIG_TEST, signal_value);
break;
case SBD_MSG_RESET:
rc = EXIT_MD_SERVANT_REQUEST_RESET;
goto out;
case SBD_MSG_OFF:
rc = EXIT_MD_SERVANT_REQUEST_SHUTOFF;
goto out;
case SBD_MSG_EXIT:
sigqueue(ppid, SIG_EXITREQ, signal_value);
break;
case SBD_MSG_CRASHDUMP:
rc = EXIT_MD_SERVANT_REQUEST_CRASHDUMP;
goto out;
default:
/* FIXME:
An "unknown" message might result
from a partial write.
log it and clear the slot.
*/
cl_log(LOG_ERR, "Unknown message on disk %s",
diskname);
memset(s_mbox, 0, sizeof(*s_mbox));
mbox_write(st, mbox, s_mbox);
break;
}
}
sigqueue(ppid, SIG_LIVENESS, signal_value);
t1 = time(NULL);
latency = t1 - t0;
if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
cl_log(LOG_WARNING,
"Latency: %ds exceeded watchdog warning timeout %ds on disk %s",
(int)latency, (int)timeout_watchdog_warn,
diskname);
} else if (debug) {
DBGLOG(LOG_DEBUG, "Latency: %ds on disk %s", (int)latency,
diskname);
}
}
out:
- free(s_header);
free(s_node);
free(s_mbox);
+ free(s_header);
close_device(st);
exit(rc);
}
-
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 6:03 PM (1 d, 4 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665031
Default Alt Text
(83 KB)
Attached To
Mode
rS SBD
Attached
Detach File
Event Timeline
Log In to Comment