diff --git a/agent/sbd.in b/agent/sbd.in index 4bc0245..61b7199 100644 --- a/agent/sbd.in +++ b/agent/sbd.in @@ -1,180 +1,182 @@ #!/bin/bash # # This STONITH script drives the shared-storage stonith plugin. # # Copyright (C) 2013 Lars Marowsky-Bree # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # Main code if [ -z "$sbd_device" ]; then if [ -f @CONFIGDIR@/sbd ]; then source @CONFIGDIR@/sbd sbd_device=$SBD_DEVICE fi fi SBD_DEVS=${sbd_device%;} sbd_device=${SBD_DEVS//;/ -d } sbd_check_device() { if [ -z "$sbd_device" ]; then ha_log.sh err "No sbd device(s) found in the configuration." exit 1 fi } sbd_validate_timeout() { case "$timeout_bypass" in yes|true|1|YES|TRUE|ja|on|ON) return ;; esac crm_timeout=$[$(crm_attribute -t crm_config -G -n stonith-timeout -d 20s -q | sed -e 's/\(.*\)s/\1/' -e 's/\(.*\)m/\1*60/')] sbd_timeout=$(sbd -d $sbd_device dump | perl -ne 'if (/msgwait.*: (\d+)/) { print "$1\n"; }' | head -n 1) if [ "$sbd_timeout" -eq "0" ]; then return fi sbd_timeout_min=$[$sbd_timeout*12/10] if [ "$sbd_timeout_min" -lt 20 ]; then sbd_timeout_min=20 fi sbd_timeout_suggested=$[$sbd_timeout_min*12/10] if [ "$crm_timeout" -lt "$sbd_timeout_min" ]; then ha_log.sh err "The CIB property stonith-timeout is set too low for sbd to ever succeed" ha_log.sh err "Recommended value is $sbd_timeout_suggested, updating configuration." crm_attribute -t crm_config -n stonith-timeout -v $sbd_timeout_suggested exit 1 fi } case $1 in gethosts) sbd_check_device echo `sbd -d $sbd_device list | cut -f2 | sort | uniq` exit 0 ;; off|reset) sbd_check_device sbd_validate_timeout message=$1 case "$crashdump" in yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;; esac sbd -d $sbd_device message $2 $message exit $? ;; status) sbd_check_device sbd_validate_timeout - if ! sbd -d $sbd_device list >/dev/null 2>&1 ; then - ha_log.sh err "sbd could not list nodes from $sbd_device" + error_output=$(sbd -d $sbd_device list 2>&1 >/dev/null) + if [ $? -ne 0 ]; then + error_message=$(echo "$error_output" | grep -v "please check the logs") + ha_log.sh err "sbd list failed: $error_message" exit 1 fi exit 0 ;; on) exit 1 ;; getconfignames) echo "sbd_device crashdump timeout_bypass" exit 0 ;; getinfo-devid) echo "Shared storage STONITH device" exit 0 ;; getinfo-devname) echo "Shared storage STONITH device" exit 0 ;; getinfo-devdescr) cat << DESC sbd uses a shared storage device as a medium to communicate fencing requests. This allows clusters without network power switches; the downside is that access to the shared storage device becomes a Single Point of Failure. It requires sbd to be configured on all nodes. Please read http://linux-ha.org/wiki/SBD_Fencing! DESC exit 0 ;; getinfo-devurl) echo "http://linux-ha.org/wiki/SBD_Fencing" exit 0 ;; getinfo-xml) cat << SSHXML Crashdump instead of regular fence If SBD is given a fence command, this option will instead perform a kernel crash of a reboot or power-off, which on a properly configured system can lead to a crashdump for analysis. This is less safe for production environments. Please use with caution and for debugging purposes only. SBD device(s) The block device used for the SBD partition. Up to three can be specified if separated by a semicolon. (Please check the documentation if specifying two.) If not specified, will default to the value from @CONFIGDIR@/sbd. Permit a seemingly too short stonith-timeout The sbd agent will try to detect a too short stonith-timeout (relative to msgwait) in the Pacemaker configuration and automatically correct it. Should that logic fail in your environment or you have legitimate need to use a shorter timeout, you can disable it via this parameter. SSHXML exit 0 ;; *) exit 1 ;; esac diff --git a/sbd.spec b/sbd.spec index 06b4582..245c2ef 100644 --- a/sbd.spec +++ b/sbd.spec @@ -1,160 +1,161 @@ # # spec file for package sbd # # Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. # Copyright (c) 2013 Lars Marowsky-Bree # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed # upon. The license for this file, and modifications and additions to the # file, is the same license as for the pristine package itself (unless the # license for the pristine package is not an Open Source License, in which # case the license is the MIT License). An "Open Source License" is a # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. # Please submit bugfixes or comments via http://bugs.opensuse.org/ # %global commit 5705703da3db01bb4c34fd73ae33f24b43a16b78-mod %global shortcommit %(echo %{commit}|cut -c1-8) %global modified %(echo %{commit}-|cut -f2 -d-) %global github_owner beekhof %global buildnum 1 Name: sbd Summary: Storage-based death License: GPLv2+ Group: System Environment/Daemons Version: 1.4.0 Release: 99.%{buildnum}.%{shortcommit}.%{modified}git%{?dist} Url: https://github.com/%{github_owner}/%{name} Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf BuildRequires: automake BuildRequires: libuuid-devel BuildRequires: glib2-devel BuildRequires: libaio-devel BuildRequires: corosynclib-devel %if 0%{?suse_version} BuildRequires: libpacemaker-devel %else BuildRequires: pacemaker-libs-devel %endif BuildRequires: libtool BuildRequires: libuuid-devel BuildRequires: libxml2-devel BuildRequires: pkgconfig BuildRequires: make +Conflicts: fence-agents-sbd < 4.5.0 %if 0%{?rhel} > 0 ExclusiveArch: i686 x86_64 s390x aarch64 ppc64le %endif %if %{defined systemd_requires} %systemd_requires %endif %description This package contains the storage-based death functionality. %prep ########################################################### # %setup -n sbd-%{version} -q %setup -q -n %{name}-%{commit} ########################################################### %build autoreconf -i export CFLAGS="$RPM_OPT_FLAGS -Wall -Werror" %configure make %{?_smp_mflags} ########################################################### %install ########################################################### make DESTDIR=$RPM_BUILD_ROOT LIBDIR=%{_libdir} install rm -rf ${RPM_BUILD_ROOT}%{_libdir}/stonith install -D -m 0755 src/sbd.sh $RPM_BUILD_ROOT/usr/share/sbd/sbd.sh %if %{defined _unitdir} install -D -m 0644 src/sbd.service $RPM_BUILD_ROOT/%{_unitdir}/sbd.service install -D -m 0644 src/sbd_remote.service $RPM_BUILD_ROOT/%{_unitdir}/sbd_remote.service %endif mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig install -m 644 src/sbd.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/sbd %clean rm -rf %{buildroot} %if %{defined _unitdir} %post %systemd_post sbd.service %systemd_post sbd_remote.service %preun %systemd_preun sbd.service %systemd_preun sbd_remote.service %postun %systemd_postun sbd.service %systemd_postun sbd_remote.service %endif %files ########################################################### %defattr(-,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/sbd %{_sbindir}/sbd %{_datadir}/sbd %doc %{_mandir}/man8/sbd* %if %{defined _unitdir} %{_unitdir}/sbd.service %{_unitdir}/sbd_remote.service %endif %doc COPYING %changelog * Mon Jan 14 2019 - 1.4.0-0.1.2d595fdd.git - updated travis-CI (ppc64le-build, fedora29, remove need for alectolytic-build-container) - make watchdog-device-query easier to be handled by an SELinux-policy - configurable delay value for SBD_DELAY_START - use pacemaker's new pe api with constructors/destructors - make timeout-action executed by sbd configurable - init script for sysv systems - version bump to v1.4.0 to denote Pacemaker 2.0.0 compatibility * Fri Jun 29 2018 - 1.3.1-0.1.e102d9ed.git - removed unneeded python-devel build-requirement - changed legacy corosync-devel to corosynclib-devel * Fri Nov 3 2017 - 1.3.1-0.1.a180176c.git - Add commands to test/query watchdogs - Allow 2-node-operation with a single shared-disk - Overhaul of the command-line options & config-file - Proper handling of off instead of reboot - Refactored disk-servant for more robust communication with parent - Fix config for Debian + configurable location of config - Fixes in sbd.sh - multiple SBD devices and others * Sun Mar 27 2016 - 1.3.0-0.1.4ee36fa3.git - Changes since v1.2.0 like adding the possibility to have a watchdog-only setup without shared-block-devices legitimate a bump to v1.3.0. * Mon Oct 13 2014 - 1.2.1-0.4.3de531ed.git - Fixes for suitability to the el7 environment * Tue Sep 30 2014 - 1.2.1-0.3.8f912945.git - Only build on archs supported by the HA Add-on * Fri Aug 29 2014 - 1.2.1-0.2.8f912945.git - Remove some additional SUSE-isms * Fri Aug 29 2014 - 1.2.1-0.1.8f912945.git - Prepare for package review Resolves: rhbz#1134245 diff --git a/src/sbd-md.c b/src/sbd-md.c index d5aaf6e..1c66eb0 100644 --- a/src/sbd-md.c +++ b/src/sbd-md.c @@ -1,1247 +1,1247 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "sbd.h" #define SBD_MSG_EMPTY 0x00 #define SBD_MSG_TEST 0x01 #define SBD_MSG_RESET 0x02 #define SBD_MSG_OFF 0x03 #define SBD_MSG_EXIT 0x04 #define SBD_MSG_CRASHDUMP 0x05 #define SLOT_TO_SECTOR(slot) (1+slot*2) #define MBOX_TO_SECTOR(mbox) (2+mbox*2) extern int disk_count; /* These have to match the values in the header of the partition */ static char sbd_magic[8] = "SBD_SBD_"; static char sbd_version = 0x02; struct slot_msg_arg_t { const char* name; const char* msg; }; static signed char cmd2char(const char *cmd) { if (strcmp("clear", cmd) == 0) { return SBD_MSG_EMPTY; } else if (strcmp("test", cmd) == 0) { return SBD_MSG_TEST; } else if (strcmp("reset", cmd) == 0) { return SBD_MSG_RESET; } else if (strcmp("off", cmd) == 0) { return SBD_MSG_OFF; } else if (strcmp("exit", cmd) == 0) { return SBD_MSG_EXIT; } else if (strcmp("crashdump", cmd) == 0) { return SBD_MSG_CRASHDUMP; } return -1; } static const char* char2cmd(const char cmd) { switch (cmd) { case SBD_MSG_EMPTY: return "clear"; break; case SBD_MSG_TEST: return "test"; break; case SBD_MSG_RESET: return "reset"; break; case SBD_MSG_OFF: return "off"; break; case SBD_MSG_EXIT: return "exit"; break; case SBD_MSG_CRASHDUMP: return "crashdump"; break; default: return "undefined"; break; } } static void close_device(struct sbd_context *st) { close(st->devfd); free(st); } static struct sbd_context * open_device(const char* devname, int loglevel) { struct sbd_context *st; if (!devname) return NULL; st = malloc(sizeof(struct sbd_context)); if (!st) return NULL; memset(st, 0, sizeof(struct sbd_context)); if (io_setup(1, &st->ioctx) != 0) { cl_perror("io_setup failed"); free(st); return NULL; } st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT); if (st->devfd == -1) { if (loglevel == LOG_DEBUG) { DBGLOG(loglevel, "Opening device %s failed.", devname); } else { cl_log(loglevel, "Opening device %s failed.", devname); } free(st); return NULL; } ioctl(st->devfd, BLKSSZGET, §or_size); if (sector_size == 0) { cl_perror("Get sector size failed.\n"); close_device(st); return NULL; } return st; } static void * sector_alloc(void) { void *x; x = valloc(sector_size); if (!x) { exit(1); } memset(x, 0, sector_size); return x; } static int sector_io(struct sbd_context *st, int sector, void *data, int rw) { struct timespec timeout; struct io_event event; struct iocb *ios[1] = { &st->io }; long r; timeout.tv_sec = timeout_io; timeout.tv_nsec = 0; memset(&st->io, 0, sizeof(struct iocb)); if (rw) { io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector); } else { io_prep_pread(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector); } if (io_submit(st->ioctx, 1, ios) != 1) { cl_log(LOG_ERR, "Failed to submit IO request! (rw=%d)", rw); return -1; } errno = 0; r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout); if (r < 0 ) { cl_log(LOG_ERR, "Failed to retrieve IO events (rw=%d)", rw); return -1; } else if (r < 1L) { cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d)", rw); r = io_cancel(st->ioctx, ios[0], &event); if (r) { DBGLOG(LOG_INFO, "Could not cancel IO request (rw=%d)", rw); /* Doesn't really matter, debugging information. */ } return -1; } else if (r > 1L) { cl_log(LOG_ERR, "More than one IO was returned (r=%ld)", r); return -1; } /* IO is happy */ if (event.res == sector_size) { return 0; } else { cl_log(LOG_ERR, "Short IO (rw=%d, res=%lu, sector_size=%d)", rw, event.res, sector_size); return -1; } } static int sector_write(struct sbd_context *st, int sector, void *data) { return sector_io(st, sector, data, 1); } static int sector_read(struct sbd_context *st, int sector, void *data) { return sector_io(st, sector, data, 0); } static int slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node) { return sector_read(st, SLOT_TO_SECTOR(slot), s_node); } static int slot_write(struct sbd_context *st, int slot, struct sector_node_s *s_node) { return sector_write(st, SLOT_TO_SECTOR(slot), s_node); } static int mbox_write(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { return sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox); } static int mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { return sector_read(st, MBOX_TO_SECTOR(mbox), s_mbox); } static int mbox_write_verify(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { void *data; int rc = 0; if (sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox) < 0) return -1; data = sector_alloc(); if (sector_read(st, MBOX_TO_SECTOR(mbox), data) < 0) { rc = -1; goto out; } if (memcmp(s_mbox, data, sector_size) != 0) { cl_log(LOG_ERR, "Write verification failed!"); rc = -1; goto out; } rc = 0; out: free(data); return rc; } static int header_write(struct sbd_context *st, struct sector_header_s *s_header) { s_header->sector_size = htonl(s_header->sector_size); s_header->timeout_watchdog = htonl(s_header->timeout_watchdog); s_header->timeout_allocate = htonl(s_header->timeout_allocate); s_header->timeout_loop = htonl(s_header->timeout_loop); s_header->timeout_msgwait = htonl(s_header->timeout_msgwait); return sector_write(st, 0, s_header); } static int header_read(struct sbd_context *st, struct sector_header_s *s_header) { if (sector_read(st, 0, s_header) < 0) return -1; s_header->sector_size = ntohl(s_header->sector_size); s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog); s_header->timeout_allocate = ntohl(s_header->timeout_allocate); s_header->timeout_loop = ntohl(s_header->timeout_loop); s_header->timeout_msgwait = ntohl(s_header->timeout_msgwait); /* This sets the global defaults: */ timeout_watchdog = s_header->timeout_watchdog; timeout_allocate = s_header->timeout_allocate; timeout_loop = s_header->timeout_loop; timeout_msgwait = s_header->timeout_msgwait; return 0; } static int valid_header(const struct sector_header_s *s_header) { if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) { cl_log(LOG_ERR, "Header magic does not match."); return -1; } if (s_header->version != sbd_version) { cl_log(LOG_ERR, "Header version does not match."); return -1; } if (s_header->sector_size != sector_size) { cl_log(LOG_ERR, "Header sector size does not match."); return -1; } return 0; } static struct sector_header_s * header_get(struct sbd_context *st) { struct sector_header_s *s_header; s_header = sector_alloc(); if (header_read(st, s_header) < 0) { cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd); return NULL; } if (valid_header(s_header) < 0) { cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd); return NULL; } /* cl_log(LOG_INFO, "Found version %d header with %d slots", s_header->version, s_header->slots); */ return s_header; } static int header_dump(struct sbd_context *st) { struct sector_header_s *s_header; char uuid[37]; s_header = header_get(st); if (s_header == NULL) return -1; printf("Header version : %u.%u\n", s_header->version, s_header->minor_version); if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); printf("UUID : %s\n", uuid); } printf("Number of slots : %u\n", s_header->slots); printf("Sector size : %lu\n", (unsigned long)s_header->sector_size); printf("Timeout (watchdog) : %lu\n", (unsigned long)s_header->timeout_watchdog); printf("Timeout (allocate) : %lu\n", (unsigned long)s_header->timeout_allocate); printf("Timeout (loop) : %lu\n", (unsigned long)s_header->timeout_loop); printf("Timeout (msgwait) : %lu\n", (unsigned long)s_header->timeout_msgwait); return 0; } static int init_device(struct sbd_context *st) { struct sector_header_s *s_header; struct sector_node_s *s_node; struct sector_mbox_s *s_mbox; char uuid[37]; int i; int rc = 0; s_header = sector_alloc(); s_node = sector_alloc(); s_mbox = sector_alloc(); memcpy(s_header->magic, sbd_magic, sizeof(s_header->magic)); s_header->version = sbd_version; s_header->slots = 255; s_header->sector_size = sector_size; s_header->timeout_watchdog = timeout_watchdog; s_header->timeout_allocate = timeout_allocate; s_header->timeout_loop = timeout_loop; s_header->timeout_msgwait = timeout_msgwait; s_header->minor_version = 1; uuid_generate(s_header->uuid); uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)", s_header->version, s_header->minor_version, st->devfd, uuid); fprintf(stdout, "Creating version %d.%d header on device %d (uuid: %s)\n", s_header->version, s_header->minor_version, st->devfd, uuid); if (header_write(st, s_header) < 0) { rc = -1; goto out; } cl_log(LOG_INFO, "Initializing %d slots on device %d", s_header->slots, st->devfd); fprintf(stdout, "Initializing %d slots on device %d\n", s_header->slots, st->devfd); for (i=0;i < s_header->slots;i++) { if (slot_write(st, i, s_node) < 0) { rc = -1; goto out; } if (mbox_write(st, i, s_mbox) < 0) { rc = -1; goto out; } } out: free(s_node); free(s_header); free(s_mbox); return(rc); } /* Check if there already is a slot allocated to said name; returns the * slot number. If not found, returns -1. * This is necessary because slots might not be continuous. */ static int slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name) { struct sector_node_s *s_node = NULL; int i; int rc = -1; if (!name) { cl_log(LOG_ERR, "slot_lookup(): No name specified.\n"); goto out; } s_node = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -2; goto out; } if (s_node->in_use != 0) { if (strncasecmp(s_node->name, name, SECTOR_NAME_MAX) == 0) { DBGLOG(LOG_INFO, "%s owns slot %d", name, i); rc = i; goto out; } } } out: free(s_node); return rc; } static int slot_unused(struct sbd_context *st, const struct sector_header_s *s_header) { struct sector_node_s *s_node; int i; int rc = -1; s_node = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -1; goto out; } if (s_node->in_use == 0) { rc = i; goto out; } } out: free(s_node); return rc; } static int slot_allocate(struct sbd_context *st, const char *name) { struct sector_header_s *s_header = NULL; struct sector_node_s *s_node = NULL; struct sector_mbox_s *s_mbox = NULL; int i; int rc = 0; if (!name) { cl_log(LOG_ERR, "slot_allocate(): No name specified.\n"); fprintf(stderr, "slot_allocate(): No name specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } s_node = sector_alloc(); s_mbox = sector_alloc(); while (1) { i = slot_lookup(st, s_header, name); if ((i >= 0) || (i == -2)) { /* -1 is "no slot found", in which case we * proceed to allocate a new one. * -2 is "read error during lookup", in which * case we error out too * >= 0 is "slot already allocated" */ rc = i; goto out; } i = slot_unused(st, s_header); if (i >= 0) { cl_log(LOG_INFO, "slot %d is unused - trying to own", i); fprintf(stdout, "slot %d is unused - trying to own\n", i); memset(s_node, 0, sizeof(*s_node)); s_node->in_use = 1; strncpy(s_node->name, name, SECTOR_NAME_MAX); if (slot_write(st, i, s_node) < 0) { rc = -1; goto out; } sleep(timeout_allocate); } else { cl_log(LOG_ERR, "No more free slots."); fprintf(stderr, "No more free slots.\n"); rc = -1; goto out; } } out: free(s_node); free(s_header); free(s_mbox); return(rc); } static int slot_list(struct sbd_context *st) { struct sector_header_s *s_header = NULL; struct sector_node_s *s_node = NULL; struct sector_mbox_s *s_mbox = NULL; int i; int rc = 0; s_header = header_get(st); if (!s_header) { rc = -1; goto out; } s_node = sector_alloc(); s_mbox = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -1; goto out; } if (s_node->in_use > 0) { if (mbox_read(st, i, s_mbox) < 0) { rc = -1; goto out; } printf("%d\t%s\t%s\t%s\n", i, s_node->name, char2cmd(s_mbox->cmd), s_mbox->from); } } out: free(s_node); free(s_header); free(s_mbox); return rc; } static int slot_msg(struct sbd_context *st, const char *name, const char *cmd) { struct sector_header_s *s_header = NULL; struct sector_mbox_s *s_mbox = NULL; int mbox; int rc = 0; char uuid[37]; if (!name || !cmd) { cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } if (strcmp(name, "LOCAL") == 0) { name = local_uname; } if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Device UUID: %s", uuid); } mbox = slot_lookup(st, s_header, name); if (mbox < 0) { cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name); rc = -1; goto out; } s_mbox = sector_alloc(); s_mbox->cmd = cmd2char(cmd); if (s_mbox->cmd < 0) { cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd); rc = -1; goto out; } strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX); cl_log(LOG_INFO, "Writing %s to node slot %s", cmd, name); if (mbox_write_verify(st, mbox, s_mbox) < -1) { rc = -1; goto out; } if (strcasecmp(cmd, "exit") != 0) { cl_log(LOG_INFO, "Messaging delay: %d", (int)timeout_msgwait); sleep(timeout_msgwait); } cl_log(LOG_INFO, "%s successfully delivered to %s", cmd, name); out: free(s_mbox); free(s_header); return rc; } static int slot_ping(struct sbd_context *st, const char *name) { struct sector_header_s *s_header = NULL; struct sector_mbox_s *s_mbox = NULL; int mbox; int waited = 0; int rc = 0; if (!name) { cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } if (strcmp(name, "LOCAL") == 0) { name = local_uname; } mbox = slot_lookup(st, s_header, name); if (mbox < 0) { cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name); rc = -1; goto out; } s_mbox = sector_alloc(); s_mbox->cmd = SBD_MSG_TEST; strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX); DBGLOG(LOG_DEBUG, "Pinging node %s", name); if (mbox_write(st, mbox, s_mbox) < -1) { rc = -1; goto out; } rc = -1; while (waited <= timeout_msgwait) { if (mbox_read(st, mbox, s_mbox) < 0) break; if (s_mbox->cmd != SBD_MSG_TEST) { rc = 0; break; } sleep(1); waited++; } if (rc == 0) { cl_log(LOG_DEBUG, "%s successfully pinged.", name); } else { cl_log(LOG_ERR, "%s failed to ping.", name); } out: free(s_mbox); free(s_header); return rc; } int init_devices(struct servants_list_item *servants) { int rc = 0; struct sbd_context *st; struct servants_list_item *s; for (s = servants; s; s = s->next) { fprintf(stdout, "Initializing device %s\n", s->devname); st = open_device(s->devname, LOG_ERR); if (!st) { return -1; } rc = init_device(st); close_device(st); if (rc == -1) { fprintf(stderr, "Failed to init device %s\n", s->devname); return rc; } fprintf(stdout, "Device %s is initialized.\n", s->devname); } return 0; } static int slot_msg_wrapper(const char* devname, int mode, const void* argp) { int rc = 0; struct sbd_context *st; const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp; st = open_device(devname, LOG_WARNING); if (!st) return -1; cl_log(LOG_INFO, "Delivery process handling %s", devname); rc = slot_msg(st, arg->name, arg->msg); close_device(st); return rc; } static int slot_ping_wrapper(const char* devname, int mode, const void* argp) { int rc = 0; const char* name = (const char*)argp; struct sbd_context *st; st = open_device(devname, LOG_WARNING); if (!st) return -1; rc = slot_ping(st, name); close_device(st); return rc; } int allocate_slots(const char *name, struct servants_list_item *servants) { int rc = 0; struct sbd_context *st; struct servants_list_item *s; for (s = servants; s; s = s->next) { fprintf(stdout, "Trying to allocate slot for %s on device %s.\n", name, s->devname); st = open_device(s->devname, LOG_WARNING); if (!st) { return -1; } rc = slot_allocate(st, name); close_device(st); if (rc < 0) return rc; fprintf(stdout, "Slot for %s has been allocated on %s.\n", name, s->devname); } return 0; } int list_slots(struct servants_list_item *servants) { int rc = 0; struct servants_list_item *s; struct sbd_context *st; for (s = servants; s; s = s->next) { int rv = 0; st = open_device(s->devname, LOG_WARNING); if (!st) { rc = -1; - fprintf(stdout, "== disk %s unreadable!\n", s->devname); + fprintf(stderr, "== disk %s unreadable!\n", s->devname); continue; } rv = slot_list(st); close_device(st); if (rv == -1) { rc = -1; - fprintf(stdout, "== Slots on disk %s NOT dumped\n", s->devname); + fprintf(stderr, "== Slots on disk %s NOT dumped\n", s->devname); } } return rc; } int ping_via_slots(const char *name, struct servants_list_item *servants) { int sig = 0; pid_t pid = 0; int status = 0; int servants_finished = 0; sigset_t procmask; siginfo_t sinfo; struct servants_list_item *s; sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigprocmask(SIG_BLOCK, &procmask, NULL); for (s = servants; s; s = s->next) { if(sbd_is_disk(s)) { s->pid = assign_servant(s->devname, &slot_ping_wrapper, 0, (const void*)name); } } while (servants_finished < disk_count) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = wait(&status))) { if (pid == -1 && errno == ECHILD) { break; } else { s = lookup_servant_by_pid(pid); if (sbd_is_disk(s)) { servants_finished++; } } } } } return 0; } int quorum_write(int good_servants) { return (good_servants > disk_count/2); } int messenger(const char *name, const char *msg, struct servants_list_item *servants) { int sig = 0; pid_t pid = 0; int status = 0; int servants_finished = 0; int successful_delivery = 0; sigset_t procmask; siginfo_t sinfo; struct servants_list_item *s; struct slot_msg_arg_t slot_msg_arg = {name, msg}; sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigprocmask(SIG_BLOCK, &procmask, NULL); for (s = servants; s; s = s->next) { s->pid = assign_servant(s->devname, &slot_msg_wrapper, 0, &slot_msg_arg); } while (!(quorum_write(successful_delivery) || (servants_finished == disk_count))) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = waitpid(-1, &status, WNOHANG))) { if (pid == -1 && errno == ECHILD) { break; } else { servants_finished++; if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { DBGLOG(LOG_INFO, "Process %d succeeded.", (int)pid); successful_delivery++; } else { cl_log(LOG_WARNING, "Process %d failed to deliver!", (int)pid); } } } } } if (quorum_write(successful_delivery)) { cl_log(LOG_INFO, "Message successfully delivered."); return 0; } else { cl_log(LOG_ERR, "Message is not delivered via more then a half of devices"); return -1; } } unsigned long get_first_msgwait(struct servants_list_item *servants) { unsigned long msgwait = 0; struct servants_list_item *s = servants; for (s = servants; s; s = s->next) { struct sbd_context *st; struct sector_header_s *s_header; st = open_device(s->devname, LOG_WARNING); if (!st) { continue; } s_header = header_get(st); if (s_header != NULL) { msgwait = (unsigned long)s_header->timeout_msgwait; close_device(st); return msgwait; } close_device(st); } return msgwait; } int dump_headers(struct servants_list_item *servants) { int rc = 0; struct servants_list_item *s = servants; struct sbd_context *st; for (s = servants; s; s = s->next) { int rv; fprintf(stdout, "==Dumping header on disk %s\n", s->devname); st = open_device(s->devname, LOG_WARNING); if (st) { rv = header_dump(st); close_device(st); } else { - fprintf(stdout, "== disk %s unreadable!\n", s->devname); + fprintf(stderr, "== disk %s unreadable!\n", s->devname); rv = -1; } if (rv == -1) { rc = -1; - fprintf(stdout, "==Header on disk %s NOT dumped\n", s->devname); + fprintf(stderr, "==Header on disk %s NOT dumped\n", s->devname); } else { fprintf(stdout, "==Header on disk %s is dumped\n", s->devname); } } return rc; } void open_any_device(struct servants_list_item *servants) { struct sector_header_s *hdr_cur = NULL; struct timespec t_0; int t_wait = 0; bool logged_once = false; clock_gettime(CLOCK_MONOTONIC, &t_0); while (!hdr_cur && t_wait < timeout_startup) { struct timespec t_now; struct servants_list_item* s; for (s = servants; s; s = s->next) { struct sbd_context *st = open_device(s->devname, LOG_DEBUG); if (!st) { if (logged_once == false) { cl_log(LOG_WARNING, "Failed to open %s. " "Trying any other configured devices, " "otherwise retrying every %ds within %ds", s->devname, timeout_loop, timeout_startup); logged_once = true; } continue; } hdr_cur = header_get(st); close_device(st); if (hdr_cur) { break; } else { if (logged_once == false) { cl_log(LOG_WARNING, "Failed to read header from %s. " "Trying any other configured devices, " "otherwise retrying every %ds within %ds", s->devname, timeout_loop, timeout_startup); logged_once = true; } } } clock_gettime(CLOCK_MONOTONIC, &t_now); t_wait = t_now.tv_sec - t_0.tv_sec; if (!hdr_cur) { sleep(timeout_loop); } } if (hdr_cur) { timeout_watchdog = hdr_cur->timeout_watchdog; timeout_allocate = hdr_cur->timeout_allocate; timeout_loop = hdr_cur->timeout_loop; timeout_msgwait = hdr_cur->timeout_msgwait; } else { cl_log(LOG_ERR, "No devices were available at start-up within %i seconds.", timeout_startup); exit(1); } free(hdr_cur); return; } /* ::-::-::-::-::-::-::-::-::-::-::-::-:: Begin disk based servant code ::-::-::-::-::-::-::-::-::-::-::-::-:: */ static int servant_check_timeout_inconsistent(struct sector_header_s *hdr) { if (timeout_watchdog != hdr->timeout_watchdog) { cl_log(LOG_WARNING, "watchdog timeout: %d versus %d on this device", (int)timeout_watchdog, (int)hdr->timeout_watchdog); return -1; } if (timeout_allocate != hdr->timeout_allocate) { cl_log(LOG_WARNING, "allocate timeout: %d versus %d on this device", (int)timeout_allocate, (int)hdr->timeout_allocate); return -1; } if (timeout_loop != hdr->timeout_loop) { cl_log(LOG_WARNING, "loop timeout: %d versus %d on this device", (int)timeout_loop, (int)hdr->timeout_loop); return -1; } if (timeout_msgwait != hdr->timeout_msgwait) { cl_log(LOG_WARNING, "msgwait timeout: %d versus %d on this device", (int)timeout_msgwait, (int)hdr->timeout_msgwait); return -1; } return 0; } int servant_md(const char *diskname, int mode, const void* argp) { struct sector_mbox_s *s_mbox = NULL; struct sector_node_s *s_node = NULL; struct sector_header_s *s_header = NULL; int mbox; int rc = 0; time_t t0, t1, latency; union sigval signal_value; sigset_t servant_masks; struct sbd_context *st; pid_t ppid; char uuid[37]; const struct servants_list_item *s = argp; cl_log(LOG_INFO, "Servant starting for device %s", diskname); /* Block most of the signals */ sigfillset(&servant_masks); sigdelset(&servant_masks, SIGKILL); sigdelset(&servant_masks, SIGFPE); sigdelset(&servant_masks, SIGILL); sigdelset(&servant_masks, SIGSEGV); sigdelset(&servant_masks, SIGBUS); sigdelset(&servant_masks, SIGALRM); /* FIXME: check error */ sigprocmask(SIG_SETMASK, &servant_masks, NULL); st = open_device(diskname, LOG_WARNING); if (!st) { exit(EXIT_MD_SERVANT_IO_FAIL); } s_header = header_get(st); if (!s_header) { cl_log(LOG_ERR, "Not a valid header on %s", diskname); exit(EXIT_MD_SERVANT_IO_FAIL); } if (servant_check_timeout_inconsistent(s_header) < 0) { cl_log(LOG_ERR, "Timeouts on %s do not match first device", diskname); exit(EXIT_MD_SERVANT_IO_FAIL); } if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Device %s uuid: %s", diskname, uuid); } mbox = slot_allocate(st, local_uname); if (mbox < 0) { cl_log(LOG_ERR, "No slot allocated, and automatic allocation failed for disk %s.", diskname); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } s_node = sector_alloc(); if (slot_read(st, mbox, s_node) < 0) { cl_log(LOG_ERR, "Unable to read node entry on %s", diskname); exit(EXIT_MD_SERVANT_IO_FAIL); } cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname); if (s_header->minor_version == 0) { set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox); } else { set_proc_title("sbd: watcher: %s - slot: %d - uuid: %s", diskname, mbox, uuid); } s_mbox = sector_alloc(); if (s->first_start) { if (mode > 0) { if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed during start-up in servant."); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (s_mbox->cmd != SBD_MSG_EXIT && s_mbox->cmd != SBD_MSG_EMPTY) { /* Not a clean stop. Abort start-up */ cl_log(LOG_WARNING, "Found fencing message - aborting start-up. Manual intervention required!"); ppid = getppid(); sigqueue(ppid, SIG_EXITREQ, signal_value); rc = 0; goto out; } } DBGLOG(LOG_INFO, "First servant start - zeroing inbox"); memset(s_mbox, 0, sizeof(*s_mbox)); if (mbox_write(st, mbox, s_mbox) < 0) { rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } } memset(&signal_value, 0, sizeof(signal_value)); while (1) { struct sector_header_s *s_header_retry = NULL; struct sector_node_s *s_node_retry = NULL; t0 = time(NULL); sleep(timeout_loop); ppid = getppid(); if (ppid == 1) { /* Our parent died unexpectedly. Triggering * self-fence. */ do_timeout_action(); } /* These attempts are, by definition, somewhat racy. If * the device is wiped out or corrupted between here and * us reading our mbox, there is nothing we can do about * that. But at least we tried. */ s_header_retry = header_get(st); if (!s_header_retry) { cl_log(LOG_ERR, "No longer found a valid header on %s", diskname); exit(EXIT_MD_SERVANT_IO_FAIL); } if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) { cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname); exit(EXIT_MD_SERVANT_IO_FAIL); } free(s_header_retry); s_node_retry = sector_alloc(); if (slot_read(st, mbox, s_node_retry) < 0) { cl_log(LOG_ERR, "slot read failed in servant."); exit(EXIT_MD_SERVANT_IO_FAIL); } if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) { cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname); exit(EXIT_MD_SERVANT_IO_FAIL); } free(s_node_retry); if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed in servant."); exit(EXIT_MD_SERVANT_IO_FAIL); } if (s_mbox->cmd > 0) { cl_log(LOG_NOTICE, "Received command %s from %s on disk %s", char2cmd(s_mbox->cmd), s_mbox->from, diskname); switch (s_mbox->cmd) { case SBD_MSG_TEST: memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); sigqueue(ppid, SIG_TEST, signal_value); break; case SBD_MSG_RESET: exit(EXIT_MD_SERVANT_REQUEST_RESET); case SBD_MSG_OFF: exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF); case SBD_MSG_EXIT: sigqueue(ppid, SIG_EXITREQ, signal_value); break; case SBD_MSG_CRASHDUMP: exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP); default: /* FIXME: An "unknown" message might result from a partial write. log it and clear the slot. */ cl_log(LOG_ERR, "Unknown message on disk %s", diskname); memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); break; } } sigqueue(ppid, SIG_LIVENESS, signal_value); t1 = time(NULL); latency = t1 - t0; if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) { cl_log(LOG_WARNING, "Latency: %d exceeded threshold %d on disk %s", (int)latency, (int)timeout_watchdog_warn, diskname); } else if (debug) { DBGLOG(LOG_DEBUG, "Latency: %d on disk %s", (int)latency, diskname); } } out: free(s_mbox); close_device(st); exit(rc); }