diff --git a/configure.ac b/configure.ac index 036e30e..fab5924 100644 --- a/configure.ac +++ b/configure.ac @@ -1,78 +1,80 @@ dnl dnl autoconf for Agents dnl dnl License: GNU General Public License (GPL) dnl =============================================== dnl Bootstrap dnl =============================================== AC_PREREQ(2.63) dnl Suggested structure: dnl information on the package dnl checks for programs dnl checks for libraries dnl checks for header files dnl checks for types dnl checks for structures dnl checks for compiler characteristics dnl checks for library functions dnl checks for system services AC_INIT([sbd], [1.2.1], [lmb@suse.com]) AC_CANONICAL_HOST AC_CONFIG_AUX_DIR(.) AC_CONFIG_HEADERS(config.h) AM_INIT_AUTOMAKE AM_PROG_CC_C_O PKG_CHECK_MODULES(glib, [glib-2.0]) -PKG_CHECK_MODULES(libcoroipcc, [libcoroipcc]) +dnl PKG_CHECK_MODULES(libcoroipcc, [libcoroipcc]) dnl pacemaker > 1.1.8 PKG_CHECK_MODULES(pacemaker, [pacemaker, pacemaker-cib], HAVE_pacemaker=1, HAVE_pacemaker=0) dnl pacemaker <= 1.1.8 PKG_CHECK_MODULES(pcmk, [pcmk, pcmk-cib], HAVE_pcmk=1, HAVE_pcmk=0) +PKG_CHECK_MODULES(libqb, [libqb]) if test $HAVE_pacemaker = 0 -a $HAVE_pcmk = 0; then AC_MSG_ERROR(No package 'pacemaker' found) elif test $HAVE_pacemaker = 1; then CPPFLAGS="$CPPFLAGS $glib_CFLAGS $pacemaker_CFLAGS" else dnl Deal with the wrong 'includedir' in pcmk.pc from pacemaker < 1.1.8 pcmk_CFLAGS="-I${prefix}/include/pacemaker -I${prefix}/include/heartbeat" CPPFLAGS="$CPPFLAGS $glib_CFLAGS $pcmk_CFLAGS" fi PKG_CHECK_MODULES(libxml, [libxml-2.0]) -CPPFLAGS="$CPPFLAGS $libxml_CFLAGS" +CPPFLAGS="$CPPFLAGS $libxml_CFLAGS $libqb_CFLAGS" dnl checks for libraries AC_CHECK_LIB(aio, io_setup, , missing="yes") -AC_CHECK_LIB(plumbgpl, init_set_proc_title, , missing="yes") +AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set, , missing="yes") AC_CHECK_LIB(cib, cib_new, , missing="yes") AC_CHECK_LIB(crmcommon, set_crm_log_level, , missing="yes") AC_CHECK_LIB(pe_status, pe_find_node, , 
missing="yes") AC_CHECK_LIB(pe_rules, test_rule, , missing="yes") AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes") +AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes") dnl pacemaker >= 1.1.8 AC_CHECK_HEADERS(pacemaker/crm/cluster.h) AC_CHECK_LIB(crmcommon, pcmk_strerror, , missing="yes") AC_CHECK_LIB(cib, cib_apply_patch_event, , missing="yes") if test "$missing" = "yes"; then AC_MSG_ERROR([Missing required libraries or functions.]) fi AC_PATH_PROGS(POD2MAN, pod2man, pod2man) dnl The Makefiles and shell scripts we output AC_CONFIG_FILES([Makefile src/Makefile agent/Makefile man/Makefile]) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() diff --git a/src/Makefile.am b/src/Makefile.am index 124c979..d581756 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,12 +1,10 @@ AM_CFLAGS = -D_GNU_SOURCE -DCHECK_AIS -AM_CPPFLAGS = -I$(includedir)/pacemaker -I$(includedir)/clplumbing \ - -I$(includedir)/heartbeat \ - $(glib_CFLAGS) \ - $(libxml_CFLAGS) +AM_CPPFLAGS = -I$(includedir)/pacemaker \ + $(glib_CFLAGS) sbin_PROGRAMS = sbd -sbd_SOURCES = sbd-common.c sbd-md.c sbd-pacemaker.c +sbd_SOURCES = sbd-common.c sbd-md.c sbd-pacemaker.c setproctitle.c sbd_LDFLAGS = $(glib_LIBS) $(libcoroipcc_LIBS) diff --git a/src/sbd-common.c b/src/sbd-common.c index 79c8890..f9827b9 100644 --- a/src/sbd-common.c +++ b/src/sbd-common.c @@ -1,1054 +1,1265 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "sbd.h" +#include +#include +#include + +#ifdef _POSIX_MEMLOCK +# include +#endif /* These have to match the values in the header of the partition */ static char sbd_magic[8] = "SBD_SBD_"; static char sbd_version = 0x02; /* Tunable defaults: */ unsigned long timeout_watchdog = 5; unsigned long timeout_watchdog_warn = 3; int timeout_allocate = 2; int timeout_loop = 1; int timeout_msgwait = 10; int timeout_io = 3; int timeout_startup = 120; int watchdog_use = 1; int watchdog_set_timeout = 1; unsigned long timeout_watchdog_crashdump = 240; int skip_rt = 0; int check_pcmk = 0; int debug = 0; int debug_mode = 0; const char *watchdogdev = "/dev/watchdog"; char * local_uname; /* Global, non-tunable variables: */ int sector_size = 0; int watchdogfd = -1; /*const char *devname;*/ const char *cmdname; void usage(void) { fprintf(stderr, "Shared storage fencing tool.\n" "Syntax:\n" " %s \n" "Options:\n" "-d Block device to use (mandatory; can be specified up to 3 times)\n" "-h Display this help.\n" "-n Set local node name; defaults to uname -n (optional)\n" "\n" "-R Do NOT enable realtime priority (debugging only)\n" "-W Use watchdog (recommended) (watch only)\n" "-w Specify watchdog device (optional) (watch only)\n" "-T Do NOT initialize the watchdog timeout (watch only)\n" "-S <0|1> Set start mode if the node was previously fenced (watch only)\n" "-p Write pidfile to the specified path (watch only)\n" "-v Enable some verbose debug logging (optional)\n" "\n" "-1 Set watchdog timeout to N seconds (optional, create only)\n" "-2 Set slot allocation timeout to N seconds (optional, create only)\n" "-3 Set daemon loop timeout to N seconds (optional, create only)\n" "-4 Set msgwait timeout to N seconds (optional, create only)\n" "-5 Warn if loop latency exceeds 
threshold (optional, watch only)\n" " (default is 3, set to 0 to disable)\n" "-C Watchdog timeout to set before crashdumping (def: 240s, optional)\n" "-I Async IO read timeout (defaults to 3 * loop timeout, optional)\n" "-s Timeout to wait for devices to become available (def: 120s)\n" "-t Dampening delay before faulty servants are restarted (optional)\n" " (default is 5, set to 0 to disable)\n" "-F # of failures before a servant is considered faulty (optional)\n" " (default is 1, set to 0 to disable)\n" "-P Check Pacemaker quorum and node health (optional, watch only)\n" "-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n" "Commands:\n" "create initialize N slots on - OVERWRITES DEVICE!\n" "list List all allocated slots on device, and messages.\n" "dump Dump meta-data header from device.\n" "watch Loop forever, monitoring own slot\n" "allocate \n" " Allocate a slot for node (optional)\n" "message (test|reset|off|clear|exit)\n" " Writes the specified message to node's slot.\n" , cmdname); } int watchdog_init_interval(void) { int timeout = timeout_watchdog; if (watchdogfd < 0) { return 0; } if (watchdog_set_timeout == 0) { cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!"); return 0; } if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout) < 0) { cl_perror( "WDIOC_SETTIMEOUT" ": Failed to set watchdog timer to %u seconds.", timeout); cl_log(LOG_CRIT, "Please validate your watchdog configuration!"); cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to skip this if you are completely sure."); return -1; } else { cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.", timeout); } return 0; } int watchdog_tickle(void) { if (watchdogfd >= 0) { if (write(watchdogfd, "", 1) != 1) { cl_perror("Watchdog write failure: %s!", watchdogdev); return -1; } } return 0; } int watchdog_init(void) { if (watchdogfd < 0 && watchdogdev != NULL) { watchdogfd = open(watchdogdev, O_WRONLY); if (watchdogfd >= 0) { cl_log(LOG_NOTICE, "Using watchdog 
device: %s", watchdogdev); if ((watchdog_init_interval() < 0) || (watchdog_tickle() < 0)) { return -1; } }else{ cl_perror("Cannot open watchdog device: %s", watchdogdev); return -1; } } return 0; } void -watchdog_close(void) +watchdog_close(bool disarm) { - if (watchdogfd >= 0) { - if (write(watchdogfd, "V", 1) != 1) { - cl_perror( - "Watchdog write magic character failure: closing %s!", - watchdogdev); - } - if (close(watchdogfd) < 0) { - cl_perror("Watchdog close(2) failed."); - } - watchdogfd = -1; - } + if (watchdogfd < 0) { + return; + } + + if (disarm) { + int r; + int flags = WDIOS_DISABLECARD;; + + /* Explicitly disarm it */ + r = ioctl(watchdogfd, WDIOC_SETOPTIONS, &flags); + if (r < 0) { + cl_perror("Failed to disable hardware watchdog %s", watchdogdev); + } + + /* To be sure, use magic close logic, too */ + for (;;) { + if (write(watchdogfd, "V", 1) > 0) { + break; + } + cl_perror("Cannot disable watchdog device %s", watchdogdev); + } + } + + if (close(watchdogfd) < 0) { + cl_perror("Watchdog close(%d) failed", watchdogfd); + } + + watchdogfd = -1; } /* This duplicates some code from linux/ioprio.h since these are not included * even in linux-kernel-headers. Sucks. 
See also * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */ extern int sys_ioprio_set(int, int, int); int ioprio_set(int which, int who, int ioprio); inline int ioprio_set(int which, int who, int ioprio) { return syscall(__NR_ioprio_set, which, who, ioprio); } enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE, }; enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, IOPRIO_WHO_USER, }; #define IOPRIO_BITS (16) #define IOPRIO_CLASS_SHIFT (13) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) #define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) #define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) +static unsigned char +sbd_stack_hogger(unsigned char * inbuf, int kbytes) +{ + unsigned char buf[1024]; + + if(kbytes <= 0) { + return HOG_CHAR; + } + + if (inbuf == NULL) { + memset(buf, HOG_CHAR, sizeof(buf)); + } else { + memcpy(buf, inbuf, sizeof(buf)); + } + + if (kbytes > 0) { + return sbd_stack_hogger(buf, kbytes-1); + } else { + return buf[sizeof(buf)-1]; + } +} + +static void +sbd_malloc_hogger(int kbytes) +{ + int j; + void**chunks; + int chunksize = 1024; + + if(kbytes <= 0) { + return; + } + + /* + * We could call mallopt(M_MMAP_MAX, 0) to disable it completely, + * but we've already called mlockall() + * + * We could also call mallopt(M_TRIM_THRESHOLD, -1) to prevent malloc + * from giving memory back to the system, but we've already called + * mlockall(MCL_FUTURE), so there's no need. 
+ */ + + chunks = malloc(kbytes * sizeof(void *)); + if (chunks == NULL) { + cl_log(LOG_WARNING, "Could not preallocate chunk array"); + return; + } + + for (j=0; j < kbytes; ++j) { + chunks[j] = malloc(chunksize); + if (chunks[j] == NULL) { + cl_log(LOG_WARNING, "Could not preallocate block %d", j); + + } else { + memset(chunks[j], 0, chunksize); + } + } + + for (j=0; j < kbytes; ++j) { + free(chunks[j]); + } + + free(chunks); +} + +static void sbd_memlock(int stackgrowK, int heapgrowK) +{ + +#ifdef _POSIX_MEMLOCK + /* + * We could call setrlimit(RLIMIT_MEMLOCK,...) with a large + * number, but the mcp runs as root and mlock(2) says: + * + * Since Linux 2.6.9, no limits are placed on the amount of memory + * that a privileged process may lock, and this limit instead + * governs the amount of memory that an unprivileged process may + * lock. + */ + if (mlockall(MCL_CURRENT|MCL_FUTURE) >= 0) { + cl_log(LOG_INFO, "Locked ourselves in memory"); + + /* Now allocate some extra pages (MCL_FUTURE will ensure they stay around) */ + sbd_malloc_hogger(heapgrowK); + sbd_stack_hogger(NULL, stackgrowK); + + } else { + cl_perror("Unable to lock ourselves into memory"); + } + +#else + cl_log(LOG_ERR, "Unable to lock ourselves into memory"); +#endif +} + +void +sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) +{ + if(priority < 0) { + return; + } + +#ifdef SCHED_RR + { + int pcurrent = 0; + int pmin = sched_get_priority_min(SCHED_RR); + int pmax = sched_get_priority_max(SCHED_RR); + + if (priority == 0) { + priority = pmax; + } else if (priority < pmin) { + priority = pmin; + } else if (priority > pmax) { + priority = pmax; + } + + pcurrent = sched_getscheduler(0); + if (pcurrent < 0) { + cl_perror("Unable to get scheduler priority"); + + } else if(pcurrent < priority) { + struct sched_param sp; + + memset(&sp, 0, sizeof(sp)); + sp.sched_priority = priority; + + if (sched_setscheduler(0, SCHED_RR, &sp) < 0) { + cl_perror("Unable to set scheduler priority to %d", 
priority); + } else { + cl_log(LOG_INFO, "Scheduler priority is now %d", priority); + } + } + } +#else + cl_log(LOG_ERR, "System does not support updating the scheduler priority"); +#endif + + sbd_memlock(heapgrowK, stackgrowK); +} + void maximize_priority(void) { if (skip_rt) { cl_log(LOG_INFO, "Not elevating to realtime (-R specified)."); return; } - cl_make_realtime(-1, 100, 256, 256); + sbd_make_realtime(0, 256, 256); if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(), IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) { cl_perror("ioprio_set() call failed."); } } + void close_device(struct sbd_context *st) { close(st->devfd); free(st); } struct sbd_context * open_device(const char* devname, int loglevel) { struct sbd_context *st; if (!devname) return NULL; st = malloc(sizeof(struct sbd_context)); if (!st) return NULL; memset(st, 0, sizeof(struct sbd_context)); if (io_setup(1, &st->ioctx) != 0) { cl_perror("io_setup failed"); free(st); return NULL; } st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT); if (st->devfd == -1) { if (loglevel == LOG_DEBUG) { DBGLOG(loglevel, "Opening device %s failed.", devname); } else { cl_log(loglevel, "Opening device %s failed.", devname); } free(st); return NULL; } ioctl(st->devfd, BLKSSZGET, &sector_size); if (sector_size == 0) { cl_perror("Get sector size failed.\n"); close_device(st); return NULL; } return st; } signed char cmd2char(const char *cmd) { if (strcmp("clear", cmd) == 0) { return SBD_MSG_EMPTY; } else if (strcmp("test", cmd) == 0) { return SBD_MSG_TEST; } else if (strcmp("reset", cmd) == 0) { return SBD_MSG_RESET; } else if (strcmp("off", cmd) == 0) { return SBD_MSG_OFF; } else if (strcmp("exit", cmd) == 0) { return SBD_MSG_EXIT; } else if (strcmp("crashdump", cmd) == 0) { return SBD_MSG_CRASHDUMP; } return -1; } void * sector_alloc(void) { void *x; x = valloc(sector_size); if (!x) { exit(1); } memset(x, 0, sector_size); return x; } const char* char2cmd(const char cmd) { switch (cmd) { case SBD_MSG_EMPTY: return "clear"; break; case 
SBD_MSG_TEST: return "test"; break; case SBD_MSG_RESET: return "reset"; break; case SBD_MSG_OFF: return "off"; break; case SBD_MSG_EXIT: return "exit"; break; case SBD_MSG_CRASHDUMP: return "crashdump"; break; default: return "undefined"; break; } } static int sector_io(struct sbd_context *st, int sector, void *data, int rw) { struct timespec timeout; struct io_event event; struct iocb *ios[1] = { &st->io }; long r; timeout.tv_sec = timeout_io; timeout.tv_nsec = 0; memset(&st->io, 0, sizeof(struct iocb)); if (rw) { io_prep_pwrite(&st->io, st->devfd, data, sector_size, sector_size * sector); } else { io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector); } if (io_submit(st->ioctx, 1, ios) != 1) { cl_log(LOG_ERR, "Failed to submit IO request! (rw=%d)", rw); return -1; } errno = 0; r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout); if (r < 0 ) { cl_log(LOG_ERR, "Failed to retrieve IO events (rw=%d)", rw); return -1; } else if (r < 1L) { cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d)", rw); r = io_cancel(st->ioctx, ios[0], &event); if (r) { DBGLOG(LOG_INFO, "Could not cancel IO request (rw=%d)", rw); /* Doesn't really matter, debugging information. 
*/ } return -1; } else if (r > 1L) { cl_log(LOG_ERR, "More than one IO was returned (r=%ld)", r); return -1; } /* IO is happy */ if (event.res == sector_size) { return 0; } else { cl_log(LOG_ERR, "Short IO (rw=%d, res=%lu, sector_size=%d)", rw, event.res, sector_size); return -1; } } int sector_write(struct sbd_context *st, int sector, void *data) { return sector_io(st, sector, data, 1); } int sector_read(struct sbd_context *st, int sector, void *data) { return sector_io(st, sector, data, 0); } int slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node) { return sector_read(st, SLOT_TO_SECTOR(slot), s_node); } int slot_write(struct sbd_context *st, int slot, struct sector_node_s *s_node) { return sector_write(st, SLOT_TO_SECTOR(slot), s_node); } int mbox_write(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { return sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox); } int mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { return sector_read(st, MBOX_TO_SECTOR(mbox), s_mbox); } int mbox_write_verify(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { void *data; int rc = 0; if (sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox) < 0) return -1; data = sector_alloc(); if (sector_read(st, MBOX_TO_SECTOR(mbox), data) < 0) { rc = -1; goto out; } if (memcmp(s_mbox, data, sector_size) != 0) { cl_log(LOG_ERR, "Write verification failed!"); rc = -1; goto out; } rc = 0; out: free(data); return rc; } int header_write(struct sbd_context *st, struct sector_header_s *s_header) { s_header->sector_size = htonl(s_header->sector_size); s_header->timeout_watchdog = htonl(s_header->timeout_watchdog); s_header->timeout_allocate = htonl(s_header->timeout_allocate); s_header->timeout_loop = htonl(s_header->timeout_loop); s_header->timeout_msgwait = htonl(s_header->timeout_msgwait); return sector_write(st, 0, s_header); } int header_read(struct sbd_context *st, struct sector_header_s *s_header) { if (sector_read(st, 0, 
s_header) < 0) return -1; s_header->sector_size = ntohl(s_header->sector_size); s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog); s_header->timeout_allocate = ntohl(s_header->timeout_allocate); s_header->timeout_loop = ntohl(s_header->timeout_loop); s_header->timeout_msgwait = ntohl(s_header->timeout_msgwait); /* This sets the global defaults: */ timeout_watchdog = s_header->timeout_watchdog; timeout_allocate = s_header->timeout_allocate; timeout_loop = s_header->timeout_loop; timeout_msgwait = s_header->timeout_msgwait; return 0; } int valid_header(const struct sector_header_s *s_header) { if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) { cl_log(LOG_ERR, "Header magic does not match."); return -1; } if (s_header->version != sbd_version) { cl_log(LOG_ERR, "Header version does not match."); return -1; } if (s_header->sector_size != sector_size) { cl_log(LOG_ERR, "Header sector size does not match."); return -1; } return 0; } struct sector_header_s * header_get(struct sbd_context *st) { struct sector_header_s *s_header; s_header = sector_alloc(); if (header_read(st, s_header) < 0) { cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd); return NULL; } if (valid_header(s_header) < 0) { cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd); return NULL; } /* cl_log(LOG_INFO, "Found version %d header with %d slots", s_header->version, s_header->slots); */ return s_header; } int init_device(struct sbd_context *st) { struct sector_header_s *s_header; struct sector_node_s *s_node; struct sector_mbox_s *s_mbox; struct stat s; char uuid[37]; int i; int rc = 0; s_header = sector_alloc(); s_node = sector_alloc(); s_mbox = sector_alloc(); memcpy(s_header->magic, sbd_magic, sizeof(s_header->magic)); s_header->version = sbd_version; s_header->slots = 255; s_header->sector_size = sector_size; s_header->timeout_watchdog = timeout_watchdog; s_header->timeout_allocate = timeout_allocate; s_header->timeout_loop = timeout_loop; 
s_header->timeout_msgwait = timeout_msgwait; s_header->minor_version = 1; uuid_generate(s_header->uuid); uuid_unparse_lower(s_header->uuid, uuid); fstat(st->devfd, &s); /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n", s.st_size, s.st_blksize, s.st_blocks); */ cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)", s_header->version, s_header->minor_version, st->devfd, uuid); fprintf(stdout, "Creating version %d.%d header on device %d (uuid: %s)\n", s_header->version, s_header->minor_version, st->devfd, uuid); if (header_write(st, s_header) < 0) { rc = -1; goto out; } cl_log(LOG_INFO, "Initializing %d slots on device %d", s_header->slots, st->devfd); fprintf(stdout, "Initializing %d slots on device %d\n", s_header->slots, st->devfd); for (i=0;i < s_header->slots;i++) { if (slot_write(st, i, s_node) < 0) { rc = -1; goto out; } if (mbox_write(st, i, s_mbox) < 0) { rc = -1; goto out; } } out: free(s_node); free(s_header); free(s_mbox); return(rc); } /* Check if there already is a slot allocated to said name; returns the * slot number. If not found, returns -1. * This is necessary because slots might not be continuous. 
*/ int slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name) { struct sector_node_s *s_node = NULL; int i; int rc = -1; if (!name) { cl_log(LOG_ERR, "slot_lookup(): No name specified.\n"); goto out; } s_node = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -2; goto out; } if (s_node->in_use != 0) { if (strncasecmp(s_node->name, name, sizeof(s_node->name)) == 0) { DBGLOG(LOG_INFO, "%s owns slot %d", name, i); rc = i; goto out; } } } out: free(s_node); return rc; } int slot_unused(struct sbd_context *st, const struct sector_header_s *s_header) { struct sector_node_s *s_node; int i; int rc = -1; s_node = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -1; goto out; } if (s_node->in_use == 0) { rc = i; goto out; } } out: free(s_node); return rc; } int slot_allocate(struct sbd_context *st, const char *name) { struct sector_header_s *s_header = NULL; struct sector_node_s *s_node = NULL; struct sector_mbox_s *s_mbox = NULL; int i; int rc = 0; if (!name) { cl_log(LOG_ERR, "slot_allocate(): No name specified.\n"); fprintf(stderr, "slot_allocate(): No name specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } s_node = sector_alloc(); s_mbox = sector_alloc(); while (1) { i = slot_lookup(st, s_header, name); if ((i >= 0) || (i == -2)) { /* -1 is "no slot found", in which case we * proceed to allocate a new one. 
* -2 is "read error during lookup", in which * case we error out too * >= 0 is "slot already allocated" */ rc = i; goto out; } i = slot_unused(st, s_header); if (i >= 0) { cl_log(LOG_INFO, "slot %d is unused - trying to own", i); fprintf(stdout, "slot %d is unused - trying to own\n", i); memset(s_node, 0, sizeof(*s_node)); s_node->in_use = 1; strncpy(s_node->name, name, sizeof(s_node->name)); if (slot_write(st, i, s_node) < 0) { rc = -1; goto out; } sleep(timeout_allocate); } else { cl_log(LOG_ERR, "No more free slots."); fprintf(stderr, "No more free slots.\n"); rc = -1; goto out; } } out: free(s_node); free(s_header); free(s_mbox); return(rc); } int slot_list(struct sbd_context *st) { struct sector_header_s *s_header = NULL; struct sector_node_s *s_node = NULL; struct sector_mbox_s *s_mbox = NULL; int i; int rc = 0; s_header = header_get(st); if (!s_header) { rc = -1; goto out; } s_node = sector_alloc(); s_mbox = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -1; goto out; } if (s_node->in_use > 0) { if (mbox_read(st, i, s_mbox) < 0) { rc = -1; goto out; } printf("%d\t%s\t%s\t%s\n", i, s_node->name, char2cmd(s_mbox->cmd), s_mbox->from); } } out: free(s_node); free(s_header); free(s_mbox); return rc; } int slot_msg(struct sbd_context *st, const char *name, const char *cmd) { struct sector_header_s *s_header = NULL; struct sector_mbox_s *s_mbox = NULL; int mbox; int rc = 0; char uuid[37]; if (!name || !cmd) { cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } if (strcmp(name, "LOCAL") == 0) { name = local_uname; } if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Device UUID: %s", uuid); } mbox = slot_lookup(st, s_header, name); if (mbox < 0) { cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name); rc = -1; goto out; } s_mbox = sector_alloc(); s_mbox->cmd = cmd2char(cmd); 
if (s_mbox->cmd < 0) { cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd); rc = -1; goto out; } strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1); cl_log(LOG_INFO, "Writing %s to node slot %s", cmd, name); if (mbox_write_verify(st, mbox, s_mbox) < -1) { rc = -1; goto out; } if (strcasecmp(cmd, "exit") != 0) { cl_log(LOG_INFO, "Messaging delay: %d", (int)timeout_msgwait); sleep(timeout_msgwait); } cl_log(LOG_INFO, "%s successfully delivered to %s", cmd, name); out: free(s_mbox); free(s_header); return rc; } int slot_ping(struct sbd_context *st, const char *name) { struct sector_header_s *s_header = NULL; struct sector_mbox_s *s_mbox = NULL; int mbox; int waited = 0; int rc = 0; if (!name) { cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } if (strcmp(name, "LOCAL") == 0) { name = local_uname; } mbox = slot_lookup(st, s_header, name); if (mbox < 0) { cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name); rc = -1; goto out; } s_mbox = sector_alloc(); s_mbox->cmd = SBD_MSG_TEST; strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1); DBGLOG(LOG_DEBUG, "Pinging node %s", name); if (mbox_write(st, mbox, s_mbox) < -1) { rc = -1; goto out; } rc = -1; while (waited <= timeout_msgwait) { if (mbox_read(st, mbox, s_mbox) < 0) break; if (s_mbox->cmd != SBD_MSG_TEST) { rc = 0; break; } sleep(1); waited++; } if (rc == 0) { cl_log(LOG_DEBUG, "%s successfully pinged.", name); } else { cl_log(LOG_ERR, "%s failed to ping.", name); } out: free(s_mbox); free(s_header); return rc; } void sysrq_init(void) { FILE* procf; int c; procf = fopen("/proc/sys/kernel/sysrq", "r"); if (!procf) { cl_perror("cannot open /proc/sys/kernel/sysrq for read."); return; } if (fscanf(procf, "%d", &c) != 1) { cl_perror("Parsing sysrq failed"); c = 0; } fclose(procf); if (c == 1) return; /* 8 for debugging dumps of processes, 128 for reboot/poweroff */ c |= 136; procf = 
fopen("/proc/sys/kernel/sysrq", "w"); if (!procf) { cl_perror("cannot open /proc/sys/kernel/sysrq for writing"); return; } fprintf(procf, "%d", c); fclose(procf); return; } void sysrq_trigger(char t) { FILE *procf; procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { cl_perror("Opening sysrq-trigger failed."); return; } cl_log(LOG_INFO, "sysrq-trigger: %c\n", t); fprintf(procf, "%c\n", t); fclose(procf); return; } + +static void +do_exit(char kind) +{ + /* TODO: Turn debug_mode into a bit field? Delay + kdump for example */ + const char *reason = NULL; + + if (kind == 'c') { + cl_log(LOG_NOTICE, "Initiating kdump"); + + } else if (debug_mode == 1) { + cl_log(LOG_WARNING, "Initiating kdump instead of panicing the node (debug mode)"); + kind = 'c'; + } + + if (debug_mode == 2) { + cl_log(LOG_WARNING, "Shutting down SBD instead of panicing the node (debug mode)"); + watchdog_close(true); + exit(0); + } + + if (debug_mode == 3) { + /* Give the system some time to flush logs to disk before rebooting. */ + cl_log(LOG_WARNING, "Delaying node panic by 10s (debug mode)"); + + watchdog_close(true); + sync(); + + sleep(10); + } + + switch(kind) { + case 'b': + reason = "reboot"; + break; + case 'c': + reason = "crashdump"; + break; + case 'o': + reason = "off"; + break; + default: + reason = "unknown"; + break; + } + + cl_log(LOG_EMERG, "Rebooting system: %s", reason); + sync(); + + if(kind == 'c') { + watchdog_close(true); + sysrq_trigger(kind); + + } else { + watchdog_close(false); + sysrq_trigger(kind); + if(reboot(RB_AUTOBOOT) < 0) { + cl_perror("Reboot failed"); + } + } + + exit(1); +} + void do_crashdump(void) { - if (timeout_watchdog_crashdump) { - timeout_watchdog = timeout_watchdog_crashdump; - watchdog_init_interval(); - watchdog_tickle(); - } - sysrq_trigger('c'); - /* is it possible to reach the following line? 
*/ - cl_reboot(5, "sbd is triggering crashdumping"); - exit(1); + do_exit('c'); } void do_reset(void) { - if (debug_mode == 1) { - cl_log(LOG_ERR, "Request to suicide changed to kdump due to DEBUG MODE!"); - watchdog_close(); - sysrq_trigger('c'); - exit(0); - } else if (debug_mode == 2) { - cl_log(LOG_ERR, "Skipping request to suicide due to DEBUG MODE!"); - watchdog_close(); - exit(0); - } else if (debug_mode == 3) { - /* The idea is to give the system some time to flush - * logs to disk before rebooting. */ - cl_log(LOG_ERR, "Delaying request to suicide by 10s due to DEBUG MODE!"); - watchdog_close(); - sync(); - sync(); - sleep(10); - cl_log(LOG_ERR, "Debug mode is now becoming real ..."); - } - sysrq_trigger('b'); - cl_reboot(5, "sbd is self-fencing (reset)"); - sleep(timeout_watchdog * 2); - exit(1); + do_exit('b'); } void do_off(void) { - if (debug_mode == 1) { - cl_log(LOG_ERR, "Request to power-off changed to kdump due to DEBUG MODE!"); - watchdog_close(); - sysrq_trigger('c'); - exit(0); - } else if (debug_mode == 2) { - cl_log(LOG_ERR, "Skipping request to power-off due to DEBUG MODE!"); - watchdog_close(); - exit(0); - } else if (debug_mode == 3) { - /* The idea is to give the system some time to flush - * logs to disk before rebooting. */ - cl_log(LOG_ERR, "Delaying request to power-off by 10s due to DEBUG MODE!"); - watchdog_close(); - sync(); - sync(); - sleep(10); - cl_log(LOG_ERR, "Debug mode is now becoming real ..."); + do_exit('o'); +} + +/* + * Change directory to the directory our core file needs to go in + * Call after you establish the userid you're running under. 
+ */ +int +sbd_cdtocoredir(void) +{ + int rc; + struct passwd* pwent; + static const char *dir = NULL; + + if (dir == NULL) { + dir = HA_COREDIR; + } + if ((rc=chdir(dir)) < 0) { + int errsave = errno; + cl_perror("Cannot chdir to [%s]", dir); + errno = errsave; + return rc; } - sysrq_trigger('o'); - cl_reboot(5, "sbd is self-fencing (power-off)"); - sleep(timeout_watchdog * 2); - exit(1); + pwent = getpwuid(getuid()); + if (pwent == NULL) { + int errsave = errno; + cl_perror("Cannot get name for uid [%d]", getuid()); + errno = errsave; + return -1; + } + if ((rc=chdir(pwent->pw_name)) < 0) { + int errsave = errno; + cl_perror("Cannot chdir to [%s/%s]", dir, pwent->pw_name); + errno = errsave; + } + return rc; } pid_t make_daemon(void) { pid_t pid; const char * devnull = "/dev/null"; pid = fork(); if (pid < 0) { cl_log(LOG_ERR, "%s: could not start daemon\n", cmdname); cl_perror("fork"); exit(1); }else if (pid > 0) { return pid; } - cl_log_enable_stderr(FALSE); + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); /* This is the child; ensure privileges have not been lost. 
*/ maximize_priority(); sysrq_init(); umask(022); close(0); (void)open(devnull, O_RDONLY); close(1); (void)open(devnull, O_WRONLY); close(2); (void)open(devnull, O_WRONLY); - cl_cdtocoredir(); + sbd_cdtocoredir(); return 0; } int header_dump(struct sbd_context *st) { struct sector_header_s *s_header; char uuid[37]; s_header = header_get(st); if (s_header == NULL) return -1; printf("Header version : %u.%u\n", s_header->version, s_header->minor_version); if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); printf("UUID : %s\n", uuid); } printf("Number of slots : %u\n", s_header->slots); printf("Sector size : %lu\n", (unsigned long)s_header->sector_size); printf("Timeout (watchdog) : %lu\n", (unsigned long)s_header->timeout_watchdog); printf("Timeout (allocate) : %lu\n", (unsigned long)s_header->timeout_allocate); printf("Timeout (loop) : %lu\n", (unsigned long)s_header->timeout_loop); printf("Timeout (msgwait) : %lu\n", (unsigned long)s_header->timeout_msgwait); return 0; } void sbd_get_uname(void) { struct utsname uname_buf; int i; if (uname(&uname_buf) < 0) { cl_perror("uname() failed?"); exit(1); } local_uname = strdup(uname_buf.nodename); for (i = 0; i < strlen(local_uname); i++) local_uname[i] = tolower(local_uname[i]); } diff --git a/src/sbd-md.c b/src/sbd-md.c index 0a7278c..b9dfdfb 100644 --- a/src/sbd-md.c +++ b/src/sbd-md.c @@ -1,1171 +1,1331 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "sbd.h" +#define LOCKSTRLEN 11 struct servants_list_item *servants_leader = NULL; static int servant_count = 0; static int servant_restart_interval = 5; static int servant_restart_count = 1; static int servant_inform_parent = 0; static int check_pcmk = 0; static int start_mode = 0; static char* pidfile = NULL; static void open_any_device(void); static int check_timeout_inconsistent(struct sector_header_s *hdr); int quorum_write(int good_servants) { return (good_servants > servant_count/2); } int quorum_read(int good_servants) { if (servant_count >= 3) return (good_servants > servant_count/2); else return (good_servants >= 1); } int assign_servant(const char* devname, functionp_t functionp, const void* argp) { pid_t pid = 0; int rc = 0; pid = fork(); if (pid == 0) { /* child */ maximize_priority(); rc = (*functionp)(devname, argp); if (rc == -1) exit(1); else exit(0); } else if (pid != -1) { /* parent */ return pid; } else { cl_log(LOG_ERR,"Failed to fork servant"); exit(1); } } int init_devices() { int rc = 0; struct sbd_context *st; struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { fprintf(stdout, "Initializing device %s\n", s->devname); st = open_device(s->devname, LOG_ERR); if (!st) { return -1; } rc = init_device(st); close_device(st); if (rc == -1) { fprintf(stderr, "Failed to init device %s\n", s->devname); return rc; } fprintf(stdout, "Device %s is initialized.\n", s->devname); } return 0; } int slot_msg_wrapper(const char* devname, const void* argp) { int rc = 0; struct sbd_context *st; const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp; st = open_device(devname, LOG_WARNING); if (!st) return -1; cl_log(LOG_INFO, "Delivery process handling %s", devname); rc = slot_msg(st, arg->name, arg->msg); 
close_device(st); return rc; } int slot_ping_wrapper(const char* devname, const void* argp) { int rc = 0; const char* name = (const char*)argp; struct sbd_context *st; st = open_device(devname, LOG_WARNING); if (!st) return -1; rc = slot_ping(st, name); close_device(st); return rc; } int allocate_slots(const char *name) { int rc = 0; struct sbd_context *st; struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { fprintf(stdout, "Trying to allocate slot for %s on device %s.\n", name, s->devname); st = open_device(s->devname, LOG_WARNING); if (!st) { return -1; } rc = slot_allocate(st, name); close_device(st); if (rc < 0) return rc; fprintf(stdout, "Slot for %s has been allocated on %s.\n", name, s->devname); } return 0; } int list_slots() { int rc = 0; struct servants_list_item *s; struct sbd_context *st; for (s = servants_leader; s; s = s->next) { st = open_device(s->devname, LOG_WARNING); if (!st) { fprintf(stdout, "== disk %s unreadable!\n", s->devname); continue; } rc = slot_list(st); close_device(st); if (rc == -1) { fprintf(stdout, "== Slots on disk %s NOT dumped\n", s->devname); } } return 0; } int ping_via_slots(const char *name) { int sig = 0; pid_t pid = 0; int status = 0; int servants_finished = 0; sigset_t procmask; siginfo_t sinfo; struct servants_list_item *s; sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigprocmask(SIG_BLOCK, &procmask, NULL); for (s = servants_leader; s; s = s->next) { s->pid = assign_servant(s->devname, &slot_ping_wrapper, (const void*)name); } while (servants_finished < servant_count) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = wait(&status))) { if (pid == -1 && errno == ECHILD) { break; } else { s = lookup_servant_by_pid(pid); if (s) { servants_finished++; } } } } } return 0; } /* This is a bit hackish, but the easiest way to rewire all process * exits to send the desired signal to the parent. 
*/ void servant_exit(void) { pid_t ppid; union sigval signal_value; ppid = getppid(); if (servant_inform_parent) { memset(&signal_value, 0, sizeof(signal_value)); sigqueue(ppid, SIG_IO_FAIL, signal_value); } } int servant(const char *diskname, const void* argp) { struct sector_mbox_s *s_mbox = NULL; struct sector_node_s *s_node = NULL; struct sector_header_s *s_header = NULL; int mbox; int rc = 0; time_t t0, t1, latency; union sigval signal_value; sigset_t servant_masks; struct sbd_context *st; pid_t ppid; char uuid[37]; const struct servants_list_item *s = argp; if (!diskname) { cl_log(LOG_ERR, "Empty disk name %s.", diskname); return -1; } cl_log(LOG_INFO, "Servant starting for device %s", diskname); /* Block most of the signals */ sigfillset(&servant_masks); sigdelset(&servant_masks, SIGKILL); sigdelset(&servant_masks, SIGFPE); sigdelset(&servant_masks, SIGILL); sigdelset(&servant_masks, SIGSEGV); sigdelset(&servant_masks, SIGBUS); sigdelset(&servant_masks, SIGALRM); /* FIXME: check error */ sigprocmask(SIG_SETMASK, &servant_masks, NULL); atexit(servant_exit); servant_inform_parent = 1; st = open_device(diskname, LOG_WARNING); if (!st) { return -1; } s_header = header_get(st); if (!s_header) { cl_log(LOG_ERR, "Not a valid header on %s", diskname); return -1; } if (check_timeout_inconsistent(s_header) < 0) { cl_log(LOG_ERR, "Timeouts on %s do not match first device", diskname); return -1; } if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Device %s uuid: %s", diskname, uuid); } mbox = slot_allocate(st, local_uname); if (mbox < 0) { cl_log(LOG_ERR, "No slot allocated, and automatic allocation failed for disk %s.", diskname); rc = -1; goto out; } s_node = sector_alloc(); if (slot_read(st, mbox, s_node) < 0) { cl_log(LOG_ERR, "Unable to read node entry on %s", diskname); exit(1); } DBGLOG(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname); if (s_header->minor_version == 0) { set_proc_title("sbd: watcher: %s - 
slot: %d", diskname, mbox); } else { set_proc_title("sbd: watcher: %s - slot: %d - uuid: %s", diskname, mbox, uuid); } s_mbox = sector_alloc(); if (s->first_start) { if (start_mode > 0) { if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed during start-up in servant."); rc = -1; goto out; } if (s_mbox->cmd != SBD_MSG_EXIT && s_mbox->cmd != SBD_MSG_EMPTY) { /* Not a clean stop. Abort start-up */ cl_log(LOG_WARNING, "Found fencing message - aborting start-up. Manual intervention required!"); ppid = getppid(); sigqueue(ppid, SIG_EXITREQ, signal_value); rc = 0; goto out; } } DBGLOG(LOG_INFO, "First servant start - zeroing inbox"); memset(s_mbox, 0, sizeof(*s_mbox)); if (mbox_write(st, mbox, s_mbox) < 0) { rc = -1; goto out; } } memset(&signal_value, 0, sizeof(signal_value)); while (1) { struct sector_header_s *s_header_retry = NULL; struct sector_node_s *s_node_retry = NULL; t0 = time(NULL); sleep(timeout_loop); ppid = getppid(); if (ppid == 1) { /* Our parent died unexpectedly. Triggering * self-fence. */ do_reset(); } /* These attempts are, by definition, somewhat racy. If * the device is wiped out or corrupted between here and * us reading our mbox, there is nothing we can do about * that. But at least we tried. 
*/ s_header_retry = header_get(st); if (!s_header_retry) { cl_log(LOG_ERR, "No longer found a valid header on %s", diskname); exit(1); } if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) { cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname); exit(1); } free(s_header_retry); s_node_retry = sector_alloc(); if (slot_read(st, mbox, s_node_retry) < 0) { cl_log(LOG_ERR, "slot read failed in servant."); exit(1); } if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) { cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname); exit(1); } free(s_node_retry); if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed in servant."); exit(1); } if (s_mbox->cmd > 0) { cl_log(LOG_INFO, "Received command %s from %s on disk %s", char2cmd(s_mbox->cmd), s_mbox->from, diskname); switch (s_mbox->cmd) { case SBD_MSG_TEST: memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); sigqueue(ppid, SIG_TEST, signal_value); break; case SBD_MSG_RESET: do_reset(); break; case SBD_MSG_OFF: do_off(); break; case SBD_MSG_EXIT: sigqueue(ppid, SIG_EXITREQ, signal_value); break; case SBD_MSG_CRASHDUMP: do_crashdump(); break; default: /* FIXME: An "unknown" message might result from a partial write. log it and clear the slot. 
*/ cl_log(LOG_ERR, "Unknown message on disk %s", diskname); memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); break; } } sigqueue(ppid, SIG_LIVENESS, signal_value); t1 = time(NULL); latency = t1 - t0; if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) { cl_log(LOG_WARNING, "Latency: %d exceeded threshold %d on disk %s", (int)latency, (int)timeout_watchdog_warn, diskname); } else if (debug) { DBGLOG(LOG_INFO, "Latency: %d on disk %s", (int)latency, diskname); } } out: free(s_mbox); close_device(st); if (rc == 0) { servant_inform_parent = 0; } return rc; } void recruit_servant(const char *devname, pid_t pid) { struct servants_list_item *s = servants_leader; struct servants_list_item *newbie; newbie = malloc(sizeof(*newbie)); if (!newbie) { fprintf(stderr, "malloc failed in recruit_servant.\n"); exit(1); } memset(newbie, 0, sizeof(*newbie)); newbie->devname = strdup(devname); newbie->pid = pid; newbie->first_start = 1; if (!s) { servants_leader = newbie; } else { while (s->next) s = s->next; s->next = newbie; } servant_count++; } struct servants_list_item *lookup_servant_by_dev(const char *devname) { struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { if (strncasecmp(s->devname, devname, strlen(s->devname))) break; } return s; } struct servants_list_item *lookup_servant_by_pid(pid_t pid) { struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { if (s->pid == pid) break; } return s; } int check_all_dead(void) { struct servants_list_item *s; int r = 0; union sigval svalue; for (s = servants_leader; s; s = s->next) { if (s->pid != 0) { r = sigqueue(s->pid, 0, svalue); if (r == -1 && errno == ESRCH) continue; return 0; } } return 1; } void servant_start(struct servants_list_item *s) { int r = 0; union sigval svalue; if (s->pid != 0) { r = sigqueue(s->pid, 0, svalue); if ((r != -1 || errno != ESRCH)) return; } s->restarts++; if (strcmp("pcmk",s->devname) == 0) { DBGLOG(LOG_INFO, "Starting Pacemaker 
servant"); s->pid = assign_servant(s->devname, servant_pcmk, NULL); } else { DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname); s->pid = assign_servant(s->devname, servant, s); } clock_gettime(CLOCK_MONOTONIC, &s->t_started); return; } void servants_start(void) { struct servants_list_item *s; for (s = servants_leader; s; s = s->next) { s->restarts = 0; servant_start(s); } } void servants_kill(void) { struct servants_list_item *s; union sigval svalue; for (s = servants_leader; s; s = s->next) { if (s->pid != 0) sigqueue(s->pid, SIGKILL, svalue); } } void open_any_device(void) { struct sector_header_s *hdr_cur = NULL; struct timespec t_0; int t_wait = 0; clock_gettime(CLOCK_MONOTONIC, &t_0); while (!hdr_cur && t_wait < timeout_startup) { struct timespec t_now; struct servants_list_item* s; for (s = servants_leader; s; s = s->next) { struct sbd_context *st = open_device(s->devname, LOG_DEBUG); if (!st) continue; hdr_cur = header_get(st); close_device(st); if (hdr_cur) break; } clock_gettime(CLOCK_MONOTONIC, &t_now); t_wait = t_now.tv_sec - t_0.tv_sec; if (!hdr_cur) { sleep(timeout_loop); } } if (hdr_cur) { timeout_watchdog = hdr_cur->timeout_watchdog; timeout_allocate = hdr_cur->timeout_allocate; timeout_loop = hdr_cur->timeout_loop; timeout_msgwait = hdr_cur->timeout_msgwait; } else { cl_log(LOG_ERR, "No devices were available at start-up within %i seconds.", timeout_startup); exit(1); } free(hdr_cur); return; } int check_timeout_inconsistent(struct sector_header_s *hdr) { if (timeout_watchdog != hdr->timeout_watchdog) { cl_log(LOG_WARNING, "watchdog timeout: %d versus %d on this device", (int)timeout_watchdog, (int)hdr->timeout_watchdog); return -1; } if (timeout_allocate != hdr->timeout_allocate) { cl_log(LOG_WARNING, "allocate timeout: %d versus %d on this device", (int)timeout_allocate, (int)hdr->timeout_allocate); return -1; } if (timeout_loop != hdr->timeout_loop) { cl_log(LOG_WARNING, "loop timeout: %d versus %d on this device", (int)timeout_loop, 
(int)hdr->timeout_loop); return -1; } if (timeout_msgwait != hdr->timeout_msgwait) { cl_log(LOG_WARNING, "msgwait timeout: %d versus %d on this device", (int)timeout_msgwait, (int)hdr->timeout_msgwait); return -1; } return 0; } inline void cleanup_servant_by_pid(pid_t pid) { struct servants_list_item* s; s = lookup_servant_by_pid(pid); if (s) { cl_log(LOG_WARNING, "Servant for %s (pid: %i) has terminated", s->devname, s->pid); s->pid = 0; } else { /* This most likely is a stray signal from somewhere, or * a SIGCHLD for a process that has previously * explicitly disconnected. */ DBGLOG(LOG_INFO, "cleanup_servant: Nothing known about pid %i", pid); } } int inquisitor_decouple(void) { pid_t ppid = getppid(); union sigval signal_value; /* During start-up, we only arm the watchdog once we've got * quorum at least once. */ if (watchdog_use) { if (watchdog_init() < 0) { return -1; } } if (ppid > 1) { sigqueue(ppid, SIG_LIVENESS, signal_value); } return 0; } +static int sbd_lock_running(long pid) +{ + int rc = 0; + long mypid; + int running = 0; + char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; + + /* check if pid is running */ + if (kill(pid, 0) < 0 && errno == ESRCH) { + goto bail; + } + +#ifndef HAVE_PROC_PID + return 1; +#endif + + /* check to make sure pid hasn't been reused by another process */ + snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid); + rc = readlink(proc_path, exe_path, PATH_MAX-1); + if(rc < 0) { + cl_perror("Could not read from %s", proc_path); + goto bail; + } + exe_path[rc] = 0; + mypid = (unsigned long) getpid(); + snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", mypid); + rc = readlink(proc_path, myexe_path, PATH_MAX-1); + if(rc < 0) { + cl_perror("Could not read from %s", proc_path); + goto bail; + } + myexe_path[rc] = 0; + + if(strcmp(exe_path, myexe_path) == 0) { + running = 1; + } + + bail: + return running; +} + +static int +sbd_lock_pidfile(const char *filename) +{ + char lf_name[256], tf_name[256], 
buf[LOCKSTRLEN+1]; + int fd; + long pid, mypid; + int rc; + struct stat sbuf; + + if (filename == NULL) { + errno = EFAULT; + return -1; + } + + mypid = (unsigned long) getpid(); + snprintf(lf_name, sizeof(lf_name), "%s",filename); + snprintf(tf_name, sizeof(tf_name), "%s.%lu", + filename, mypid); + + if ((fd = open(lf_name, O_RDONLY)) >= 0) { + if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) { + sleep(1); /* if someone was about to create one, + * give'm a sec to do so + * Though if they follow our protocol, + * this won't happen. They should really + * put the pid in, then link, not the + * other way around. + */ + } + if (read(fd, buf, sizeof(buf)) < 1) { + /* lockfile empty -> rm it and go on */; + } else { + if (sscanf(buf, "%lu", &pid) < 1) { + /* lockfile screwed up -> rm it and go on */ + } else { + if (pid > 1 && (getpid() != pid) + && sbd_lock_running(pid)) { + /* is locked by existing process + * -> give up */ + close(fd); + return -1; + } else { + /* stale lockfile -> rm it and go on */ + } + } + } + unlink(lf_name); + close(fd); + } + if ((fd = open(tf_name, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { + /* Hmmh, why did we fail? Anyway, nothing we can do about it */ + return -3; + } + + /* Slight overkill with the %*d format ;-) */ + snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN-1, mypid); + + if (write(fd, buf, LOCKSTRLEN) != LOCKSTRLEN) { + /* Again, nothing we can do about this */ + rc = -3; + close(fd); + goto out; + } + close(fd); + + switch (link(tf_name, lf_name)) { + case 0: + if (stat(tf_name, &sbuf) < 0) { + /* something weird happened */ + rc = -3; + break; + } + if (sbuf.st_nlink < 2) { + /* somehow, it didn't get through - NFS trouble? */ + rc = -2; + break; + } + rc = 0; + break; + case EEXIST: + rc = -1; + break; + default: + rc = -3; + } + out: + unlink(tf_name); + return rc; +} + + +/* + * Unlock a file (remove its lockfile) + * do we need to check, if its (still) ours? 
No, IMHO, if someone else + * locked our line, it's his fault -tho + * returns 0 on success + * <0 if some failure occured + */ + +static int +sbd_unlock_pidfile(const char *filename) +{ + char lf_name[256]; + + if (filename == NULL) { + errno = EFAULT; + return -1; + } + + snprintf(lf_name, sizeof(lf_name), "%s", filename); + + return unlink(lf_name); +} + + void inquisitor_child(void) { int sig, pid; sigset_t procmask; siginfo_t sinfo; int status; struct timespec timeout; int exiting = 0; int decoupled = 0; int pcmk_healthy = 0; int pcmk_override = 0; time_t latency; struct timespec t_last_tickle, t_now; struct servants_list_item* s; if (debug_mode) { cl_log(LOG_ERR, "DEBUG MODE IS ACTIVE - DO NOT RUN IN PRODUCTION!"); } set_proc_title("sbd: inquisitor"); if (pidfile) { - if (cl_lock_pidfile(pidfile) < 0) { + if (sbd_lock_pidfile(pidfile) < 0) { exit(1); } } sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigaddset(&procmask, SIG_LIVENESS); sigaddset(&procmask, SIG_EXITREQ); sigaddset(&procmask, SIG_TEST); sigaddset(&procmask, SIG_IO_FAIL); sigaddset(&procmask, SIG_PCMK_UNHEALTHY); sigaddset(&procmask, SIG_RESTART); sigaddset(&procmask, SIGUSR1); sigaddset(&procmask, SIGUSR2); sigprocmask(SIG_BLOCK, &procmask, NULL); /* We only want this to have an effect during watch right now; * pinging and fencing would be too confused */ if (check_pcmk) { recruit_servant("pcmk", 0); servant_count--; } servants_start(); timeout.tv_sec = timeout_loop; timeout.tv_nsec = 0; clock_gettime(CLOCK_MONOTONIC, &t_last_tickle); while (1) { int good_servants = 0; sig = sigtimedwait(&procmask, &sinfo, &timeout); clock_gettime(CLOCK_MONOTONIC, &t_now); if (sig == SIG_EXITREQ) { servants_kill(); - watchdog_close(); + watchdog_close(true); exiting = 1; } else if (sig == SIGCHLD) { while ((pid = waitpid(-1, &status, WNOHANG))) { if (pid == -1 && errno == ECHILD) { break; } else { cleanup_servant_by_pid(pid); } } } else if (sig == SIG_PCMK_UNHEALTHY) { s = 
lookup_servant_by_pid(sinfo.si_pid); if (s && strcmp(s->devname, "pcmk") == 0) { if (pcmk_healthy != 0) { cl_log(LOG_WARNING, "Pacemaker health check: UNHEALTHY"); } pcmk_healthy = 0; clock_gettime(CLOCK_MONOTONIC, &s->t_last); } else { cl_log(LOG_WARNING, "Ignoring SIG_PCMK_UNHEALTHY from unknown source"); } } else if (sig == SIG_IO_FAIL) { s = lookup_servant_by_pid(sinfo.si_pid); if (s) { DBGLOG(LOG_INFO, "Servant for %s requests to be disowned", s->devname); cleanup_servant_by_pid(sinfo.si_pid); } } else if (sig == SIG_LIVENESS) { s = lookup_servant_by_pid(sinfo.si_pid); if (s) { if (strcmp(s->devname, "pcmk") == 0) { if (pcmk_healthy != 1) { cl_log(LOG_INFO, "Pacemaker health check: OK"); } pcmk_healthy = 1; }; s->first_start = 0; clock_gettime(CLOCK_MONOTONIC, &s->t_last); } } else if (sig == SIG_TEST) { } else if (sig == SIGUSR1) { if (exiting) continue; servants_start(); } if (exiting) { if (check_all_dead()) { if (pidfile) { - cl_unlock_pidfile(pidfile); + sbd_unlock_pidfile(pidfile); } exit(0); } else continue; } good_servants = 0; for (s = servants_leader; s; s = s->next) { int age = t_now.tv_sec - s->t_last.tv_sec; if (!s->t_last.tv_sec) continue; if (age < (int)(timeout_io+timeout_loop)) { if (strcmp(s->devname, "pcmk") != 0) { good_servants++; } s->outdated = 0; } else if (!s->outdated) { if (strcmp(s->devname, "pcmk") == 0) { /* If the state is outdated, we * override the last reported * state */ pcmk_healthy = 0; cl_log(LOG_WARNING, "Pacemaker state outdated (age: %d)", age); } else if (!s->restart_blocked) { cl_log(LOG_WARNING, "Servant for %s outdated (age: %d)", s->devname, age); } s->outdated = 1; } } if (quorum_read(good_servants) || pcmk_healthy) { if (!decoupled) { if (inquisitor_decouple() < 0) { servants_kill(); exiting = 1; continue; } else { decoupled = 1; } } if (!quorum_read(good_servants)) { if (!pcmk_override) { cl_log(LOG_WARNING, "Majority of devices lost - surviving on pacemaker"); pcmk_override = 1; /* Just to ensure the message is 
only logged once */ } } else { pcmk_override = 0; } watchdog_tickle(); clock_gettime(CLOCK_MONOTONIC, &t_last_tickle); } /* Note that this can actually be negative, since we set * last_tickle after we set now. */ latency = t_now.tv_sec - t_last_tickle.tv_sec; if (timeout_watchdog && (latency > (int)timeout_watchdog)) { if (!decoupled) { /* We're still being watched by our * parent. We don't fence, but exit. */ cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up."); servants_kill(); exiting = 1; continue; } if (debug_mode < 2) { /* At level 2 or above, we do nothing, but expect * things to eventually return to * normal. */ do_reset(); } else { cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!"); } } if (timeout_watchdog_warn && (latency > (int)timeout_watchdog_warn)) { cl_log(LOG_WARNING, "Latency: No liveness for %d s exceeds threshold of %d s (healthy servants: %d)", (int)latency, (int)timeout_watchdog_warn, good_servants); } for (s = servants_leader; s; s = s->next) { int age = t_now.tv_sec - s->t_started.tv_sec; if (age > servant_restart_interval) { s->restarts = 0; s->restart_blocked = 0; } if (servant_restart_count && (s->restarts >= servant_restart_count) && !s->restart_blocked) { if (servant_restart_count > 1) { cl_log(LOG_WARNING, "Max retry count (%d) reached: not restarting servant for %s", (int)servant_restart_count, s->devname); } s->restart_blocked = 1; } if (!s->restart_blocked) { servant_start(s); } } } /* not reached */ exit(0); } int inquisitor(void) { int sig, pid, inquisitor_pid; int status; sigset_t procmask; siginfo_t sinfo; /* Where's the best place for sysrq init ?*/ sysrq_init(); sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigaddset(&procmask, SIG_LIVENESS); sigprocmask(SIG_BLOCK, &procmask, NULL); open_any_device(); inquisitor_pid = make_daemon(); if (inquisitor_pid == 0) { inquisitor_child(); } /* We're the parent. 
Wait for a happy signal from our child * before we proceed - we either get "SIG_LIVENESS" when the * inquisitor has completed the first successful round, or * ECHLD when it exits with an error. */ while (1) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = waitpid(-1, &status, WNOHANG))) { if (pid == -1 && errno == ECHILD) { break; } /* We got here because the inquisitor * did not succeed. */ return -1; } } else if (sig == SIG_LIVENESS) { /* Inquisitor started up properly. */ return 0; } else { fprintf(stderr, "Nobody expected the spanish inquisition!\n"); continue; } } /* not reached */ return -1; } int messenger(const char *name, const char *msg) { int sig = 0; pid_t pid = 0; int status = 0; int servants_finished = 0; int successful_delivery = 0; sigset_t procmask; siginfo_t sinfo; struct servants_list_item *s; struct slot_msg_arg_t slot_msg_arg = {name, msg}; sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigprocmask(SIG_BLOCK, &procmask, NULL); for (s = servants_leader; s; s = s->next) { s->pid = assign_servant(s->devname, &slot_msg_wrapper, &slot_msg_arg); } while (!(quorum_write(successful_delivery) || (servants_finished == servant_count))) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = waitpid(-1, &status, WNOHANG))) { if (pid == -1 && errno == ECHILD) { break; } else { servants_finished++; if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { DBGLOG(LOG_INFO, "Process %d succeeded.", (int)pid); successful_delivery++; } else { cl_log(LOG_WARNING, "Process %d failed to deliver!", (int)pid); } } } } } if (quorum_write(successful_delivery)) { cl_log(LOG_INFO, "Message successfully delivered."); return 0; } else { cl_log(LOG_ERR, "Message is not delivered via more then a half of devices"); return -1; } } int dump_headers(void) { int rc = 0; struct servants_list_item *s = servants_leader; struct sbd_context *st; for (s = servants_leader; s; s = s->next) { fprintf(stdout, "==Dumping header on disk 
%s\n", s->devname); st = open_device(s->devname, LOG_WARNING); if (!st) { fprintf(stdout, "== disk %s unreadable!\n", s->devname); continue; } rc = header_dump(st); close_device(st); if (rc == -1) { fprintf(stdout, "==Header on disk %s NOT dumped\n", s->devname); } else { fprintf(stdout, "==Header on disk %s is dumped\n", s->devname); } } return rc; } int main(int argc, char **argv, char **envp) { int exit_status = 0; int c; int w = 0; + int qb_facility; if ((cmdname = strrchr(argv[0], '/')) == NULL) { cmdname = argv[0]; } else { ++cmdname; } - cl_log_set_entity(cmdname); - cl_log_enable_stderr(0); - cl_log_set_facility(LOG_DAEMON); + qb_facility = qb_log_facility2int("daemon"); + qb_log_init(cmdname, qb_facility, LOG_ERR); + + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); sbd_get_uname(); while ((c = getopt(argc, argv, "C:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:")) != -1) { switch (c) { case 'D': break; case 'Z': debug_mode++; cl_log(LOG_INFO, "Debug mode now at level %d", (int)debug_mode); break; case 'R': skip_rt = 1; cl_log(LOG_INFO, "Realtime mode deactivated."); break; case 'S': start_mode = atoi(optarg); cl_log(LOG_INFO, "Start mode set to: %d", (int)start_mode); break; case 's': timeout_startup = atoi(optarg); cl_log(LOG_INFO, "Start timeout set to: %d", (int)timeout_startup); break; case 'v': debug = 1; cl_log(LOG_INFO, "Verbose mode enabled."); break; case 'T': watchdog_set_timeout = 0; cl_log(LOG_INFO, "Setting watchdog timeout disabled; using defaults."); break; case 'W': w++; break; case 'w': watchdogdev = strdup(optarg); break; case 'd': recruit_servant(optarg, 0); break; case 'P': check_pcmk = 1; break; case 'n': local_uname = strdup(optarg); cl_log(LOG_INFO, "Overriding local hostname to %s", local_uname); break; case 'p': pidfile = strdup(optarg); cl_log(LOG_INFO, "pidfile set to %s", pidfile); break; case 'C': timeout_watchdog_crashdump = atoi(optarg); cl_log(LOG_INFO, "Setting 
crashdump watchdog timeout to %d", (int)timeout_watchdog_crashdump); break; case '1': timeout_watchdog = atoi(optarg); break; case '2': timeout_allocate = atoi(optarg); break; case '3': timeout_loop = atoi(optarg); break; case '4': timeout_msgwait = atoi(optarg); break; case '5': timeout_watchdog_warn = atoi(optarg); cl_log(LOG_INFO, "Setting latency warning to %d", (int)timeout_watchdog_warn); break; case 't': servant_restart_interval = atoi(optarg); cl_log(LOG_INFO, "Setting servant restart interval to %d", (int)servant_restart_interval); break; case 'I': timeout_io = atoi(optarg); cl_log(LOG_INFO, "Setting IO timeout to %d", (int)timeout_io); break; case 'F': servant_restart_count = atoi(optarg); cl_log(LOG_INFO, "Servant restart count set to %d", (int)servant_restart_count); break; case 'h': usage(); return (0); default: exit_status = -2; goto out; break; } } if (w > 0) { watchdog_use = w % 2; } if (watchdog_use) { cl_log(LOG_INFO, "Watchdog enabled."); } else { cl_log(LOG_INFO, "Watchdog disabled."); } if (servant_count < 1 || servant_count > 3) { fprintf(stderr, "You must specify 1 to 3 devices via the -d option.\n"); exit_status = -1; goto out; } /* There must at least be one command following the options: */ if ((argc - optind) < 1) { fprintf(stderr, "Not enough arguments.\n"); exit_status = -2; goto out; } if (init_set_proc_title(argc, argv, envp) < 0) { fprintf(stderr, "Allocation of proc title failed.\n"); exit_status = -1; goto out; } maximize_priority(); if (strcmp(argv[optind], "create") == 0) { exit_status = init_devices(); } else if (strcmp(argv[optind], "dump") == 0) { exit_status = dump_headers(); } else if (strcmp(argv[optind], "allocate") == 0) { exit_status = allocate_slots(argv[optind + 1]); } else if (strcmp(argv[optind], "list") == 0) { exit_status = list_slots(); } else if (strcmp(argv[optind], "message") == 0) { exit_status = messenger(argv[optind + 1], argv[optind + 2]); } else if (strcmp(argv[optind], "ping") == 0) { exit_status = 
ping_via_slots(argv[optind + 1]); } else if (strcmp(argv[optind], "watch") == 0) { exit_status = inquisitor(); } else { exit_status = -2; } out: if (exit_status < 0) { if (exit_status == -2) { usage(); } else { fprintf(stderr, "sbd failed; please check the logs.\n"); } return (1); } return (0); } diff --git a/src/sbd.h b/src/sbd.h index 0f79cdc..48a9107 100644 --- a/src/sbd.h +++ b/src/sbd.h @@ -1,214 +1,224 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include -#include -#include -#include -#include -#include -#include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include +#include /* signals reserved for multi-disk sbd */ #define SIG_LIVENESS (SIGRTMIN + 1) /* report liveness of the disk */ #define SIG_EXITREQ (SIGRTMIN + 2) /* exit request to inquisitor */ #define SIG_TEST (SIGRTMIN + 3) /* trigger self test */ #define SIG_RESTART (SIGRTMIN + 4) /* trigger restart of all failed disk */ #define SIG_IO_FAIL (SIGRTMIN + 5) /* the IO child requests to be considered failed */ #define SIG_PCMK_UNHEALTHY (SIGRTMIN + 6) /* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */ +#define HOG_CHAR 0xff +#define 
HA_COREDIR "/var/lib/heartbeat/cores" + /* Sector data types */ struct sector_header_s { char magic[8]; unsigned char version; unsigned char slots; /* Caveat: stored in network byte-order */ uint32_t sector_size; uint32_t timeout_watchdog; uint32_t timeout_allocate; uint32_t timeout_loop; uint32_t timeout_msgwait; /* Minor version for extensions to the core data set: * compatible and optional values. */ unsigned char minor_version; uuid_t uuid; /* 16 bytes */ }; struct sector_mbox_s { signed char cmd; char from[64]; }; struct sector_node_s { /* slots will be created with in_use == 0 */ char in_use; char name[64]; }; struct servants_list_item { const char* devname; pid_t pid; int restarts; int restart_blocked; int outdated; int first_start; struct timespec t_last, t_started; struct servants_list_item *next; }; struct sbd_context { int devfd; io_context_t ioctx; struct iocb io; }; #define SBD_MSG_EMPTY 0x00 #define SBD_MSG_TEST 0x01 #define SBD_MSG_RESET 0x02 #define SBD_MSG_OFF 0x03 #define SBD_MSG_EXIT 0x04 #define SBD_MSG_CRASHDUMP 0x05 #define SLOT_TO_SECTOR(slot) (1+slot*2) #define MBOX_TO_SECTOR(mbox) (2+mbox*2) void usage(void); int watchdog_init_interval(void); int watchdog_tickle(void); int watchdog_init(void); void sysrq_init(void); -void watchdog_close(void); +void watchdog_close(bool disarm); struct sbd_context *open_device(const char* devname, int loglevel); void close_device(struct sbd_context *st); signed char cmd2char(const char *cmd); void * sector_alloc(void); const char* char2cmd(const char cmd); int sector_write(struct sbd_context *st, int sector, void *data); int sector_read(struct sbd_context *st, int sector, void *data); int slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node); int slot_write(struct sbd_context *st, int slot, struct sector_node_s *s_node); int mbox_write(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox); int mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox); int 
mbox_write_verify(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox); /* After a call to header_write(), certain data fields will have been * converted to on-disk byte-order; the header should not be accessed * afterwards anymore! */ int header_write(struct sbd_context *st, struct sector_header_s *s_header); int header_read(struct sbd_context *st, struct sector_header_s *s_header); int valid_header(const struct sector_header_s *s_header); struct sector_header_s * header_get(struct sbd_context *st); int init_device(struct sbd_context *st); int slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name); int slot_unused(struct sbd_context *st, const struct sector_header_s *s_header); int slot_allocate(struct sbd_context *st, const char *name); int slot_list(struct sbd_context *st); int slot_ping(struct sbd_context *st, const char *name); int slot_msg(struct sbd_context *st, const char *name, const char *cmd); int header_dump(struct sbd_context *st); void sysrq_trigger(char t); void do_crashdump(void); void do_reset(void); void do_off(void); pid_t make_daemon(void); void maximize_priority(void); void sbd_get_uname(void); /* Tunable defaults: */ extern unsigned long timeout_watchdog; extern unsigned long timeout_watchdog_warn; extern unsigned long timeout_watchdog_crashdump; extern int timeout_allocate; extern int timeout_loop; extern int timeout_msgwait; extern int timeout_io; extern int timeout_startup; extern int watchdog_use; extern int watchdog_set_timeout; extern int skip_rt; extern int debug; extern int debug_mode; extern const char *watchdogdev; extern char* local_uname; /* Global, non-tunable variables: */ extern int sector_size; extern int watchdogfd; extern const char* cmdname; typedef int (*functionp_t)(const char* devname, const void* argp); int assign_servant(const char* devname, functionp_t functionp, const void* argp); int init_devices(void); struct slot_msg_arg_t { const char* name; const char* msg; }; int 
slot_msg_wrapper(const char* devname, const void* argp);
 int slot_ping_wrapper(const char* devname, const void* argp);
 int allocate_slots(const char *name);
 int list_slots(void);
 int ping_via_slots(const char *name);
 int dump_headers(void);
 int check_all_dead(void);
 void servant_exit(void);
 int servant(const char *diskname, const void* argp);
 void recruit_servant(const char *devname, pid_t pid);
 struct servants_list_item *lookup_servant_by_dev(const char *devname);
 struct servants_list_item *lookup_servant_by_pid(pid_t pid);
 void servants_kill(void);
 void servants_start(void);
 void servant_start(struct servants_list_item *s);
 void inquisitor_child(void);
 int inquisitor(void);
 int inquisitor_decouple(void);
 int messenger(const char *name, const char *msg);
 void cleanup_servant_by_pid(pid_t pid);
 int quorum_write(int good_servants);
 int quorum_read(int good_servants);
 int pcmk_have_quorum(void);
 int servant_pcmk(const char *diskname, const void* argp);
-#define DBGLOG(lvl, fmt, args...) do { \
+/* Process title helpers, implemented in setproctitle.c. */
+int init_set_proc_title(int argc, char *argv[], char *envp[]);
+void set_proc_title(const char *fmt,...);
+
+
+/*
+ * cl_log(): forward a printf-style message to
+ * qb_log_from_external_source(), tagging it with the calling function,
+ * file and line. "level" is passed through as the priority argument.
+ */
+#define cl_log(level, fmt, args...) qb_log_from_external_source( __func__, __FILE__, fmt, level, __LINE__, 0, ##args)
+
+/*
+ * cl_perror(): log at LOG_ERR with ": <strerror(errno)> (<errno>)" appended
+ * to the caller's message.
+ *
+ * errno is captured once, before anything else is evaluated, so the text
+ * and the number always describe the same error. The temporary has a
+ * macro-private name so it cannot shadow a caller variable (for instance
+ * one called "err") that appears in args.
+ */
+# define cl_perror(fmt, args...) do { \
+	const int cl_perror_errno_ = errno; \
+	cl_log(LOG_ERR, fmt ": %s (%d)", ##args, \
+	       strerror(cl_perror_errno_), cl_perror_errno_); \
+	} while(0)
+
+/* DBGLOG(): like cl_log(), but only emits when the global "debug" is > 0. */
+#define DBGLOG(lvl, fmt, args...) do { \
 	if (debug > 0) cl_log(lvl, fmt, ##args); \
 	} while(0)
diff --git a/src/setproctitle.c b/src/setproctitle.c
new file mode 100644
index 0000000..1d44a78
--- /dev/null
+++ b/src/setproctitle.c
@@ -0,0 +1,233 @@
+/*
+ * setproctitle.c
+ *
+ * The code in this file, setproctitle.c is heavily based on code from
+ * proftpd, please see the licensing information below.
+ *
+ * This file added to the heartbeat tree by Horms
+ *
+ * Code to portably change the title of a programme as displayed
+ * by ps(1).
+ * + * heartbeat: Linux-HA heartbeat code + * + * Copyright (C) 1999,2000,2001 Alan Robertson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * ProFTPD - FTP server daemon + * Copyright (c) 1997, 1998 Public Flood Software + * Copyright (C) 1999, 2000 MacGyver aka Habeeb J. Dihu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + * + * As a special exemption, Public Flood Software/MacGyver aka Habeeb J. 
Dihu
+ * and other respective copyright holders give permission to link this program
+ * with OpenSSL, and distribute the resulting executable, without including
+ * the source code for OpenSSL in the source distribution.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Strategies for changing the process title; PF_ARGV_TYPE selects one.
+ *
+ * NOTE(review): nothing in this file defines PF_ARGV_TYPE. Unless one of
+ * the included headers does, the preprocessor evaluates it as 0, i.e.
+ * PF_ARGV_NONE, and both functions below compile to no-ops - confirm.
+ */
+#define PF_ARGV_NONE 0
+#define PF_ARGV_NEW 1
+#define PF_ARGV_WRITEABLE 2
+#define PF_ARGV_PSTAT 3
+#define PF_ARGV_PSSTRINGS 4
+
+#if PF_ARGV_TYPE == PF_ARGV_PSTAT
+# include
+#endif
+
+#include
+
+#if PF_ARGV_TYPE != PF_ARGV_NONE
+/* The argv[] vector handed to init_set_proc_title(), and the end of the
+ * run of argv/envp strings that directly follow one another in memory.
+ * Both stay NULL until init_set_proc_title() has succeeded.
+ */
+static char **Argv = NULL;
+static char *LastArgv = NULL;
+#endif /* PF_ARGV_TYPE != PF_ARGV_NONE */
+
+extern char **environ;
+
+#ifdef HAVE___PROGNAME
+extern char *__progname;
+extern char *__progname_full;
+#endif /* HAVE___PROGNAME */
+
+/*
+ * Prepare for later set_proc_title() calls.
+ *
+ * Copies every envp[] string to the heap and points environ at the copies,
+ * then records argv and the end of the argv/envp strings that directly
+ * follow one another, so that area can later be overwritten with the title.
+ *
+ * Returns 0 on success (and always when PF_ARGV_TYPE is PF_ARGV_NONE),
+ * -1 when memory could not be allocated. On failure environ, Argv,
+ * LastArgv and the __progname variables are left exactly as they were.
+ */
+int
+init_set_proc_title(int argc, char *argv[], char *envp[])
+{
+#if PF_ARGV_TYPE == PF_ARGV_NONE
+	return 0;
+#else
+	int i;
+	int envc;
+	char **new_environ;
+#ifdef HAVE___PROGNAME
+	char *progname;
+	char *progname_full;
+#endif /* HAVE___PROGNAME */
+
+	/* Move the environment so setproctitle can use the space.
+	 * The copy is built in a private array and only published through
+	 * environ once nothing can fail any more; environ must never be
+	 * left pointing at memory that has been freed.
+	 */
+	for(envc = 0; envp[envc] != NULL; envc++) {
+		/* counting only */
+	}
+
+	/* calloc(): slots not filled yet are NULL, which keeps the cleanup
+	 * below safe and also provides the terminating NULL entry.
+	 */
+	new_environ = calloc((size_t) envc + 1, sizeof(*new_environ));
+	if (new_environ == NULL) {
+		return -1;
+	}
+
+	for(i = 0; i < envc; i++) {
+		new_environ[i] = strdup(envp[i]);
+		if(new_environ[i] == NULL) {
+			goto error_environ;
+		}
+	}
+
+#ifdef HAVE___PROGNAME
+	/* Set the __progname and __progname_full variables so glibc and
+	 * company don't go nuts. - MacGyver
+	 *
+	 * Both copies are made before either global is touched, so a
+	 * failure neither leaks the first copy nor half-updates the pair.
+	 */
+	progname = strdup("sbd");
+	progname_full = strdup(argv[0]);
+	if (progname == NULL || progname_full == NULL) {
+		free(progname);
+		free(progname_full);
+		goto error_environ;
+	}
+	__progname = progname;
+	__progname_full = progname_full;
+#endif /* HAVE___PROGNAME */
+
+	environ = new_environ;
+	Argv = argv;
+
+	/* Extend LastArgv over every argv string that starts right after
+	 * the end of the previous one ...
+	 */
+	for(i = 0; i < argc; i++) {
+		if(!i || (LastArgv + 1 == argv[i]))
+			LastArgv = argv[i] + strlen(argv[i]);
+	}
+
+	/* ... and over the original environment strings that follow them. */
+	for(i = 0; envp[i] != NULL; i++) {
+		if((LastArgv + 1) == envp[i]) {
+			LastArgv = envp[i] + strlen(envp[i]);
+		}
+	}
+
+	return 0;
+
+error_environ:
+	for(i = 0; i < envc; i++) {
+		free(new_environ[i]);
+	}
+	free(new_environ);
+	return -1;
+#endif /* PF_ARGV_TYPE == PF_ARGV_NONE */
+}
+
+/*
+ * Format a new process title, printf-style, and install it using the
+ * mechanism selected by HAVE_SETPROCTITLE / PF_ARGV_TYPE. Does nothing
+ * when PF_ARGV_TYPE is PF_ARGV_NONE.
+ *
+ * The title is formatted into a function-static buffer and is truncated
+ * to sizeof(statbuf); for PF_ARGV_WRITEABLE it is additionally truncated
+ * to the room recorded by init_set_proc_title(). The argv-based
+ * mechanisms are skipped when init_set_proc_title() has not succeeded.
+ */
+void set_proc_title(const char *fmt,...)
+{
+#if PF_ARGV_TYPE != PF_ARGV_NONE
+	va_list msg;
+	static char statbuf[BUFSIZ];
+
+#ifndef HAVE_SETPROCTITLE
+#if PF_ARGV_TYPE == PF_ARGV_PSTAT
+	union pstun pst;
+#endif /* PF_ARGV_PSTAT */
+	int i;
+#endif /* HAVE_SETPROCTITLE */
+
+	memset(statbuf, 0, sizeof(statbuf));
+
+	/* statbuf is empty at this point, so the title is exactly the
+	 * formatted message; no process name is prepended here. (FreeBSD's
+	 * setproctitle() automatically prepends the process name.)
+	 */
+	va_start(msg,fmt);
+	vsnprintf(statbuf, sizeof(statbuf), fmt, msg);
+	va_end(msg);
+
+#ifdef HAVE_SETPROCTITLE
+	setproctitle("%s", statbuf);
+	return;
+#else
+	i = strlen(statbuf);
+
+#if PF_ARGV_TYPE == PF_ARGV_NEW
+	/* We can just replace argv[] arguments. Nice and easy.
+	 */
+	if (Argv != NULL) {
+		Argv[0] = statbuf;
+		Argv[1] = NULL;
+	}
+#endif /* PF_ARGV_NEW */
+
+#if PF_ARGV_TYPE == PF_ARGV_WRITEABLE
+	/* We can overwrite individual argv[] arguments. Semi-nice.
+	 */
+	if (Argv != NULL && Argv[0] != NULL && LastArgv != NULL) {
+		int maxlen = (LastArgv - Argv[0]) - 2;
+		char *p;
+
+		/* A non-positive size would become a huge size_t. */
+		if (maxlen > 0) {
+			snprintf(Argv[0], (size_t) maxlen, "%s", statbuf);
+			if (i >= maxlen) {
+				/* truncated: the title ends at its NUL */
+				i = maxlen - 1;
+			}
+			p = &Argv[0][i];
+
+			/* wipe what is left of the old argv/envp text */
+			while(p < LastArgv)
+				*p++ = '\0';
+			Argv[1] = NULL;
+		}
+	}
+#endif /* PF_ARGV_WRITEABLE */
+
+#if PF_ARGV_TYPE == PF_ARGV_PSTAT
+	pst.pst_command = statbuf;
+	pstat(PSTAT_SETCMD, pst, i, 0, 0);
+#endif /* PF_ARGV_PSTAT */
+
+#if PF_ARGV_TYPE == PF_ARGV_PSSTRINGS
+	PS_STRINGS->ps_nargvstr = 1;
+	PS_STRINGS->ps_argvstr = statbuf;
+#endif /* PF_ARGV_PSSTRINGS */
+
+#endif /* HAVE_SETPROCTITLE */
+
+#endif /* PF_ARGV_TYPE != PF_ARGV_NONE */
+}