diff --git a/src/sbd-common.c b/src/sbd-common.c index 7ebf4a7..c18153f 100644 --- a/src/sbd-common.c +++ b/src/sbd-common.c @@ -1,832 +1,880 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "sbd.h" #include #include #include #include #include #include #include #include #ifdef _POSIX_MEMLOCK # include #endif /* Tunable defaults: */ int timeout_watchdog = SBD_WATCHDOG_TIMEOUT_DEFAULT; int timeout_msgwait = 2 * SBD_WATCHDOG_TIMEOUT_DEFAULT; int timeout_allocate = 2; int timeout_loop = 1; int timeout_io = 3; int timeout_startup = 120; int watchdog_use = 1; int watchdog_set_timeout = 1; int timeout_watchdog_crashdump = 0; int skip_rt = 0; int debug = 0; int debug_mode = 0; /* Global, non-tunable variables: */ int sector_size = 0; int servant_health = 0; const char *cmdname; char *local_uname; void usage(void) { fprintf(stderr, "Shared storage fencing tool.\n" "Syntax:\n" " %s \n" "Options:\n" "-d Block device to use (mandatory; can be specified up to 3 times)\n" "-h Display this help.\n" "-n Set local node name; defaults to uname -n (optional)\n" "\n" "-R Do NOT enable realtime priority (debugging only)\n" "-W Use watchdog (recommended) (watch only)\n" "-w Specify watchdog device (optional) (watch only)\n" "-T Do NOT initialize the watchdog timeout (watch only)\n" "-S 
<0|1> Set start mode if the node was previously fenced (watch only)\n" "-p Write pidfile to the specified path (watch only)\n" "-v|-vv|-vvv Enable verbose|debug|debug-library logging (optional)\n" "\n" "-1 Set watchdog timeout to N seconds (optional, create only)\n" "-2 Set slot allocation timeout to N seconds (optional, create only)\n" "-3 Set daemon loop timeout to N seconds (optional, create only)\n" "-4 Set msgwait timeout to N seconds (optional, create only)\n" "-5 Warn if loop latency exceeds threshold (optional, watch only)\n" " (default is 3, set to 0 to disable)\n" "-C Watchdog timeout to set before crashdumping\n" " (def: 0s = disable gracefully, optional)\n" "-I Async IO read timeout (defaults to 3 * loop timeout, optional)\n" "-s Timeout to wait for devices to become available (def: 120s)\n" "-t Dampening delay before faulty servants are restarted (optional)\n" " (default is 5, set to 0 to disable)\n" "-F # of failures before a servant is considered faulty (optional)\n" " (default is 1, set to 0 to disable)\n" "-P Check Pacemaker quorum and node health (optional, watch only)\n" "-Z Enable trace mode. 
WARNING: UNSAFE FOR PRODUCTION!\n" "-r Set timeout-action to comma-separated combination of\n" " noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n" "Commands:\n" #if SUPPORT_SHARED_DISK "create initialize N slots on - OVERWRITES DEVICE!\n" "list List all allocated slots on device, and messages.\n" "dump Dump meta-data header from device.\n" "allocate \n" " Allocate a slot for node (optional)\n" "message (test|reset|off|crashdump|clear|exit)\n" " Writes the specified message to node's slot.\n" #endif "watch Loop forever, monitoring own slot\n" "query-watchdog Check for available watchdog-devices and print some info\n" "test-watchdog Test the watchdog-device selected.\n" " Attention: This will arm the watchdog and have your system reset\n" " in case your watchdog is working properly!\n" , cmdname); } /* This duplicates some code from linux/ioprio.h since these are not included * even in linux-kernel-headers. Sucks. See also * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */ extern int sys_ioprio_set(int, int, int); int ioprio_set(int which, int who, int ioprio); inline int ioprio_set(int which, int who, int ioprio) { return syscall(__NR_ioprio_set, which, who, ioprio); } enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE, }; enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, IOPRIO_WHO_USER, }; #define IOPRIO_BITS (16) #define IOPRIO_CLASS_SHIFT (13) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) #define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) #define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) static void sbd_stack_hogger(unsigned char * inbuf, int kbytes) { unsigned char buf[1024]; if(kbytes <= 0) { return; } if (inbuf == NULL) { memset(buf, HOG_CHAR, sizeof(buf)); } else { memcpy(buf, inbuf, sizeof(buf)); } if (kbytes > 0) { sbd_stack_hogger(buf, kbytes-1); } return; } static void 
sbd_malloc_hogger(int kbytes) { int j; void**chunks; int chunksize = 1024; if(kbytes <= 0) { return; } /* * We could call mallopt(M_MMAP_MAX, 0) to disable it completely, * but we've already called mlockall() * * We could also call mallopt(M_TRIM_THRESHOLD, -1) to prevent malloc * from giving memory back to the system, but we've already called * mlockall(MCL_FUTURE), so there's no need. */ chunks = malloc(kbytes * sizeof(void *)); if (chunks == NULL) { cl_log(LOG_WARNING, "Could not preallocate chunk array"); return; } for (j=0; j < kbytes; ++j) { chunks[j] = malloc(chunksize); if (chunks[j] == NULL) { cl_log(LOG_WARNING, "Could not preallocate block %d", j); } else { memset(chunks[j], 0, chunksize); } } for (j=0; j < kbytes; ++j) { free(chunks[j]); } free(chunks); } static void sbd_memlock(int stackgrowK, int heapgrowK) { #ifdef _POSIX_MEMLOCK /* * We could call setrlimit(RLIMIT_MEMLOCK,...) with a large * number, but the mcp runs as root and mlock(2) says: * * Since Linux 2.6.9, no limits are placed on the amount of memory * that a privileged process may lock, and this limit instead * governs the amount of memory that an unprivileged process may * lock. 
*/ if (mlockall(MCL_CURRENT|MCL_FUTURE) >= 0) { cl_log(LOG_INFO, "Locked ourselves in memory"); /* Now allocate some extra pages (MCL_FUTURE will ensure they stay around) */ sbd_malloc_hogger(heapgrowK); sbd_stack_hogger(NULL, stackgrowK); } else { cl_perror("Unable to lock ourselves into memory"); } #else cl_log(LOG_ERR, "Unable to lock ourselves into memory"); #endif } static int get_realtime_budget(void) { FILE *f; char fname[PATH_MAX]; int res = -1, lnum = 0, num; char *cgroup = NULL, *namespecs = NULL; snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid()); f = fopen(fname, "rt"); if (f == NULL) { cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd", (intmax_t)getpid()); goto exit_res; } while( (num = fscanf(f, "%d:%m[^:]:%m[^\n]\n", &lnum, &namespecs, &cgroup)) !=EOF ) { if (namespecs && strstr(namespecs, "cpuacct")) { free(namespecs); break; } if (cgroup) { free(cgroup); cgroup = NULL; } if (namespecs) { free(namespecs); namespecs = NULL; } /* not to get stuck if format changes */ if ((num < 3) && ((fscanf(f, "%*[^\n]") == EOF) || (fscanf(f, "\n") == EOF))) { break; } } fclose(f); if (cgroup == NULL) { cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd", (intmax_t)getpid()); goto exit_res; } snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us", cgroup); f = fopen(fname, "rt"); if (f == NULL) { cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but " "doesn't for '%s'", cgroup); goto exit_res; } if (fscanf(f, "%d", &res) != 1) { cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname); } else { cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res); } fclose(f); exit_res: if (cgroup) { free(cgroup); } return res; } /* stolen from corosync */ + +#define LEGACY_CGROUP_PROC_PIDS "/sys/fs/cgroup/cpu/tasks" +#define UNIFIED_CGROUP_PROC_PIDS "/sys/fs/cgroup/cgroup.procs" + static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) { FILE *f; - int res = -1; + int res = -1, num; + char *rt_rq_name = NULL; + 
const char *root_pids = LEGACY_CGROUP_PROC_PIDS; /* * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now * using systemd and systemd uses hardcoded path of cgroup mount point. * * This feature is expected to be removed as soon as systemd gets support * for managing RT configuration. */ - f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt"); - if (f == NULL) { - cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> " + do { + f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt"); + if (f) { + break; + } + /* CONFIG_RT_GROUP_SCHED might still be enabled with cgroup-v2 + cgroup.procs on cgroup-toplevel tells us we have cgroup-v2 + (handy as we already need that to be in selinux-policy) + and name of rt_rq(s) in /proc/sched_debug tells us that + CONFIG_RT_GROUP_SCHED is enabled + cgroup-v2 has been around for a while in the kernel and it + is no mutual exclusive compile-time-configuration - so + checking what is actually mounted to go with what is there + */ + f = fopen(UNIFIED_CGROUP_PROC_PIDS, "rt"); + if (f) { + fclose(f); + f = fopen("/proc/sched_debug", "rt"); + if (f) { + while (((num = fscanf(f, "rt_rq[%*[^]]]:%m[^\n]\n", + &rt_rq_name)) != EOF) && + (rt_rq_name == NULL)) { + /* consume a line */ + if ((num > 0) || (fscanf(f, "%*[^\n]") == EOF) || + (fscanf(f, "\n") == EOF)) { + break; + } + } + /* no hierarchical rt-budget distribution with + cgroup-v2 so far - thus checking for budget is + useless + */ + if (rt_rq_name) { + free(rt_rq_name); + enforce_root_cgroup = true; + root_pids = UNIFIED_CGROUP_PROC_PIDS; + break; + } + fclose(f); + } + } + cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist & " + "/proc/sched_debug doesn't contain rt_rq[...]:/ -> " "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED"); res = 0; goto exit_res; - } + } while (0); fclose(f); if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) { cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are " "-> skip moving to root-slice"); 
res = 0; goto exit_res; } - f = fopen("/sys/fs/cgroup/cpu/tasks", "w"); + f = fopen(root_pids, "w"); if (f == NULL) { - cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing"); + cl_log(LOG_WARNING, "Can't open %s for writing", root_pids); goto exit_res; } if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) { - cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file"); + cl_log(LOG_WARNING, "Can't write sbd pid into %s", root_pids); goto close_and_exit_res; } + res = 0; + close_and_exit_res: if (fclose(f) != 0) { - cl_log(LOG_WARNING, "Can't close cgroups tasks file"); + cl_log(LOG_WARNING, "Can't close %s", root_pids); goto exit_res; } exit_res: return (res); } void sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) { if(priority < 0) { return; } do { #ifdef SCHED_RR if (move_to_root_cgroup) { sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup); } { int pmin = sched_get_priority_min(SCHED_RR); int pmax = sched_get_priority_max(SCHED_RR); struct sched_param sp; int pcurrent; if (priority == 0) { priority = pmax; } else if (priority < pmin) { priority = pmin; } else if (priority > pmax) { priority = pmax; } if (sched_getparam(0, &sp) < 0) { cl_perror("Unable to get scheduler priority"); } else if ((pcurrent = sched_getscheduler(0)) < 0) { cl_perror("Unable to get scheduler policy"); } else if ((pcurrent == SCHED_RR) && (sp.sched_priority >= priority)) { cl_log(LOG_INFO, "Stay with priority (%d) for policy SCHED_RR", sp.sched_priority); break; } else { memset(&sp, 0, sizeof(sp)); sp.sched_priority = priority; if (sched_setscheduler(0, SCHED_RR, &sp) < 0) { cl_perror( "Unable to set scheduler policy to SCHED_RR priority %d", priority); } else { cl_log(LOG_INFO, "Scheduler policy is now SCHED_RR priority %d", priority); break; } } } #else cl_log(LOG_ERR, "System does not support updating the scheduler policy"); #endif #ifdef PRIO_PGRP if (setpriority(PRIO_PGRP, 0, INT_MIN) < 0) { cl_perror("Unable to raise the scheduler priority"); } else 
{ cl_log(LOG_INFO, "Scheduler priority raised to the maximum"); } #else cl_perror("System does not support setting the scheduler priority"); #endif } while (0); sbd_memlock(heapgrowK, stackgrowK); } void maximize_priority(void) { if (skip_rt) { cl_log(LOG_INFO, "Not elevating to realtime (-R specified)."); return; } sbd_make_realtime(0, 256, 256); if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(), IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) { cl_perror("ioprio_set() call failed."); } } void sysrq_init(void) { FILE* procf; int c; procf = fopen("/proc/sys/kernel/sysrq", "r"); if (!procf) { cl_perror("cannot open /proc/sys/kernel/sysrq for read."); return; } if (fscanf(procf, "%d", &c) != 1) { cl_perror("Parsing sysrq failed"); c = 0; } fclose(procf); if (c == 1) return; /* 8 for debugging dumps of processes, 128 for reboot/poweroff */ c |= 136; procf = fopen("/proc/sys/kernel/sysrq", "w"); if (!procf) { cl_perror("cannot open /proc/sys/kernel/sysrq for writing"); return; } fprintf(procf, "%d", c); fclose(procf); return; } void sysrq_trigger(char t) { FILE *procf; procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { cl_perror("Opening sysrq-trigger failed."); return; } cl_log(LOG_INFO, "sysrq-trigger: %c\n", t); fprintf(procf, "%c\n", t); fclose(procf); return; } static void do_exit(char kind, bool do_flush) { /* TODO: Turn debug_mode into a bit field? Delay + kdump for example */ const char *reason = NULL; if (kind == 'c') { cl_log(LOG_NOTICE, "Initiating kdump"); } else if (debug_mode == 1) { cl_log(LOG_WARNING, "Initiating kdump instead of panicking the node (debug mode)"); kind = 'c'; } if (debug_mode == 2) { cl_log(LOG_WARNING, "Shutting down SBD instead of panicking the node (debug mode)"); watchdog_close(true); exit(0); } if (debug_mode == 3) { /* Give the system some time to flush logs to disk before rebooting. 
*/ cl_log(LOG_WARNING, "Delaying node panic by 10s (debug mode)"); watchdog_close(true); sync(); sleep(10); } switch(kind) { case 'b': reason = "reboot"; break; case 'c': reason = "crashdump"; break; case 'o': reason = "off"; break; default: reason = "unknown"; break; } cl_log(LOG_EMERG, "Rebooting system: %s", reason); if (do_flush) { sync(); } if (kind == 'c') { if (timeout_watchdog_crashdump) { if (timeout_watchdog != timeout_watchdog_crashdump) { timeout_watchdog = timeout_watchdog_crashdump; watchdog_init_interval(); } watchdog_close(false); } else { watchdog_close(true); } sysrq_trigger(kind); } else { watchdog_close(false); sysrq_trigger(kind); if (reboot((kind == 'o')?RB_POWER_OFF:RB_AUTOBOOT) < 0) { cl_perror("%s failed", (kind == 'o')?"Poweroff":"Reboot"); } } exit(1); } void do_crashdump(void) { do_exit('c', true); } void do_reset(void) { do_exit('b', true); } void do_off(void) { do_exit('o', true); } void do_timeout_action(void) { do_exit(timeout_sysrq_char, do_flush); } /* * Change directory to the directory our core file needs to go in * Call after you establish the userid you're running under. */ int sbd_cdtocoredir(void) { int rc; static const char *dir = NULL; if (dir == NULL) { dir = CRM_CORE_DIR; } if ((rc=chdir(dir)) < 0) { int errsave = errno; cl_perror("Cannot chdir to [%s]", dir); errno = errsave; } return rc; } pid_t make_daemon(void) { pid_t pid; const char * devnull = "/dev/null"; pid = fork(); if (pid < 0) { cl_log(LOG_ERR, "%s: could not start daemon\n", cmdname); cl_perror("fork"); exit(1); }else if (pid > 0) { return pid; } qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); /* This is the child; ensure privileges have not been lost. 
*/ maximize_priority(); sysrq_init(); umask(022); close(0); (void)open(devnull, O_RDONLY); close(1); (void)open(devnull, O_WRONLY); close(2); (void)open(devnull, O_WRONLY); sbd_cdtocoredir(); return 0; } void sbd_get_uname(void) { struct utsname uname_buf; int i; if (uname(&uname_buf) < 0) { cl_perror("uname() failed?"); exit(1); } local_uname = strdup(uname_buf.nodename); for (i = 0; i < strlen(local_uname); i++) local_uname[i] = tolower(local_uname[i]); } #define FMT_MAX 256 void sbd_set_format_string(int method, const char *daemon) { int offset = 0; char fmt[FMT_MAX]; struct utsname res; switch(method) { case QB_LOG_STDERR: break; case QB_LOG_SYSLOG: if(daemon && strcmp(daemon, "sbd") != 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%10s: ", daemon); } break; default: /* When logging to a file */ if (uname(&res) == 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %s %10s: ", getpid(), res.nodename, daemon); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %10s: ", getpid(), daemon); } } if (debug && method >= QB_LOG_STDERR) { offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n: "); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n: "); } if (method == QB_LOG_SYSLOG) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%b"); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "\t%%b"); } if(offset > 0) { qb_log_format_set(method, fmt); } } int sigqueue_zero(pid_t pid, int sig) { union sigval signal_value; memset(&signal_value, 0, sizeof(signal_value)); return sigqueue(pid, sig, signal_value); } void notify_parent(void) { pid_t ppid; ppid = getppid(); if (ppid == 1) { /* Our parent died unexpectedly. Triggering * self-fence. 
*/ cl_log(LOG_WARNING, "Our parent is dead."); do_timeout_action(); } switch (servant_health) { case pcmk_health_pending: case pcmk_health_shutdown: case pcmk_health_transient: DBGLOG(LOG_DEBUG, "Not notifying parent: state transient (%d)", servant_health); break; case pcmk_health_unknown: case pcmk_health_unclean: case pcmk_health_noquorum: DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY (%d)", servant_health); sigqueue_zero(ppid, SIG_PCMK_UNHEALTHY); break; case pcmk_health_online: DBGLOG(LOG_DEBUG, "Notifying parent: healthy"); sigqueue_zero(ppid, SIG_LIVENESS); break; default: DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY %d", servant_health); sigqueue_zero(ppid, SIG_PCMK_UNHEALTHY); break; } } void set_servant_health(enum pcmk_health state, int level, char const *format, ...) { if (servant_health != state) { va_list ap; int len = 0; char *string = NULL; servant_health = state; va_start(ap, format); len = vasprintf (&string, format, ap); if(len > 0) { cl_log(level, "%s", string); } va_end(ap); free(string); } } bool sbd_is_disk(struct servants_list_item *servant) { if ((servant != NULL) && (servant->devname != NULL) && (servant->devname[0] == '/')) { return true; } return false; } bool sbd_is_cluster(struct servants_list_item *servant) { if ((servant != NULL) && (servant->devname != NULL) && (strcmp("cluster", servant->devname) == 0)) { return true; } return false; } bool sbd_is_pcmk(struct servants_list_item *servant) { if ((servant != NULL) && (servant->devname != NULL) && (strcmp("pcmk", servant->devname) == 0)) { return true; } return false; } #define MAX_LEGITIMATE_AGE 3600 /* 1h should be plenty */ int seconds_diff_time_t(time_t a, time_t b) { long long diff; diff = a - b; if ((diff > -MAX_LEGITIMATE_AGE) && (diff < MAX_LEGITIMATE_AGE)) { return (int) diff; } DBGLOG(LOG_WARNING, "Detected unreasonable age (%lld)", diff); return MAX_LEGITIMATE_AGE; /* something is fishy - provoke timeout */ } int seconds_diff_timespec(struct timespec *a, struct 
timespec *b) { struct timeval diff; struct timeval a_tv; struct timeval b_tv; TIMESPEC_TO_TIMEVAL(&a_tv, a); TIMESPEC_TO_TIMEVAL(&b_tv, b); timersub(&a_tv, &b_tv, &diff); return seconds_diff_time_t(diff.tv_sec, 0); } diff --git a/src/sbd.sysconfig.in b/src/sbd.sysconfig.in index f49d780..9f40e9d 100644 --- a/src/sbd.sysconfig.in +++ b/src/sbd.sysconfig.in @@ -1,133 +1,147 @@ ## Type: string ## Default: "" # # SBD_DEVICE specifies the devices to use for exchanging sbd messages # and to monitor. If specifying more than one path, use ";" as # separator. # #SBD_DEVICE="" ## Type: yesno ## Default: yes # # Whether to enable the pacemaker integration. # SBD_PACEMAKER=yes ## Type: always / clean ## Default: always # # Specify the start mode for sbd. Setting this to "clean" will only # allow sbd to start if it was not previously fenced. See the -S option # in the man page. # SBD_STARTMODE=always ## Type: yesno / integer ## Default: no # # Whether to delay after starting sbd on boot for "msgwait" seconds. # This may be necessary if your cluster nodes reboot so fast that the # other nodes are still waiting in the fence acknowledgement phase. # This is an occasional issue with virtual machines. # # This can also be enabled by being set to a specific delay value, in # seconds. Sometimes a longer delay than the default, "msgwait", is # needed, for example in the cases where it's considered to be safer to # wait longer than: # corosync token timeout + consensus timeout + pcmk_delay_max + msgwait # # Be aware that the special value "1" means "yes" rather than "1s". # # Consider that you might have to adapt the startup-timeout accordingly # if the default isn't sufficient. (TimeoutStartSec for systemd) # # This option may be ignored at a later point, once pacemaker handles # this case better. # SBD_DELAY_START=no ## Type: string ## Default: /dev/watchdog # # Watchdog device to use. If set to /dev/null, no watchdog device will # be used. 
# SBD_WATCHDOG_DEV=/dev/watchdog ## Type: integer ## Default: @SBD_WATCHDOG_TIMEOUT_DEFAULT@ # # How long, in seconds, the watchdog will wait before panicking the # node if no-one tickles it. # # This depends mostly on your storage latency; the majority of devices # must be successfully read within this time, or else the node will # self-fence. # # If your sbd device(s) reside on a multipath setup or iSCSI, this # should be the time required to detect a path failure. # # Be aware that watchdog timeout set in the on-disk metadata takes # precedence. # SBD_WATCHDOG_TIMEOUT=@SBD_WATCHDOG_TIMEOUT_DEFAULT@ ## Type: string ## Default: "flush,reboot" # # Actions to be executed when the watchers don't timely report to the sbd # master process or one of the watchers detects that the master process # has died. # # Set timeout-action to comma-separated combination of # noflush|flush plus reboot|crashdump|off. # If just one of both is given the other stays at the default. # # This doesn't affect actions like off, crashdump, reboot explicitly # triggered via message slots. # And it does as well not configure the action a watchdog would # trigger should it run off (there is no generic interface). # SBD_TIMEOUT_ACTION=flush,reboot ## Type: yesno / auto ## Default: auto # # If CPUAccounting is enabled default is not to assign any RT-budget # to the system.slice which prevents sbd from running RR-scheduled. # # One way to escape that issue is to move sbd-processes from the # slice they were originally started to root-slice. # Of course starting sbd in a certain slice might be intentional. # Thus in auto-mode sbd will check if the slice has RT-budget assigned. # If that is the case sbd will stay in that slice while it will # be moved to root-slice otherwise. # +# With cgroup-v2 behavior is very much different. +# With CONFIG_RT_GROUP_SCHED enabled and cpu-controller enabled +# there currently is no way to configure RT-budget in any slice +# but the root-slice. 
Conversely, if RT-budget is used +# in any slice but the root-slice, enabling the cpu-controller is +# inhibited. +# Thus - unless strictly disabled by setting 'no' - with cgroup-v2 +# and CONFIG_RT_GROUP_SCHED enabled sbd is always moved +# to the root-slice regardless of whether the cpu-controller is at the +# moment enabled or not. +# The reason is that subsequent services might enable the cpu-controller +# or fail to do so if sbd was already using RT-budget in e.g. the +# system-slice. +# SBD_MOVE_TO_ROOT_CGROUP=auto ## Type: yesno ## Default: @SBD_SYNC_RESOURCE_STARTUP_DEFAULT@ # # If resource startup syncing is enabled then pacemakerd is # gonna wait to be pinged via IPC before it starts resources. # On shutdown pacemakerd is going to wait in a state where it # has cleanly shutdown resources till sbd fetches that state. # # The default is set when building SBD and Pacemaker from source. # Going for 'no' is safer if it can't be assured that SBD and # Pacemaker installed do both support the synchronization feature. # When going with 'yes' - also using package dependencies to # assure SBD & Pacemaker both support the synchronization # feature and are assuming the same default - an SBD configuration # inherited via an upgrade doesn't have to be altered to still # benefit from the new feature. # SBD_SYNC_RESOURCE_STARTUP=@SBD_SYNC_RESOURCE_STARTUP_SYSCONFIG@ ## Type: string ## Default: "" # # Additional options for starting sbd # SBD_OPTS=