Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/etc/sysconfig/pacemaker b/etc/sysconfig/pacemaker
index c66f459345..44e0ba63a0 100644
--- a/etc/sysconfig/pacemaker
+++ b/etc/sysconfig/pacemaker
@@ -1,170 +1,173 @@
#==#==# Variables that control logging
# Enable debug logging globally (yes|no) or by subsystem. Multiple subsystems
# may be comma-separated, for example: PCMK_debug=pacemakerd,pacemaker-execd
# Subsystems are:
# pacemakerd
# pacemaker-attrd
# pacemaker-based
# pacemaker-controld
# pacemaker-execd
# pacemaker-fenced
# pacemaker-schedulerd
# PCMK_debug=no
# Send detailed log messages to the specified file. Compared to messages logged
# via syslog, messages in this file may have extended information, and will
# include messages of "info" severity (and, if debug and/or trace logging
# has been enabled, those as well). This log is of more use to developers and
# advanced system administrators, and when reporting problems.
# PCMK_logfile=/var/log/pacemaker/pacemaker.log
# Set the permissions on the above log file to owner/group read/write
# PCMK_logfile_mode=0660
# Enable logging via syslog, using the specified syslog facility. Messages sent
# here are of value to all Pacemaker users. This can be disabled using "none",
# but that is not recommended. The default is "daemon".
# PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7
# Unless syslog logging is disabled using PCMK_logfacility=none, messages of
# the specified severity and higher will be sent to syslog. The default value
# of "notice" is appropriate for most installations; "info" is highly verbose
# and "debug" is almost certain to send you blind (which is why there is a
# separate detail log specified by PCMK_logfile).
# PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug
# Log all messages from a comma-separated list of functions.
# PCMK_trace_functions=function1,function2,function3
# Log all messages from a comma-separated list of file names (without path).
# PCMK_trace_files=file1.c,file2.c
# Log all messages matching comma-separated list of formats.
# PCMK_trace_formats="Sent delete %d"
# Log all messages from a comma-separated list of tags.
# PCMK_trace_tags=tag1,tag2
# Dump the blackbox whenever the message at function and line is emitted,
# e.g. PCMK_trace_blackbox=te_graph_trigger:223,unpack_clone:81
# PCMK_trace_blackbox=fn:line,fn2:line2,...
# Enable blackbox logging globally or per-subsystem. The blackbox contains a
# rolling buffer of all logs (including info, debug, and trace) and is written
# after a crash or assertion failure, and/or when SIGTRAP is received. The
# blackbox recorder can also be enabled for Pacemaker daemons at runtime by
# sending SIGUSR1 (or SIGTRAP), and disabled by sending SIGUSR2. Specify value
# as for PCMK_debug above.
# PCMK_blackbox=no
#==#==# Advanced use only
# By default, nodes will join the cluster in an online state when they first
# start, unless they were previously put into standby mode. If this variable is
# set to "standby" or "online", it will force this node to join in the
# specified state when starting.
# (only supported for cluster nodes, not Pacemaker Remote nodes)
# PCMK_node_start_state=default
# Specify an alternate location for RNG schemas and XSL transforms.
# (This is of use only to developers.)
# PCMK_schema_directory=/some/path
# Pacemaker consists of a master process with multiple subsidiary daemons. If
# one of the daemons crashes, the master process will normally attempt to
# restart it. If this is set to "true", the master process will instead panic
# the host (see PCMK_panic_action). The default is unset.
# PCMK_fail_fast=no
# Pacemaker will panic its host under certain conditions. If this is set to
# "crash", Pacemaker will trigger a kernel crash (which is useful if you want a
-# kernel dump to investigate). For any other value, Pacemaker will trigger a
-# host reboot. The default is unset.
+# kernel dump to investigate). If this is set to "sync-reboot", Pacemaker will
+# call sync() before rebooting the host (this leaves information about the
+# crashed daemon in the log file, but note that the sync() call might never
+# return). For any other value, Pacemaker will trigger a host reboot. The
+# default is unset.
# PCMK_panic_action=crash
#==#==# Pacemaker Remote
# Use the contents of this file as the authorization key to use with Pacemaker
# Remote connections. This file must be readable by Pacemaker daemons (that is,
# it must allow read permissions to either the hacluster user or the haclient
# group), and its contents must be identical on all nodes. The default is
# "/etc/pacemaker/authkey".
# PCMK_authkey_location=/etc/pacemaker/authkey
# If the Pacemaker Remote service is run on the local node, it will listen
# for connections on this address. The value may be a resolvable hostname or an
# IPv4 or IPv6 numeric address. When resolving names or using the default
# wildcard address (i.e. listen on all available addresses), IPv6 will be
# preferred if available. When listening on an IPv6 address, IPv4 clients will
# be supported (via IPv4-mapped IPv6 addresses).
# PCMK_remote_address="192.0.2.1"
# Use this TCP port number when connecting to a Pacemaker Remote node. This
# value must be the same on all nodes. The default is "3121".
# PCMK_remote_port=3121
# Use these GnuTLS cipher priorities for TLS connections. See:
#
# https://gnutls.org/manual/html_node/Priority-Strings.html
#
# Pacemaker will append ":+ANON-DH" for remote CIB access (when enabled) and
# ":+DHE-PSK:+PSK" for Pacemaker Remote connections, as they are required for
# the respective functionality.
# PCMK_tls_priorities="NORMAL"
# Set bounds on the bit length of the prime number generated for Diffie-Hellman
# parameters needed by TLS connections. The default is not to set any bounds.
#
# If these values are specified, the server (Pacemaker Remote daemon, or CIB
# manager configured to accept remote clients) will use these values to provide
# a floor and/or ceiling for the value recommended by the GnuTLS library. The
# library will only accept a limited number of specific values, which vary by
# library version, so setting these is recommended only when required for
# compatibility with specific client versions.
#
# If PCMK_dh_min_bits is specified, the client (connecting cluster node or
# remote CIB command) will require that the server use a prime of at least this
# size. This is only recommended when the value must be lowered in order for
# the client's GnuTLS library to accept a connection to an older server.
# The client side does not use PCMK_dh_max_bits.
#
# PCMK_dh_min_bits=1024
# PCMK_dh_max_bits=2048
#==#==# IPC
# Force use of a particular class of IPC connection.
# PCMK_ipc_type=shared-mem|socket|posix|sysv
# Specify an IPC buffer size in bytes. This is useful when connecting to really
# big clusters that exceed the default 128KB buffer.
# PCMK_ipc_buffer=131072
#==#==# Profiling and memory leak testing (mainly useful to developers)
# Affect the behavior of glib's memory allocator. Setting to "always-malloc"
# when running under valgrind will help valgrind track malloc/free better;
# setting to "debug-blocks" when not running under valgrind will perform
# (somewhat expensive) memory checks.
# G_SLICE=always-malloc
# Uncommenting this will make malloc() initialize newly allocated memory
# and free() wipe it (to help catch uninitialized-memory/use-after-free).
# MALLOC_PERTURB_=221
# Uncommenting this will make malloc() and friends print to stderr and abort
# for some (inexpensive) memory checks.
# MALLOC_CHECK_=3
# Set as for PCMK_debug above to run some or all daemons under valgrind.
# PCMK_valgrind_enabled=no
# Set as for PCMK_debug above to run some or all daemons under valgrind with
# the callgrind tool enabled.
# PCMK_callgrind_enabled=no
# Set the options to pass to valgrind, when valgrind is enabled. See
# valgrind(1) man page for details. "--vgdb=no" is specified because
# pacemaker-execd can lower privileges when executing commands, which would
# otherwise leave a bunch of unremovable files in /tmp.
VALGRIND_OPTS="--leak-check=full --trace-children=no --vgdb=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all"
diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c
index 03ee7f17d1..07139b1162 100644
--- a/lib/common/watchdog.c
+++ b/lib/common/watchdog.c
@@ -1,301 +1,304 @@
/*
* Copyright 2013-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sched.h>
#include <sys/ioctl.h>
#include <sys/reboot.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <ctype.h>
#include <dirent.h>
#include <signal.h>
#ifdef _POSIX_MEMLOCK
# include <sys/mman.h>
#endif
/* Process ID of sbd as last determined by pcmk__locate_sbd(); stays 0 until
 * a lookup succeeds (pcmk__locate_sbd() resets negative results to 0)
 */
static pid_t sbd_pid = 0;
/*!
 * \internal
 * \brief Write one command character to /proc/sysrq-trigger
 *
 * Compiles to a no-op when SUPPORT_PROCFS is not enabled.
 *
 * \param[in] t  Command character to write (followed by a newline)
 */
static void
sysrq_trigger(char t)
{
#if SUPPORT_PROCFS
    // Root can always write here, regardless of kernel.sysrq value
    FILE *trigger = fopen("/proc/sysrq-trigger", "a");

    if (trigger != NULL) {
        crm_info("sysrq-trigger: %c", t);
        fprintf(trigger, "%c\n", t);
        fclose(trigger);
    } else {
        crm_perror(LOG_WARNING, "Opening sysrq-trigger failed");
    }
#endif // SUPPORT_PROCFS
}
/*!
* \internal
* \brief Panic the local host (if root) or tell pacemakerd to do so
*
* If the effective user is not root, this logs and exits with CRM_EX_PANIC
* (after first sending SIGQUIT to a pacemakerd found via procfs when the
* original parent is gone and procfs support is built in).
*
* If the effective user is root, the PCMK_panic_action environment variable
* selects the sysrq command: "crash" writes 'c', anything else writes 'b'
* (preceded by sync() when the value is "sync-reboot"). reboot(RB_AUTOBOOT)
* is then called, and if execution continues past it, the process exits.
*
* \note Every path ends in exit() or crm_exit(), so callers should not expect
*       this function to return (assuming crm_exit() does not return -- TODO
*       confirm).
*/
static void
panic_local(void)
{
int rc = pcmk_ok;
uid_t uid = geteuid();
pid_t ppid = getppid();
if(uid != 0 && ppid > 1) {
/* We're a non-root pacemaker daemon (pacemaker-based,
* pacemaker-controld, pacemaker-schedulerd, pacemaker-attrd, etc.) with
* the original pacemakerd parent.
*
* Of these, only the controller is likely to be initiating resets.
*/
crm_emerg("Signaling parent %lld to panic", (long long) ppid);
crm_exit(CRM_EX_PANIC);
return;
} else if (uid != 0) {
#if SUPPORT_PROCFS
/*
* No permissions, and no pacemakerd parent to escalate to.
* Track down the new pacemakerd process and send a signal instead.
*/
union sigval signal_value;
memset(&signal_value, 0, sizeof(signal_value));
// ppid is reused here to hold the PID returned by the procfs lookup
ppid = pcmk__procfs_pid_of("pacemakerd");
crm_emerg("Signaling pacemakerd[%lld] to panic", (long long) ppid);
// Only signal when a usable PID (> 1) was found; log if the signal fails
if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) {
crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic",
(long long) ppid);
}
#endif // SUPPORT_PROCFS
/* The best we can do now is die */
crm_exit(CRM_EX_PANIC);
return;
}
/* We're either pacemakerd, or a pacemaker daemon running as root */
/* PCMK_panic_action is compared using pcmk__str_casei (presumably
* case-insensitive -- TODO confirm). An unset variable takes the reboot
* branch, assuming pcmk__str_eq() treats NULL as a non-match.
*
* NOTE(review): nothing here bounds how long sync() may block before the
* reboot is triggered.
*/
if (pcmk__str_eq("crash", getenv("PCMK_panic_action"), pcmk__str_casei)) {
sysrq_trigger('c');
} else {
+ if (pcmk__str_eq("sync-reboot", getenv("PCMK_panic_action"), pcmk__str_casei)) {
+ sync();
+ }
sysrq_trigger('b');
}
/* reboot(RB_HALT_SYSTEM); rc = errno; */
// Still running after the sysrq request, so ask for a reboot directly
reboot(RB_AUTOBOOT);
// Capture errno right away, before logging can overwrite it
rc = errno;
crm_emerg("Reboot failed, escalating to parent %lld: %s " CRM_XS " rc=%d",
(long long) ppid, pcmk_rc_str(rc), rc);
// On this (root) path ppid is still the value returned by getppid() above
if(ppid > 1) {
/* child daemon */
exit(CRM_EX_PANIC);
} else {
/* pacemakerd or orphan child */
exit(CRM_EX_FATAL);
}
}
/*!
* \internal
* \brief Tell sbd to kill the local host, then exit
*/
static void
panic_sbd(void)
{
union sigval signal_value;
pid_t ppid = getppid();
crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid);
memset(&signal_value, 0, sizeof(signal_value));
/* TODO: Arrange for a slightly less brutal option? */
if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) {
crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate",
(long long) sbd_pid);
panic_local();
}
if(ppid > 1) {
/* child daemon */
exit(CRM_EX_PANIC);
} else {
/* pacemakerd or orphan child */
exit(CRM_EX_FATAL);
}
}
/*!
 * \internal
 * \brief Panic the local host
 *
 * Panic the local host either by sbd (if running), directly, or by asking
 * pacemakerd. If trace logging this function, exit instead.
 *
 * \param[in] origin  Function caller (for logging only)
 */
void
pcmk__panic(const char *origin)
{
    // Callsite used only to detect whether tracing targets this function
    static struct qb_log_callsite *panic_cs = NULL;

    if (panic_cs == NULL) {
        panic_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay",
                                       LOG_TRACE, __LINE__, crm_trace_nonlog);
    }

    /* Ensure sbd_pid is set */
    (void) pcmk__locate_sbd();

    if (panic_cs && panic_cs->targets) {
        /* getppid() == 1 means our original parent no longer exists */
        // pid_t is not guaranteed to be int, so cast for the format string
        crm_emerg("Shutting down instead of panicking the node "
                  CRM_XS " origin=%s sbd=%lld parent=%lld",
                  origin, (long long) sbd_pid, (long long) getppid());
        crm_exit(CRM_EX_FATAL);
        return;
    }

    if (sbd_pid > 1) {
        crm_emerg("Signaling sbd[%lld] to panic the system: %s",
                  (long long) sbd_pid, origin);
        panic_sbd();

    } else {
        crm_emerg("Panicking the system directly: %s", origin);
        panic_local();
    }
}
/*!
 * \internal
 * \brief Return the process ID of sbd (or 0 if it is not running)
 *
 * A previously found process ID (greater than 1) is returned without looking
 * again. Otherwise the sbd PID file is checked first, and, when procfs
 * support is built in, procfs is searched if the PID file check fails.
 */
pid_t
pcmk__locate_sbd(void)
{
    char *pid_path = NULL;
    char *binary_path = NULL;
    int match_rc;

    if (sbd_pid > 1) {
        return sbd_pid;
    }

    /* Look for the pid file */
    pid_path = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid");
    binary_path = crm_strdup_printf("%s/sbd", SBIN_DIR);

    /* Read the pid file */
    match_rc = pcmk__pidfile_matches(pid_path, 0, binary_path, &sbd_pid);
    if (match_rc == pcmk_rc_ok) {
        crm_trace("SBD detected at pid %lld (via PID file %s)",
                  (long long) sbd_pid, pid_path);
    }
#if SUPPORT_PROCFS
    else {
        /* Fall back to /proc for systems that support it */
        sbd_pid = pcmk__procfs_pid_of("sbd");
        crm_trace("SBD detected at pid %lld (via procfs)",
                  (long long) sbd_pid);
    }
#endif // SUPPORT_PROCFS

    free(binary_path);
    free(pid_path);

    // Normalize "not found" results so callers only ever see 0 or a real PID
    if (sbd_pid < 0) {
        sbd_pid = 0;
        crm_trace("SBD not detected");
    }
    return sbd_pid;
}
/*!
 * \internal
 * \brief Get the SBD watchdog timeout from the environment
 *
 * The SBD_WATCHDOG_TIMEOUT environment variable is parsed with
 * crm_get_msec() on first use, and the result is remembered for later calls.
 *
 * \return Result of crm_get_msec() for SBD_WATCHDOG_TIMEOUT
 */
long
pcmk__get_sbd_timeout(void)
{
    // -2 marks "not looked up yet"
    static long cached_timeout = -2;

    if (cached_timeout != -2) {
        return cached_timeout;
    }

    cached_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT"));
    return cached_timeout;
}
/*!
 * \internal
 * \brief Check whether SBD_SYNC_RESOURCE_STARTUP is set to a true value
 *
 * The environment variable is evaluated with crm_is_true() on the first
 * call only; later calls return the remembered result.
 *
 * \return Result of crm_is_true() for SBD_SYNC_RESOURCE_STARTUP
 */
bool
pcmk__get_sbd_sync_resource_startup(void)
{
    static bool already_checked = false;
    static bool enabled = false;

    if (already_checked) {
        return enabled;
    }

    enabled = crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP"));
    already_checked = true;
    return enabled;
}
/*!
 * \internal
 * \brief Calculate a watchdog timeout from the SBD timeout
 *
 * \return Twice the value of pcmk__get_sbd_timeout(), or 0 if that value is
 *         not positive
 */
long
pcmk__auto_watchdog_timeout(void)
{
    long sbd_timeout = pcmk__get_sbd_timeout();

    return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout);
}
/*!
 * \internal
 * \brief Check a stonith-watchdog-timeout value against the SBD setup
 *
 * A NULL value is treated as 0. A negative parsed value is replaced by the
 * result of pcmk__auto_watchdog_timeout(). A resulting timeout of 0 is
 * accepted without further checks. Any other timeout requires that sbd be
 * located and that the timeout be no shorter than the SBD timeout.
 *
 * \param[in] value  Configured stonith-watchdog-timeout (may be NULL)
 *
 * \return true if the value is acceptable; otherwise an emergency message is
 *         logged and crm_exit(CRM_EX_FATAL) is called (false is returned only
 *         if that call returns)
 */
bool
pcmk__valid_sbd_timeout(const char *value)
{
    long st_timeout = 0;
    long sbd_timeout = 0;

    if (value != NULL) {
        st_timeout = crm_get_msec(value);
    }

    if (st_timeout < 0) {
        st_timeout = pcmk__auto_watchdog_timeout();
        crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)",
                  st_timeout, value);
    }

    if (st_timeout == 0) {
        crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)",
                  value? value : "default");
        return true;
    }

    // A nonzero timeout is only usable if sbd is actually running
    if (pcmk__locate_sbd() == 0) {
        crm_emerg("Shutting down: stonith-watchdog-timeout configured (%s) "
                  "but SBD not active", (value? value : "auto"));
        crm_exit(CRM_EX_FATAL);
        return false;
    }

    sbd_timeout = pcmk__get_sbd_timeout();
    if (st_timeout < sbd_timeout) {
        crm_emerg("Shutting down: stonith-watchdog-timeout (%s) too short "
                  "(must be >%ldms)", value, sbd_timeout);
        crm_exit(CRM_EX_FATAL);
        return false;
    }

    crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms",
             value, sbd_timeout);
    return true;
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:29 PM (11 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1995978
Default Alt Text
(16 KB)

Event Timeline