Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624513
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
16 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/etc/sysconfig/pacemaker b/etc/sysconfig/pacemaker
index c66f459345..44e0ba63a0 100644
--- a/etc/sysconfig/pacemaker
+++ b/etc/sysconfig/pacemaker
@@ -1,170 +1,173 @@
#==#==# Variables that control logging
# Enable debug logging globally (yes|no) or by subsystem. Multiple subsystems
# may be comma-separated, for example: PCMK_debug=pacemakerd,pacemaker-execd
# Subsystems are:
# pacemakerd
# pacemaker-attrd
# pacemaker-based
# pacemaker-controld
# pacemaker-execd
# pacemaker-fenced
# pacemaker-schedulerd
# PCMK_debug=no
# Send detailed log messages to the specified file. Compared to messages logged
# via syslog, messages in this file may have extended information, and will
# include messages of "info" severity (and, if debug and/or trace logging
# has been enabled, those as well). This log is of more use to developers and
# advanced system administrators, and when reporting problems.
# PCMK_logfile=/var/log/pacemaker/pacemaker.log
# Set the permissions on the above log file to owner/group read/write
# PCMK_logfile_mode=0660
# Enable logging via syslog, using the specified syslog facility. Messages sent
# here are of value to all Pacemaker users. This can be disabled using "none",
# but that is not recommended. The default is "daemon".
# PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7
# Unless syslog logging is disabled using PCMK_logfacility=none, messages of
# the specified severity and higher will be sent to syslog. The default value
# of "notice" is appropriate for most installations; "info" is highly verbose
# and "debug" is almost certain to send you blind (which is why there is a
# separate detail log specified by PCMK_logfile).
# PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug
# Log all messages from a comma-separated list of functions.
# PCMK_trace_functions=function1,function2,function3
# Log all messages from a comma-separated list of file names (without path).
# PCMK_trace_files=file1.c,file2.c
# Log all messages matching a comma-separated list of formats.
# PCMK_trace_formats="Sent delete %d"
# Log all messages from a comma-separated list of tags.
# PCMK_trace_tags=tag1,tag2
# Dump the blackbox whenever the message at function and line is emitted,
# e.g. PCMK_trace_blackbox=te_graph_trigger:223,unpack_clone:81
# PCMK_trace_blackbox=fn:line,fn2:line2,...
# Enable blackbox logging globally or per-subsystem. The blackbox contains a
# rolling buffer of all logs (including info, debug, and trace) and is written
# after a crash or assertion failure, and/or when SIGTRAP is received. The
# blackbox recorder can also be enabled for Pacemaker daemons at runtime by
# sending SIGUSR1 (or SIGTRAP), and disabled by sending SIGUSR2. Specify value
# as for PCMK_debug above.
# PCMK_blackbox=no
#==#==# Advanced use only
# By default, nodes will join the cluster in an online state when they first
# start, unless they were previously put into standby mode. If this variable is
# set to "standby" or "online", it will force this node to join in the
# specified state when starting.
# (only supported for cluster nodes, not Pacemaker Remote nodes)
# PCMK_node_start_state=default
# Specify an alternate location for RNG schemas and XSL transforms.
# (This is of use only to developers.)
# PCMK_schema_directory=/some/path
# Pacemaker consists of a master process with multiple subsidiary daemons. If
# one of the daemons crashes, the master process will normally attempt to
# restart it. If this is set to "true", the master process will instead panic
# the host (see PCMK_panic_action). The default is unset.
# PCMK_fail_fast=no
# Pacemaker will panic its host under certain conditions. If this is set to
# "crash", Pacemaker will trigger a kernel crash (which is useful if you want a
-# kernel dump to investigate). For any other value, Pacemaker will trigger a
-# host reboot. The default is unset.
+# kernel dump to investigate). If this is set to "sync-reboot", Pacemaker will
+# call sync() before rebooting the host (this preserves information about the
+# crashed daemon in the log file, but note that the sync() call might never
+# return). For any other value, Pacemaker will trigger a host reboot. The
+# default is unset.
# PCMK_panic_action=crash
#==#==# Pacemaker Remote
# Use the contents of this file as the authorization key to use with Pacemaker
# Remote connections. This file must be readable by Pacemaker daemons (that is,
# it must allow read permissions to either the hacluster user or the haclient
# group), and its contents must be identical on all nodes. The default is
# "/etc/pacemaker/authkey".
# PCMK_authkey_location=/etc/pacemaker/authkey
# If the Pacemaker Remote service is run on the local node, it will listen
# for connections on this address. The value may be a resolvable hostname or an
# IPv4 or IPv6 numeric address. When resolving names or using the default
# wildcard address (i.e. listen on all available addresses), IPv6 will be
# preferred if available. When listening on an IPv6 address, IPv4 clients will
# be supported (via IPv4-mapped IPv6 addresses).
# PCMK_remote_address="192.0.2.1"
# Use this TCP port number when connecting to a Pacemaker Remote node. This
# value must be the same on all nodes. The default is "3121".
# PCMK_remote_port=3121
# Use these GnuTLS cipher priorities for TLS connections. See:
#
# https://gnutls.org/manual/html_node/Priority-Strings.html
#
# Pacemaker will append ":+ANON-DH" for remote CIB access (when enabled) and
# ":+DHE-PSK:+PSK" for Pacemaker Remote connections, as they are required for
# the respective functionality.
# PCMK_tls_priorities="NORMAL"
# Set bounds on the bit length of the prime number generated for Diffie-Hellman
# parameters needed by TLS connections. The default is not to set any bounds.
#
# If these values are specified, the server (Pacemaker Remote daemon, or CIB
# manager configured to accept remote clients) will use these values to provide
# a floor and/or ceiling for the value recommended by the GnuTLS library. The
# library will only accept a limited number of specific values, which vary by
# library version, so setting these is recommended only when required for
# compatibility with specific client versions.
#
# If PCMK_dh_min_bits is specified, the client (connecting cluster node or
# remote CIB command) will require that the server use a prime of at least this
# size. This is only recommended when the value must be lowered in order for
# the client's GnuTLS library to accept a connection to an older server.
# The client side does not use PCMK_dh_max_bits.
#
# PCMK_dh_min_bits=1024
# PCMK_dh_max_bits=2048
#==#==# IPC
# Force use of a particular class of IPC connection.
# PCMK_ipc_type=shared-mem|socket|posix|sysv
# Specify an IPC buffer size in bytes. This is useful when connecting to really
# big clusters that exceed the default 128KB buffer.
# PCMK_ipc_buffer=131072
#==#==# Profiling and memory leak testing (mainly useful to developers)
# Affect the behavior of glib's memory allocator. Setting to "always-malloc"
# when running under valgrind will help valgrind track malloc/free better;
# setting to "debug-blocks" when not running under valgrind will perform
# (somewhat expensive) memory checks.
# G_SLICE=always-malloc
# Uncommenting this will make malloc() initialize newly allocated memory
# and free() wipe it (to help catch uninitialized-memory/use-after-free).
# MALLOC_PERTURB_=221
# Uncommenting this will make malloc() and friends print to stderr and abort
# for some (inexpensive) memory checks.
# MALLOC_CHECK_=3
# Set as for PCMK_debug above to run some or all daemons under valgrind.
# PCMK_valgrind_enabled=no
# Set as for PCMK_debug above to run some or all daemons under valgrind with
# the callgrind tool enabled.
# PCMK_callgrind_enabled=no
# Set the options to pass to valgrind, when valgrind is enabled. See
# valgrind(1) man page for details. "--vgdb=no" is specified because
# pacemaker-execd can lower privileges when executing commands, which would
# otherwise leave a bunch of unremovable files in /tmp.
VALGRIND_OPTS="--leak-check=full --trace-children=no --vgdb=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all"
diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c
index 03ee7f17d1..07139b1162 100644
--- a/lib/common/watchdog.c
+++ b/lib/common/watchdog.c
@@ -1,301 +1,304 @@
/*
* Copyright 2013-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sched.h>
#include <sys/ioctl.h>
#include <sys/reboot.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <ctype.h>
#include <dirent.h>
#include <signal.h>
#ifdef _POSIX_MEMLOCK
# include <sys/mman.h>
#endif
/* Cached process ID of sbd as found by pcmk__locate_sbd(); values above 1 are
 * reused without a new lookup, and 0 means sbd has not been detected.
 */
static pid_t sbd_pid = 0;
/*!
 * \internal
 * \brief Write one command character to the kernel's SysRq trigger file
 *
 * This is a no-op unless built with SUPPORT_PROCFS. A failure to open the
 * trigger file is logged as a warning; the results of the write and close are
 * not checked.
 *
 * \param[in] t  SysRq command character to write
 */
static void
sysrq_trigger(char t)
{
#if SUPPORT_PROCFS
    // Root can always write here, regardless of kernel.sysrq value
    FILE *trigger = fopen("/proc/sysrq-trigger", "a");

    if (trigger != NULL) {
        crm_info("sysrq-trigger: %c", t);
        fprintf(trigger, "%c\n", t);
        fclose(trigger);
    } else {
        crm_perror(LOG_WARNING, "Opening sysrq-trigger failed");
    }
#endif // SUPPORT_PROCFS
}
/*!
 * \internal
 * \brief Panic the local host (if root) or tell pacemakerd to do so
 *
 * Three cases are handled, and each one ends in a call to crm_exit() or exit():
 * - Not root, parent PID above 1: log, then exit with CRM_EX_PANIC.
 * - Not root, parent PID of 1 or less: when built with SUPPORT_PROCFS, look up
 *   pacemakerd via procfs and send it SIGQUIT; then exit with CRM_EX_PANIC.
 * - Root: act on the PCMK_panic_action environment variable ("crash" triggers
 *   SysRq 'c'; anything else triggers SysRq 'b', preceded by sync() for
 *   "sync-reboot"), then call reboot(RB_AUTOBOOT). If execution continues past
 *   reboot(), log the saved errno and exit.
 */
static void
panic_local(void)
{
int rc = pcmk_ok;
uid_t uid = geteuid();
pid_t ppid = getppid();
if(uid != 0 && ppid > 1) {
/* We're a non-root pacemaker daemon (pacemaker-based,
* pacemaker-controld, pacemaker-schedulerd, pacemaker-attrd, etc.) with
* the original pacemakerd parent.
*
* Of these, only the controller is likely to be initiating resets.
*/
crm_emerg("Signaling parent %lld to panic", (long long) ppid);
crm_exit(CRM_EX_PANIC);
return;
} else if (uid != 0) {
#if SUPPORT_PROCFS
/*
* No permissions, and no pacemakerd parent to escalate to.
* Track down the new pacemakerd process and send a signal instead.
*/
union sigval signal_value;
memset(&signal_value, 0, sizeof(signal_value));
ppid = pcmk__procfs_pid_of("pacemakerd");
crm_emerg("Signaling pacemakerd[%lld] to panic", (long long) ppid);
/* Only signal when the lookup produced a PID above 1 */
if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) {
crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic",
(long long) ppid);
}
#endif // SUPPORT_PROCFS
/* The best we can do now is die */
crm_exit(CRM_EX_PANIC);
return;
}
/* We're either pacemakerd, or a pacemaker daemon running as root */
/* PCMK_panic_action selects between a kernel crash ('c') and a reboot ('b') */
if (pcmk__str_eq("crash", getenv("PCMK_panic_action"), pcmk__str_casei)) {
sysrq_trigger('c');
} else {
+ if (pcmk__str_eq("sync-reboot", getenv("PCMK_panic_action"), pcmk__str_casei)) {
+ sync();
+ }
sysrq_trigger('b');
}
/* reboot(RB_HALT_SYSTEM); rc = errno; */
reboot(RB_AUTOBOOT);
/* Reaching this point means reboot() returned; save errno before logging */
rc = errno;
crm_emerg("Reboot failed, escalating to parent %lld: %s " CRM_XS " rc=%d",
(long long) ppid, pcmk_rc_str(rc), rc);
if(ppid > 1) {
/* child daemon */
exit(CRM_EX_PANIC);
} else {
/* pacemakerd or orphan child */
exit(CRM_EX_FATAL);
}
}
/*!
* \internal
* \brief Tell sbd to kill the local host, then exit
*/
static void
panic_sbd(void)
{
union sigval signal_value;
pid_t ppid = getppid();
crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid);
memset(&signal_value, 0, sizeof(signal_value));
/* TODO: Arrange for a slightly less brutal option? */
if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) {
crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate",
(long long) sbd_pid);
panic_local();
}
if(ppid > 1) {
/* child daemon */
exit(CRM_EX_PANIC);
} else {
/* pacemakerd or orphan child */
exit(CRM_EX_FATAL);
}
}
/*!
 * \internal
 * \brief Panic the local host
 *
 * Panic the local host either by sbd (if running), directly, or by asking
 * pacemakerd. If the "panic-delay" trace call site of this function has any
 * targets enabled, log and exit with CRM_EX_FATAL instead of panicking.
 *
 * \param[in] origin  Function caller (for logging only)
 */
void
pcmk__panic(const char *origin)
{
    // Looked up once and reused on later calls
    static struct qb_log_callsite *panic_cs = NULL;

    if (panic_cs == NULL) {
        panic_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay",
                                       LOG_TRACE, __LINE__, crm_trace_nonlog);
    }

    /* Ensure sbd_pid is set */
    (void) pcmk__locate_sbd();

    if (panic_cs && panic_cs->targets) {
        /* getppid() == 1 means our original parent no longer exists.
         *
         * pid_t is not guaranteed to be int, so cast it like every other PID
         * logged in this file rather than passing it to a bare %d.
         */
        crm_emerg("Shutting down instead of panicking the node "
                  CRM_XS " origin=%s sbd=%lld parent=%lld",
                  origin, (long long) sbd_pid, (long long) getppid());
        crm_exit(CRM_EX_FATAL);
        return;
    }

    if (sbd_pid > 1) {
        crm_emerg("Signaling sbd[%lld] to panic the system: %s",
                  (long long) sbd_pid, origin);
        panic_sbd();

    } else {
        crm_emerg("Panicking the system directly: %s", origin);
        panic_local();
    }
}
/*!
 * \internal
 * \brief Return the process ID of sbd (or 0 if it is not running)
 *
 * A previously found PID above 1 is returned without a new lookup. Otherwise
 * the sbd PID file is checked first and, when built with SUPPORT_PROCFS,
 * procfs is searched as a fallback. The result is cached in sbd_pid.
 *
 * \return Cached or newly detected sbd PID; negative results are reset to 0
 */
pid_t
pcmk__locate_sbd(void)
{
    char *pid_file = NULL;
    char *binary = NULL;

    if (sbd_pid > 1) {
        return sbd_pid;
    }

    /* Look for the pid file */
    pid_file = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid");
    binary = crm_strdup_printf("%s/sbd", SBIN_DIR);

    /* Read the pid file */
    if (pcmk__pidfile_matches(pid_file, 0, binary, &sbd_pid) == pcmk_rc_ok) {
        crm_trace("SBD detected at pid %lld (via PID file %s)",
                  (long long) sbd_pid, pid_file);

    } else {
#if SUPPORT_PROCFS
        /* Fall back to /proc for systems that support it.
         *
         * NOTE(review): this trace is emitted whatever the lookup returned.
         */
        sbd_pid = pcmk__procfs_pid_of("sbd");
        crm_trace("SBD detected at pid %lld (via procfs)",
                  (long long) sbd_pid);
#endif // SUPPORT_PROCFS
    }

    if (sbd_pid < 0) {
        sbd_pid = 0;
        crm_trace("SBD not detected");
    }

    free(pid_file);
    free(binary);
    return sbd_pid;
}
/*!
 * \internal
 * \brief Get the SBD watchdog timeout from the environment
 *
 * The SBD_WATCHDOG_TIMEOUT environment variable is parsed with crm_get_msec()
 * and the result is cached for later calls.
 *
 * \return Parsed value of SBD_WATCHDOG_TIMEOUT
 */
long
pcmk__get_sbd_timeout(void)
{
    // -2 marks the cache as not yet filled
    static long cached_timeout = -2;

    if (cached_timeout != -2) {
        return cached_timeout;
    }

    cached_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT"));
    return cached_timeout;
}
/*!
 * \internal
 * \brief Check whether SBD_SYNC_RESOURCE_STARTUP is set to a true value
 *
 * The environment variable is read and evaluated with crm_is_true() on the
 * first call only; later calls return the remembered result.
 *
 * \return Result of crm_is_true() for SBD_SYNC_RESOURCE_STARTUP
 */
bool
pcmk__get_sbd_sync_resource_startup(void)
{
    static bool already_read = false;
    static bool enabled = false;

    if (already_read) {
        return enabled;
    }

    enabled = crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP"));
    already_read = true;
    return enabled;
}
/*!
 * \internal
 * \brief Calculate a watchdog timeout from the SBD timeout
 *
 * \return Twice the value of pcmk__get_sbd_timeout(), or 0 if that value is
 *         not positive
 */
long
pcmk__auto_watchdog_timeout(void)
{
    long sbd_timeout = pcmk__get_sbd_timeout();

    return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout);
}
/*!
 * \internal
 * \brief Check a stonith-watchdog-timeout value against the SBD setup
 *
 * A NULL value is treated as 0, and a negative parsed value is replaced by
 * pcmk__auto_watchdog_timeout(). A resulting timeout of 0 is accepted. Any
 * other timeout requires that sbd be located and that the timeout not be less
 * than pcmk__get_sbd_timeout(); if either check fails, an emergency message is
 * logged and crm_exit(CRM_EX_FATAL) is called.
 *
 * \param[in] value  Configured stonith-watchdog-timeout (may be NULL)
 *
 * \return true if the value is acceptable, otherwise false
 */
bool
pcmk__valid_sbd_timeout(const char *value)
{
    long sbd_timeout = 0;
    long st_timeout = 0;

    if (value != NULL) {
        st_timeout = crm_get_msec(value);
    }

    if (st_timeout < 0) {
        st_timeout = pcmk__auto_watchdog_timeout();
        crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)",
                  st_timeout, value);
    }

    if (st_timeout == 0) {
        crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)",
                  value? value : "default");
        return true;
    }

    if (pcmk__locate_sbd() == 0) {
        crm_emerg("Shutting down: stonith-watchdog-timeout configured (%s) "
                  "but SBD not active", (value? value : "auto"));
        crm_exit(CRM_EX_FATAL);
        return false;
    }

    sbd_timeout = pcmk__get_sbd_timeout();

    /* NOTE(review): a timeout equal to the SBD timeout passes this check,
     * although the message says it must be greater -- confirm which is meant.
     */
    if (st_timeout < sbd_timeout) {
        crm_emerg("Shutting down: stonith-watchdog-timeout (%s) too short "
                  "(must be >%ldms)", value, sbd_timeout);
        crm_exit(CRM_EX_FATAL);
        return false;
    }

    crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms",
             value, sbd_timeout);
    return true;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:29 PM (11 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1995978
Default Alt Text
(16 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment