No OneTemporary
Actions

Size

16 KB

Referenced Files

None

Subscribers

None

View Options

	diff --git a/etc/sysconfig/pacemaker b/etc/sysconfig/pacemaker
	index c66f459345..44e0ba63a0 100644
	--- a/etc/sysconfig/pacemaker
	+++ b/etc/sysconfig/pacemaker
	@@ -1,170 +1,173 @@
	#==#==# Variables that control logging

	# Enable debug logging globally (yes|no) or by subsystem. Multiple subsystems
	# may be comma-separated, for example: PCMK_debug=pacemakerd,pacemaker-execd
	# Subsystems are:
	# pacemakerd
	# pacemaker-attrd
	# pacemaker-based
	# pacemaker-controld
	# pacemaker-execd
	# pacemaker-fenced
	# pacemaker-schedulerd
	# PCMK_debug=no

	# Send detailed log messages to the specified file. Compared to messages logged
	# via syslog, messages in this file may have extended information, and will
	# include messages of "info" severity (and, if debug and/or trace logging
	# has been enabled, those as well). This log is of more use to developers and
	# advanced system administrators, and when reporting problems.
	# PCMK_logfile=/var/log/pacemaker/pacemaker.log

	# Set the permissions on the above log file to owner/group read/write
	# PCMK_logfile_mode=0660

	# Enable logging via syslog, using the specified syslog facility. Messages sent
	# here are of value to all Pacemaker users. This can be disabled using "none",
	# but that is not recommended. The default is "daemon".
	# PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7

	# Unless syslog logging is disabled using PCMK_logfacility=none, messages of
	# the specified severity and higher will be sent to syslog. The default value
	# of "notice" is appropriate for most installations; "info" is highly verbose
	# and "debug" is almost certain to send you blind (which is why there is a
	# separate detail log specified by PCMK_logfile).
	# PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug

	# Log all messages from a comma-separated list of functions.
	# PCMK_trace_functions=function1,function2,function3

	# Log all messages from a comma-separated list of file names (without path).
	# PCMK_trace_files=file1.c,file2.c

	# Log all messages matching comma-separated list of formats.
	# PCMK_trace_formats="Sent delete %d"

	# Log all messages from a comma-separated list of tags.
	# PCMK_trace_tags=tag1,tag2

	# Dump the blackbox whenever the message at function and line is emitted,
	# e.g. PCMK_trace_blackbox=te_graph_trigger:223,unpack_clone:81
	# PCMK_trace_blackbox=fn:line,fn2:line2,...

	# Enable blackbox logging globally or per-subsystem. The blackbox contains a
	# rolling buffer of all logs (including info, debug, and trace) and is written
	# after a crash or assertion failure, and/or when SIGTRAP is received. The
	# blackbox recorder can also be enabled for Pacemaker daemons at runtime by
	# sending SIGUSR1 (or SIGTRAP), and disabled by sending SIGUSR2. Specify value
	# as for PCMK_debug above.
	# PCMK_blackbox=no

	#==#==# Advanced use only

	# By default, nodes will join the cluster in an online state when they first
	# start, unless they were previously put into standby mode. If this variable is
	# set to "standby" or "online", it will force this node to join in the
	# specified state when starting.
	# (only supported for cluster nodes, not Pacemaker Remote nodes)
	# PCMK_node_start_state=default

	# Specify an alternate location for RNG schemas and XSL transforms.
	# (This is of use only to developers.)
	# PCMK_schema_directory=/some/path

	# Pacemaker consists of a master process with multiple subsidiary daemons. If
	# one of the daemons crashes, the master process will normally attempt to
	# restart it. If this is set to "true", the master process will instead panic
	# the host (see PCMK_panic_action). The default is unset.
	# PCMK_fail_fast=no

	# Pacemaker will panic its host under certain conditions. If this is set to
	# "crash", Pacemaker will trigger a kernel crash (which is useful if you want a
	-# kernel dump to investigate). For any other value, Pacemaker will trigger a
	-# host reboot. The default is unset.
	+# kernel dump to investigate). If this is set to "sync-reboot", Pacemaker will
	+# call sync() before rebooting the host (this preserves information about the
	+# crashed daemon in the log file, but note that the sync() call may never
	+# return). For any other value, Pacemaker will trigger a host reboot. The
	+# default is unset.
	# PCMK_panic_action=crash

	#==#==# Pacemaker Remote
	# Use the contents of this file as the authorization key to use with Pacemaker
	# Remote connections. This file must be readable by Pacemaker daemons (that is,
	# it must allow read permissions to either the hacluster user or the haclient
	# group), and its contents must be identical on all nodes. The default is
	# "/etc/pacemaker/authkey".
	# PCMK_authkey_location=/etc/pacemaker/authkey

	# If the Pacemaker Remote service is run on the local node, it will listen
	# for connections on this address. The value may be a resolvable hostname or an
	# IPv4 or IPv6 numeric address. When resolving names or using the default
	# wildcard address (i.e. listen on all available addresses), IPv6 will be
	# preferred if available. When listening on an IPv6 address, IPv4 clients will
	# be supported (via IPv4-mapped IPv6 addresses).
	# PCMK_remote_address="192.0.2.1"

	# Use this TCP port number when connecting to a Pacemaker Remote node. This
	# value must be the same on all nodes. The default is "3121".
	# PCMK_remote_port=3121

	# Use these GnuTLS cipher priorities for TLS connections. See:
	#
	# https://gnutls.org/manual/html_node/Priority-Strings.html
	#
	# Pacemaker will append ":+ANON-DH" for remote CIB access (when enabled) and
	# ":+DHE-PSK:+PSK" for Pacemaker Remote connections, as they are required for
	# the respective functionality.
	# PCMK_tls_priorities="NORMAL"

	# Set bounds on the bit length of the prime number generated for Diffie-Hellman
	# parameters needed by TLS connections. The default is not to set any bounds.
	#
	# If these values are specified, the server (Pacemaker Remote daemon, or CIB
	# manager configured to accept remote clients) will use these values to provide
	# a floor and/or ceiling for the value recommended by the GnuTLS library. The
	# library will only accept a limited number of specific values, which vary by
	# library version, so setting these is recommended only when required for
	# compatibility with specific client versions.
	#
	# If PCMK_dh_min_bits is specified, the client (connecting cluster node or
	# remote CIB command) will require that the server use a prime of at least this
	# size. This is only recommended when the value must be lowered in order for
	# the client's GnuTLS library to accept a connection to an older server.
	# The client side does not use PCMK_dh_max_bits.
	#
	# PCMK_dh_min_bits=1024
	# PCMK_dh_max_bits=2048

	#==#==# IPC

	# Force use of a particular class of IPC connection.
	# PCMK_ipc_type=shared-mem|socket|posix|sysv

	# Specify an IPC buffer size in bytes. This is useful when connecting to really
	# big clusters that exceed the default 128KB buffer.
	# PCMK_ipc_buffer=131072

	#==#==# Profiling and memory leak testing (mainly useful to developers)

	# Affect the behavior of glib's memory allocator. Setting to "always-malloc"
	# when running under valgrind will help valgrind track malloc/free better;
	# setting to "debug-blocks" when not running under valgrind will perform
	# (somewhat expensive) memory checks.
	# G_SLICE=always-malloc

	# Uncommenting this will make malloc() initialize newly allocated memory
	# and free() wipe it (to help catch uninitialized-memory/use-after-free).
	# MALLOC_PERTURB_=221

	# Uncommenting this will make malloc() and friends print to stderr and abort
	# for some (inexpensive) memory checks.
	# MALLOC_CHECK_=3

	# Set as for PCMK_debug above to run some or all daemons under valgrind.
	# PCMK_valgrind_enabled=no

	# Set as for PCMK_debug above to run some or all daemons under valgrind with
	# the callgrind tool enabled.
	# PCMK_callgrind_enabled=no

	# Set the options to pass to valgrind, when valgrind is enabled. See
	# valgrind(1) man page for details. "--vgdb=no" is specified because
	# pacemaker-execd can lower privileges when executing commands, which would
	# otherwise leave a bunch of unremovable files in /tmp.
	VALGRIND_OPTS="--leak-check=full --trace-children=no --vgdb=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all"
	diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c
	index 03ee7f17d1..07139b1162 100644
	--- a/lib/common/watchdog.c
	+++ b/lib/common/watchdog.c
	@@ -1,301 +1,304 @@
	/*
	* Copyright 2013-2020 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sched.h>
	#include <sys/ioctl.h>
	#include <sys/reboot.h>

	#include <sys/types.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <ctype.h>
	#include <dirent.h>
	#include <signal.h>

	#ifdef _POSIX_MEMLOCK
	# include <sys/mman.h>
	#endif

	// PID of sbd as last determined by pcmk__locate_sbd(); 0 if not found
	static pid_t sbd_pid = 0;

	/*!
	 * \internal
	 * \brief Write a single command character to /proc/sysrq-trigger
	 *
	 * This does nothing unless built with SUPPORT_PROCFS. If the trigger file
	 * cannot be opened, a warning is logged and nothing is written.
	 *
	 * \param[in] t  Command character to write (followed by a newline)
	 */
	static void
	sysrq_trigger(char t)
	{
	#if SUPPORT_PROCFS
	    // Root can always write here, regardless of kernel.sysrq value
	    FILE *trigger = fopen("/proc/sysrq-trigger", "a");

	    if (trigger == NULL) {
	        crm_perror(LOG_WARNING, "Opening sysrq-trigger failed");
	        return;
	    }

	    crm_info("sysrq-trigger: %c", t);
	    fprintf(trigger, "%c\n", t);
	    fclose(trigger);
	#endif // SUPPORT_PROCFS
	}


	/*!
	* \internal
	* \brief Panic the local host (if root) or tell pacemakerd to do so
	*
	* When not running as root, this exits with CRM_EX_PANIC; if the parent PID
	* is not greater than 1 and procfs is supported, pacemakerd is first looked
	* up by name and sent SIGQUIT. When running as root, the host is crashed or
	* rebooted depending on the PCMK_panic_action environment variable, and the
	* process exits only if the reboot attempt returns.
	*/
	static void
	panic_local(void)
	{
	int rc = pcmk_ok;
	uid_t uid = geteuid();
	pid_t ppid = getppid();

	if(uid != 0 && ppid > 1) {
	/* We're a non-root pacemaker daemon (pacemaker-based,
	* pacemaker-controld, pacemaker-schedulerd, pacemaker-attrd, etc.) with
	* the original pacemakerd parent.
	*
	* Of these, only the controller is likely to be initiating resets.
	*/
	crm_emerg("Signaling parent %lld to panic", (long long) ppid);
	crm_exit(CRM_EX_PANIC);
	return;

	} else if (uid != 0) {
	#if SUPPORT_PROCFS
	/*
	* No permissions, and no pacemakerd parent to escalate to.
	* Track down the new pacemakerd process and send a signal instead.
	*/
	union sigval signal_value;

	memset(&signal_value, 0, sizeof(signal_value));
	ppid = pcmk__procfs_pid_of("pacemakerd");
	crm_emerg("Signaling pacemakerd[%lld] to panic", (long long) ppid);

	// Only signal if a pacemakerd PID greater than 1 was found
	if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) {
	crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic",
	(long long) ppid);
	}
	#endif // SUPPORT_PROCFS

	/* The best we can do now is die */
	crm_exit(CRM_EX_PANIC);
	return;
	}

	/* We're either pacemakerd, or a pacemaker daemon running as root */

	// PCMK_panic_action (compared case-insensitively) selects sysrq 'c' or 'b'
	if (pcmk__str_eq("crash", getenv("PCMK_panic_action"), pcmk__str_casei)) {
	sysrq_trigger('c');
	} else {
	+ if (pcmk__str_eq("sync-reboot", getenv("PCMK_panic_action"), pcmk__str_casei)) {
	+ sync();
	+ }
	sysrq_trigger('b');
	}
	// Reached if sysrq_trigger() returned (it is a no-op without procfs support)
	/* reboot(RB_HALT_SYSTEM); rc = errno; */
	reboot(RB_AUTOBOOT);
	// Capture errno right away, before logging can overwrite it
	rc = errno;

	crm_emerg("Reboot failed, escalating to parent %lld: %s " CRM_XS " rc=%d",
	(long long) ppid, pcmk_rc_str(rc), rc);

	if(ppid > 1) {
	/* child daemon */
	exit(CRM_EX_PANIC);
	} else {
	/* pacemakerd or orphan child */
	exit(CRM_EX_FATAL);
	}
	}

	/*!
	* \internal
	* \brief Tell sbd to kill the local host, then exit
	*/
	static void
	panic_sbd(void)
	{
	union sigval signal_value;
	pid_t ppid = getppid();

	crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid);

	memset(&signal_value, 0, sizeof(signal_value));
	/* TODO: Arrange for a slightly less brutal option? */
	if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) {
	crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate",
	(long long) sbd_pid);
	panic_local();
	}

	if(ppid > 1) {
	/* child daemon */
	exit(CRM_EX_PANIC);
	} else {
	/* pacemakerd or orphan child */
	exit(CRM_EX_FATAL);
	}
	}

	/*!
	 * \internal
	 * \brief Panic the local host
	 *
	 * Panic the local host either by sbd (if running), directly, or by asking
	 * pacemakerd. If trace logging this function, exit instead.
	 *
	 * \param[in] origin  Function caller (for logging only)
	 */
	void
	pcmk__panic(const char *origin)
	{
	    /* Callsite used only to detect whether tracing of "panic-delay" has been
	     * enabled for this function; looked up once and then reused.
	     */
	    static struct qb_log_callsite *panic_cs = NULL;

	    if (panic_cs == NULL) {
	        panic_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay",
	                                       LOG_TRACE, __LINE__, crm_trace_nonlog);
	    }

	    /* Ensure sbd_pid is set */
	    (void) pcmk__locate_sbd();

	    if (panic_cs && panic_cs->targets) {
	        /* getppid() == 1 means our original parent no longer exists.
	         *
	         * pid_t is not guaranteed to be int, so cast to long long and use
	         * %lld as the rest of this file does.
	         */
	        crm_emerg("Shutting down instead of panicking the node "
	                  CRM_XS " origin=%s sbd=%lld parent=%lld",
	                  origin, (long long) sbd_pid, (long long) getppid());
	        crm_exit(CRM_EX_FATAL);
	        return;
	    }

	    if (sbd_pid > 1) {
	        crm_emerg("Signaling sbd[%lld] to panic the system: %s",
	                  (long long) sbd_pid, origin);
	        panic_sbd();

	    } else {
	        crm_emerg("Panicking the system directly: %s", origin);
	        panic_local();
	    }
	}

	/*!
	 * \internal
	 * \brief Return the process ID of sbd (or 0 if it is not running)
	 *
	 * A previously found PID greater than 1 is returned without a new lookup.
	 * Otherwise the sbd PID file is checked first, and when built with
	 * SUPPORT_PROCFS, a procfs lookup by process name is used as a fallback.
	 */
	pid_t
	pcmk__locate_sbd(void)
	{
	    char *pid_path = NULL;
	    char *exe_path = NULL;

	    if (sbd_pid > 1) {
	        return sbd_pid;
	    }

	    /* Look for the pid file */
	    pid_path = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid");
	    exe_path = crm_strdup_printf("%s/sbd", SBIN_DIR);

	    /* Read the pid file */
	    if (pcmk__pidfile_matches(pid_path, 0, exe_path,
	                              &sbd_pid) == pcmk_rc_ok) {
	        crm_trace("SBD detected at pid %lld (via PID file %s)",
	                  (long long) sbd_pid, pid_path);
	    }
	#if SUPPORT_PROCFS
	    else {
	        /* Fall back to /proc for systems that support it */
	        sbd_pid = pcmk__procfs_pid_of("sbd");
	        crm_trace("SBD detected at pid %lld (via procfs)",
	                  (long long) sbd_pid);
	    }
	#endif // SUPPORT_PROCFS

	    /* Normalize any negative lookup result to "not running" */
	    if (sbd_pid < 0) {
	        sbd_pid = 0;
	        crm_trace("SBD not detected");
	    }

	    free(pid_path);
	    free(exe_path);

	    return sbd_pid;
	}

	/*!
	 * \internal
	 * \brief Get the value of the SBD_WATCHDOG_TIMEOUT environment variable
	 *
	 * The variable is parsed with crm_get_msec() on first use, and the result
	 * is kept in a static variable for later calls.
	 *
	 * \return Result of crm_get_msec() on the environment variable's value
	 */
	long
	pcmk__get_sbd_timeout(void)
	{
	    /* -2 marks "not parsed yet" */
	    static long cached_timeout = -2;

	    if (cached_timeout != -2) {
	        return cached_timeout;
	    }

	    cached_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT"));
	    return cached_timeout;
	}

	/*!
	 * \internal
	 * \brief Check whether SBD_SYNC_RESOURCE_STARTUP is set to a true value
	 *
	 * The environment variable is evaluated with crm_is_true() on the first
	 * call only; later calls return the remembered result.
	 *
	 * \return Result of crm_is_true() on the environment variable's value
	 */
	bool
	pcmk__get_sbd_sync_resource_startup(void)
	{
	    static bool already_checked = false;
	    static bool cached_value = false;

	    if (already_checked) {
	        return cached_value;
	    }

	    cached_value = crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP"));
	    already_checked = true;
	    return cached_value;
	}

	/*!
	 * \internal
	 * \brief Calculate a stonith-watchdog-timeout value from the SBD timeout
	 *
	 * \return Twice the value of pcmk__get_sbd_timeout(), or 0 if that value
	 *         is zero or negative
	 */
	long
	pcmk__auto_watchdog_timeout(void)
	{
	    long sbd_timeout = pcmk__get_sbd_timeout();

	    return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout);
	}

	bool
	pcmk__valid_sbd_timeout(const char *value)
	{
	long st_timeout = value? crm_get_msec(value) : 0;

	if (st_timeout < 0) {
	st_timeout = pcmk__auto_watchdog_timeout();
	crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)",
	st_timeout, value);
	}

	if (st_timeout == 0) {
	crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)",
	value? value : "default");

	} else if (pcmk__locate_sbd() == 0) {
	crm_emerg("Shutting down: stonith-watchdog-timeout configured (%s) "
	"but SBD not active", (value? value : "auto"));
	crm_exit(CRM_EX_FATAL);
	return false;

	} else {
	long sbd_timeout = pcmk__get_sbd_timeout();

	if (st_timeout < sbd_timeout) {
	crm_emerg("Shutting down: stonith-watchdog-timeout (%s) too short "
	"(must be >%ldms)", value, sbd_timeout);
	crm_exit(CRM_EX_FATAL);
	return false;
	}
	crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms",
	value, sbd_timeout);
	}
	return true;
	}

File Metadata

Mime Type: text/x-diff
Expires: Tue, Jul 8, 6:29 PM (11 h, 48 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 1995978
Default Alt Text: (16 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions