diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index 05a0d7425d..7a68d1f924 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -1,50 +1,49 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common -AM_CPPFLAGS += -I$(top_builddir)/lib/gnu -I$(top_srcdir)/lib/gnu \ - -DSBINDIR=\"$(sbindir)\" +AM_CPPFLAGS += -I$(top_builddir)/lib/gnu -I$(top_srcdir)/lib/gnu ## libraries lib_LTLIBRARIES = libcrmcommon.la # Can't use -Wcast-qual here because glib insists on pretending things are const # when they're not and thus we need the crm_element_value_const() hack # s390 needs -fPIC # s390-suse-linux/bin/ld: .libs/ipc.o: relocation R_390_PC32DBL against `__stack_chk_fail@@GLIBC_2.4' can not be used when making a shared object; recompile with -fPIC CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC libcrmcommon_la_LDFLAGS = -version-info 9:1:6 libcrmcommon_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libcrmcommon_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libcrmcommon_la_LIBADD = @LIBADD_DL@ $(GNUTLSLIBS) -lm libcrmcommon_la_SOURCES = compat.c digest.c ipc.c io.c procfs.c utils.c xml.c \ iso8601.c remote.c mainloop.c logging.c watchdog.c \ schemas.c strings.c xpath.c attrd_client.c if BUILD_CIBSECRETS libcrmcommon_la_SOURCES += cib_secrets.c endif 
libcrmcommon_la_SOURCES += $(top_builddir)/lib/gnu/md5.c clean-generic: rm -f *.log *.debug *.xml *~ diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index a171721abb..4c4abc1e2c 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,277 +1,277 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * 2014 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ /* NOTE(review): the header names that should follow each #include below are missing from this copy of the patch */ #include #include #include #include #include #include #include #include #include #ifdef _POSIX_MEMLOCK # include #endif /* pid of the sbd daemon as last resolved by pcmk_locate_sbd(); 0 means none has been found (yet) */ static int sbd_pid = 0; /* Bit flags for panic handling; NOTE(review): not referenced anywhere in the visible part of this file */ enum pcmk_panic_flags { pcmk_panic_none = 0x00, pcmk_panic_delay = 0x01, pcmk_panic_kdump = 0x02, pcmk_panic_shutdown = 0x04, }; #define SYSRQ "/proc/sys/kernel/sysrq" /* Make sure the SysRq functions used by this file are enabled. Runs its body only on the first call (guarded by the static need_init flag). Reads the integer in SYSRQ; if it is already 1 nothing is changed, otherwise the bits 8 and 128 are OR-ed in and the result is written back. Failures to open or parse the file are logged with crm_perror(); a parse failure is treated as a current value of 0. NOTE(review): the results of the final fprintf()/fclose() are not checked, so a failed write goes unreported. */ void sysrq_init(void) { static bool need_init = true; FILE* procf; int c; if(need_init) { need_init = false; } else { return; } procf = fopen(SYSRQ, "r"); if (!procf) { crm_perror(LOG_ERR, "Cannot open "SYSRQ" for read"); return; } if (fscanf(procf, "%d", &c) != 1) { crm_perror(LOG_ERR, "Parsing "SYSRQ" failed"); c = 0; } fclose(procf); if (c == 1) return; /* 8 for debugging dumps of processes, 128 for reboot/poweroff */ c |= 136; procf = fopen(SYSRQ, "w"); if (!procf) { crm_perror(LOG_ERR, "Cannot write to "SYSRQ); return; } fprintf(procf, "%d", c); fclose(procf); return; } /* Send the single SysRq command character t to the kernel: calls sysrq_init() first, then appends t and a newline to /proc/sysrq-trigger. Logs and returns without writing if the trigger file cannot be opened. */ static void sysrq_trigger(char t) { FILE *procf; sysrq_init(); procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { crm_perror(LOG_ERR, "Opening sysrq-trigger failed"); return; } crm_info("sysrq-trigger: %c", t); fprintf(procf, "%c\n", t); fclose(procf); return; } /* Panic the node without going through sbd. A non-root process does not reboot anything itself: it exits with pcmk_err_panic, after first trying to queue SIGQUIT to pacemakerd when its parent pid is not > 1. A root process issues SysRq 'c' when PCMK_panic_action is "crash" and 'b' otherwise, then calls reboot(RB_AUTOBOOT) and, if that returns, logs the error and exits. */ static void pcmk_panic_local(void) { int rc = pcmk_ok; uid_t uid = geteuid(); pid_t ppid = getppid(); if(uid != 0 && ppid > 1) { /* We're a non-root pacemaker daemon (cib, crmd, pengine, * attrd, etc) with the original pacemakerd parent * * Of these, only crmd is likely to be initiating resets */ do_crm_log_always(LOG_EMERG, "Signaling parent %d to panic", 
ppid); crm_exit(pcmk_err_panic); return; } else if (uid != 0) { /* * No permissions and no pacemakerd parent to escalate to * Track down the new pacemakerd process and send a signal instead */ union sigval signal_value; memset(&signal_value, 0, sizeof(signal_value)); ppid = crm_procfs_pid_of("pacemakerd"); do_crm_log_always(LOG_EMERG, "Signaling pacemakerd(%d) to panic", ppid); /* the signal is only attempted when the lookup produced a pid greater than 1 */ if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal pacemakerd(%d) to panic", ppid); } /* The best we can do now is die */ crm_exit(pcmk_err_panic); return; } /* We're either pacemakerd, or a pacemaker daemon running as root */ if (safe_str_eq("crash", getenv("PCMK_panic_action"))) { sysrq_trigger('c'); } else { sysrq_trigger('b'); } /* reboot(RB_HALT_SYSTEM); rc = errno; */ /* errno is captured immediately after reboot() returns and reported in the message below */ reboot(RB_AUTOBOOT); rc = errno; do_crm_log_always(LOG_EMERG, "Reboot failed, escalating to %d: %s (%d)", ppid, pcmk_strerror(rc), rc); if(ppid > 1) { /* child daemon */ exit(pcmk_err_panic); } else { /* pacemakerd or orphan child */ exit(DAEMON_RESPAWN_STOP); } } /* Ask sbd to panic the node by queueing SIGKILL to sbd_pid. If the signal cannot be queued, the failure is logged and pcmk_panic_local() is used instead. The process then exits with pcmk_err_panic when its parent pid is > 1 and with DAEMON_RESPAWN_STOP otherwise. */ static void pcmk_panic_sbd(void) { union sigval signal_value; pid_t ppid = getppid(); do_crm_log_always(LOG_EMERG, "Signaling sbd(%d) to panic", sbd_pid); memset(&signal_value, 0, sizeof(signal_value)); /* TODO: Arrange for a slightly less brutal option? 
*/ if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal SBD(%d) to terminate", sbd_pid); pcmk_panic_local(); } if(ppid > 1) { /* child daemon */ exit(pcmk_err_panic); } else { /* pacemakerd or orphan child */ exit(DAEMON_RESPAWN_STOP); } } /* Public entry point for panicking the local node. origin is only used in the log messages. If the "panic-delay" trace callsite has targets enabled, the process exits with DAEMON_RESPAWN_STOP instead of panicking. Otherwise sbd is signalled via pcmk_panic_sbd() when sbd_pid > 1, and pcmk_panic_local() is used when it is not. */ void pcmk_panic(const char *origin) { static struct qb_log_callsite *panic_cs = NULL; if (panic_cs == NULL) { panic_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay", LOG_TRACE, __LINE__, crm_trace_nonlog); } /* Ensure sbd_pid is set */ (void)pcmk_locate_sbd(); if (panic_cs && panic_cs->targets) { /* getppid() == 1 means our original parent no longer exists */ do_crm_log_always(LOG_EMERG, "Shutting down instead of panicking the node: origin=%s, sbd=%d, parent=%d", origin, sbd_pid, getppid()); crm_exit(DAEMON_RESPAWN_STOP); return; } if(sbd_pid > 1) { do_crm_log_always(LOG_EMERG, "Signaling sbd(%d) to panic the system: %s", sbd_pid, origin); pcmk_panic_sbd(); } else { do_crm_log_always(LOG_EMERG, "Panicking the system directly: %s", origin); pcmk_panic_local(); } } /* Return the pid of the sbd daemon, or 0 when none is found. A previously found pid (> 1) is returned straight from the sbd_pid cache. Otherwise the pid file HA_STATE_DIR/sbd.pid is checked with crm_pidfile_inuse(), then crm_procfs_pid_of("sbd") is tried; a negative result is normalised to 0. NOTE(review): the "(proc)" trace message is emitted even when the /proc lookup found nothing. */ pid_t pcmk_locate_sbd(void) { char *pidfile = NULL; char *sbd_path = NULL; if(sbd_pid > 1) { return sbd_pid; } /* Look for the pid file */ pidfile = crm_strdup_printf("%s/sbd.pid", HA_STATE_DIR); /* the -/+ pair that follows is this patch's only change to the function: the SBINDIR macro is replaced by SBIN_DIR */ - sbd_path = crm_strdup_printf("%s/sbd", SBINDIR); + sbd_path = crm_strdup_printf("%s/sbd", SBIN_DIR); /* Read the pid file */ CRM_ASSERT(pidfile); sbd_pid = crm_pidfile_inuse(pidfile, 0, sbd_path); if(sbd_pid > 0) { crm_trace("SBD detected at pid=%d (file)", sbd_pid); } else { /* Fall back to /proc for systems that support it */ sbd_pid = crm_procfs_pid_of("sbd"); crm_trace("SBD detected at pid=%d (proc)", sbd_pid); } if(sbd_pid < 0) { sbd_pid = 0; crm_trace("SBD not detected"); } free(pidfile); free(sbd_path); return sbd_pid; } /* Return the result of crm_get_msec() applied to the SBD_WATCHDOG_TIMEOUT environment variable. getenv() yields NULL when the variable is unset; presumably crm_get_msec() reports that as a non-positive value - TODO confirm. */ long crm_get_sbd_timeout(void) { const char *env_value = getenv("SBD_WATCHDOG_TIMEOUT"); long sbd_timeout = crm_get_msec(env_value); return sbd_timeout; } /* Check the stonith-watchdog-timeout setting given in value against the running sbd. Returns TRUE when the setting is disabled or acceptable; calls crm_exit(DAEMON_RESPAWN_STOP) when a timeout is configured but sbd is not located, or when the timeout is shorter than the sbd timeout. */ gboolean check_sbd_timeout(const char *value) { long 
sbd_timeout = crm_get_sbd_timeout(); long st_timeout = crm_get_msec(value); /* an unset or non-positive stonith-watchdog-timeout is treated as "disabled" */ if(value == NULL || st_timeout <= 0) { /* NOTE(review): value can be NULL here and is handed to a %s conversion; this branch also falls through to the crm_info() at the end, which formats value again and reports the setup as consistent before returning TRUE */ crm_notice("Watchdog may be enabled but stonith-watchdog-timeout is disabled: %s", value); } else if(pcmk_locate_sbd() == 0) { do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout is configured (%ldms) but SBD is not active", st_timeout); crm_exit(DAEMON_RESPAWN_STOP); return FALSE; } else if(st_timeout < sbd_timeout) { /* NOTE(review): equal timeouts pass this check although the message says the value must be greater */ do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout (%ldms) is too short (must be greater than %ldms)", st_timeout, sbd_timeout); crm_exit(DAEMON_RESPAWN_STOP); return FALSE; } crm_info("Watchdog functionality is consistent: %s delay exceeds timeout of %ldms", value, sbd_timeout); return TRUE; }