diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h index 8f1cfb2503..46b56c0e01 100644 --- a/include/crm/common/options_internal.h +++ b/include/crm/common/options_internal.h @@ -1,140 +1,141 @@ /* * Copyright 2006-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__OPTIONS_INTERNAL__H # define PCMK__OPTIONS_INTERNAL__H # ifndef PCMK__CONFIG_H # define PCMK__CONFIG_H # include // _Noreturn # endif # include // GHashTable # include // bool _Noreturn void pcmk__cli_help(char cmd); /* * Environment variable option handling */ const char *pcmk__env_option(const char *option); void pcmk__set_env_option(const char *option, const char *value, bool compat); bool pcmk__env_option_enabled(const char *daemon, const char *option); /* * Cluster option handling */ typedef struct pcmk__cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; bool (*is_valid)(const char *); const char *description_short; const char *description_long; } pcmk__cluster_option_t; const char *pcmk__cluster_option(GHashTable *options, const pcmk__cluster_option_t *option_list, int len, const char *name); gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, const char *desc_long, pcmk__cluster_option_t *option_list, int len); void pcmk__validate_cluster_options(GHashTable *options, pcmk__cluster_option_t *option_list, int len); bool pcmk__valid_interval_spec(const char *value); bool pcmk__valid_boolean(const char *value); bool pcmk__valid_number(const char *value); bool pcmk__valid_positive_number(const char *value); bool pcmk__valid_quorum(const char *value); bool pcmk__valid_script(const char *value); bool pcmk__valid_percentage(const char *value); // from watchdog.c long pcmk__get_sbd_timeout(void); bool pcmk__get_sbd_sync_resource_startup(void); long pcmk__auto_watchdog_timeout(void); bool pcmk__valid_sbd_timeout(const char *value); // Constants for environment variable names #define PCMK__ENV_AUTHKEY_LOCATION "authkey_location" #define PCMK__ENV_BLACKBOX "blackbox" #define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled" #define PCMK__ENV_CIB_TIMEOUT "cib_timeout" #define PCMK__ENV_CLUSTER_TYPE "cluster_type" #define PCMK__ENV_DEBUG "debug" #define PCMK__ENV_FAIL_FAST "fail_fast" #define PCMK__ENV_LOGFACILITY "logfacility" #define PCMK__ENV_LOGFILE "logfile" #define PCMK__ENV_LOGFILE_MODE "logfile_mode" #define PCMK__ENV_LOGPRIORITY "logpriority" #define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit" #define PCMK__ENV_NODE_START_STATE "node_start_state" +#define PCMK__ENV_PANIC_ACTION "panic_action" #define PCMK__ENV_PHYSICAL_HOST "physical_host" #define PCMK__ENV_REMOTE_ADDRESS "remote_address" #define PCMK__ENV_REMOTE_PID1 "remote_pid1" #define PCMK__ENV_REMOTE_PORT "remote_port" #define PCMK__ENV_RESPAWNED "respawned" #define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory" #define PCMK__ENV_SERVICE "service" #define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay" #define PCMK__ENV_STDERR "stderr" #define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox" #define PCMK__ENV_TRACE_FILES "trace_files" #define PCMK__ENV_TRACE_FORMATS "trace_formats" #define PCMK__ENV_TRACE_FUNCTIONS "trace_functions" #define PCMK__ENV_TRACE_TAGS "trace_tags" #define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled" // @COMPAT Drop at 3.0.0; likely last used in 1.1.24 #define PCMK__ENV_MCP "mcp" // @COMPAT Drop at 3.0.0; added unused in 1.1.9 #define PCMK__ENV_QUORUM_TYPE "quorum_type" // Constants for cluster option names #define PCMK__OPT_NODE_HEALTH_BASE "node-health-base" #define PCMK__OPT_NODE_HEALTH_GREEN "node-health-green" #define PCMK__OPT_NODE_HEALTH_RED "node-health-red" #define PCMK__OPT_NODE_HEALTH_STRATEGY "node-health-strategy" #define PCMK__OPT_NODE_HEALTH_YELLOW "node-health-yellow" // Constants for meta-attribute names #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" // Constants for enumerated values for various options #define PCMK__VALUE_CLUSTER "cluster" #define PCMK__VALUE_CUSTOM "custom" #define PCMK__VALUE_FENCING "fencing" #define PCMK__VALUE_GREEN "green" #define PCMK__VALUE_LOCAL "local" #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" #define PCMK__VALUE_NONE "none" #define PCMK__VALUE_NOTHING "nothing" #define PCMK__VALUE_ONLY_GREEN "only-green" #define PCMK__VALUE_PROGRESSIVE "progressive" #define PCMK__VALUE_QUORUM "quorum" #define PCMK__VALUE_RED "red" #define PCMK__VALUE_UNFENCING "unfencing" #define PCMK__VALUE_YELLOW "yellow" #endif // PCMK__OPTIONS_INTERNAL__H diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index 6c67555c53..e5692140ad 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,307 +1,310 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static pid_t sbd_pid = 0; static void sysrq_trigger(char t) { #if HAVE_LINUX_PROCFS FILE *procf; // Root can always write here, regardless of kernel.sysrq value procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { crm_perror(LOG_WARNING, "Opening sysrq-trigger failed"); return; } crm_info("sysrq-trigger: %c", t); fprintf(procf, "%c\n", t); fclose(procf); #endif // HAVE_LINUX_PROCFS return; } /*! * \internal * \brief Panic the local host (if root) or tell pacemakerd to do so */ static void panic_local(void) { int rc = pcmk_ok; uid_t uid = geteuid(); pid_t ppid = getppid(); + const char *panic_action = pcmk__env_option(PCMK__ENV_PANIC_ACTION); if(uid != 0 && ppid > 1) { /* We're a non-root pacemaker daemon (pacemaker-based, * pacemaker-controld, pacemaker-schedulerd, pacemaker-attrd, etc.) with * the original pacemakerd parent. * * Of these, only the controller is likely to be initiating resets. */ crm_emerg("Signaling parent %lld to panic", (long long) ppid); crm_exit(CRM_EX_PANIC); return; } else if (uid != 0) { #if HAVE_LINUX_PROCFS /* * No permissions, and no pacemakerd parent to escalate to. * Track down the new pacemakerd process and send a signal instead. */ union sigval signal_value; memset(&signal_value, 0, sizeof(signal_value)); ppid = pcmk__procfs_pid_of("pacemakerd"); crm_emerg("Signaling pacemakerd[%lld] to panic", (long long) ppid); if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic", (long long) ppid); } #endif // HAVE_LINUX_PROCFS /* The best we can do now is die */ crm_exit(CRM_EX_PANIC); return; } /* We're either pacemakerd, or a pacemaker daemon running as root */ - if (pcmk__str_eq("crash", getenv("PCMK_panic_action"), pcmk__str_casei)) { + if (pcmk__str_eq(panic_action, "crash", pcmk__str_casei)) { sysrq_trigger('c'); - } else if (pcmk__str_eq("sync-crash", getenv("PCMK_panic_action"), pcmk__str_casei)) { + + } else if (pcmk__str_eq(panic_action, "sync-crash", pcmk__str_casei)) { sync(); sysrq_trigger('c'); + } else { - if (pcmk__str_eq("sync-reboot", getenv("PCMK_panic_action"), pcmk__str_casei)) { + if (pcmk__str_eq(panic_action, "sync-reboot", pcmk__str_casei)) { sync(); } sysrq_trigger('b'); } /* reboot(RB_HALT_SYSTEM); rc = errno; */ reboot(RB_AUTOBOOT); rc = errno; crm_emerg("Reboot failed, escalating to parent %lld: %s " CRM_XS " rc=%d", (long long) ppid, pcmk_rc_str(rc), rc); if(ppid > 1) { /* child daemon */ exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ exit(CRM_EX_FATAL); } } /*! * \internal * \brief Tell sbd to kill the local host, then exit */ static void panic_sbd(void) { union sigval signal_value; pid_t ppid = getppid(); crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid); memset(&signal_value, 0, sizeof(signal_value)); /* TODO: Arrange for a slightly less brutal option? */ if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate", (long long) sbd_pid); panic_local(); } if(ppid > 1) { /* child daemon */ exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ exit(CRM_EX_FATAL); } } /*! * \internal * \brief Panic the local host * * Panic the local host either by sbd (if running), directly, or by asking * pacemakerd. If trace logging this function, exit instead. * * \param[in] origin Function caller (for logging only) */ void pcmk__panic(const char *origin) { /* Ensure sbd_pid is set */ (void) pcmk__locate_sbd(); pcmk__if_tracing( { // getppid() == 1 means our original parent no longer exists crm_emerg("Shutting down instead of panicking the node " CRM_XS " origin=%s sbd=%lld parent=%d", origin, (long long) sbd_pid, getppid()); crm_exit(CRM_EX_FATAL); return; }, {} ); if(sbd_pid > 1) { crm_emerg("Signaling sbd[%lld] to panic the system: %s", (long long) sbd_pid, origin); panic_sbd(); } else { crm_emerg("Panicking the system directly: %s", origin); panic_local(); } } /*! * \internal * \brief Return the process ID of sbd (or 0 if it is not running) */ pid_t pcmk__locate_sbd(void) { char *pidfile = NULL; char *sbd_path = NULL; int rc; if(sbd_pid > 1) { return sbd_pid; } /* Look for the pid file */ pidfile = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid"); sbd_path = crm_strdup_printf("%s/sbd", SBIN_DIR); /* Read the pid file */ rc = pcmk__pidfile_matches(pidfile, 0, sbd_path, &sbd_pid); if (rc == pcmk_rc_ok) { crm_trace("SBD detected at pid %lld (via PID file %s)", (long long) sbd_pid, pidfile); #if HAVE_LINUX_PROCFS } else { /* Fall back to /proc for systems that support it */ sbd_pid = pcmk__procfs_pid_of("sbd"); crm_trace("SBD detected at pid %lld (via procfs)", (long long) sbd_pid); #endif // HAVE_LINUX_PROCFS } if(sbd_pid < 0) { sbd_pid = 0; crm_trace("SBD not detected"); } free(pidfile); free(sbd_path); return sbd_pid; } long pcmk__get_sbd_timeout(void) { static long sbd_timeout = -2; if (sbd_timeout == -2) { sbd_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT")); } return sbd_timeout; } bool pcmk__get_sbd_sync_resource_startup(void) { static int sync_resource_startup = PCMK__SBD_SYNC_DEFAULT; static bool checked_sync_resource_startup = false; if (!checked_sync_resource_startup) { const char *sync_env = getenv("SBD_SYNC_RESOURCE_STARTUP"); if (sync_env == NULL) { crm_trace("Defaulting to %sstart-up synchronization with sbd", (PCMK__SBD_SYNC_DEFAULT? "" : "no ")); } else if (crm_str_to_boolean(sync_env, &sync_resource_startup) < 0) { crm_warn("Defaulting to %sstart-up synchronization with sbd " "because environment value '%s' is invalid", (PCMK__SBD_SYNC_DEFAULT? "" : "no "), sync_env); } checked_sync_resource_startup = true; } return sync_resource_startup != 0; } long pcmk__auto_watchdog_timeout(void) { long sbd_timeout = pcmk__get_sbd_timeout(); return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout); } bool pcmk__valid_sbd_timeout(const char *value) { long st_timeout = value? crm_get_msec(value) : 0; if (st_timeout < 0) { st_timeout = pcmk__auto_watchdog_timeout(); crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)", st_timeout, value); } if (st_timeout == 0) { crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)", value? value : "default"); } else if (pcmk__locate_sbd() == 0) { crm_emerg("Shutting down: stonith-watchdog-timeout configured (%s) " "but SBD not active", (value? value : "auto")); crm_exit(CRM_EX_FATAL); return false; } else { long sbd_timeout = pcmk__get_sbd_timeout(); if (st_timeout < sbd_timeout) { crm_emerg("Shutting down: stonith-watchdog-timeout (%s) too short " "(must be >%ldms)", value, sbd_timeout); crm_exit(CRM_EX_FATAL); return false; } crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms", value, sbd_timeout); } return true; }