diff --git a/include/crm/common/options.h b/include/crm/common/options.h index 64cbf5ea28..f0cc68bc72 100644 --- a/include/crm/common/options.h +++ b/include/crm/common/options.h @@ -1,231 +1,232 @@ /* * Copyright 2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_OPTIONS__H #define PCMK__CRM_COMMON_OPTIONS__H #ifdef __cplusplus extern "C" { #endif /** * \file * \brief API related to options * \ingroup core */ /* * Cluster options */ #define PCMK_OPT_BATCH_LIMIT "batch-limit" #define PCMK_OPT_CLUSTER_DELAY "cluster-delay" #define PCMK_OPT_CLUSTER_INFRASTRUCTURE "cluster-infrastructure" #define PCMK_OPT_CLUSTER_IPC_LIMIT "cluster-ipc-limit" #define PCMK_OPT_CLUSTER_NAME "cluster-name" #define PCMK_OPT_CLUSTER_RECHECK_INTERVAL "cluster-recheck-interval" #define PCMK_OPT_CONCURRENT_FENCING "concurrent-fencing" #define PCMK_OPT_DC_DEADTIME "dc-deadtime" #define PCMK_OPT_DC_VERSION "dc-version" #define PCMK_OPT_ELECTION_TIMEOUT "election-timeout" #define PCMK_OPT_ENABLE_ACL "enable-acl" #define PCMK_OPT_ENABLE_STARTUP_PROBES "enable-startup-probes" #define PCMK_OPT_FENCE_REACTION "fence-reaction" #define PCMK_OPT_HAVE_WATCHDOG "have-watchdog" #define PCMK_OPT_JOIN_FINALIZATION_TIMEOUT "join-finalization-timeout" #define PCMK_OPT_JOIN_INTEGRATION_TIMEOUT "join-integration-timeout" #define PCMK_OPT_LOAD_THRESHOLD "load-threshold" #define PCMK_OPT_MAINTENANCE_MODE "maintenance-mode" #define PCMK_OPT_MIGRATION_LIMIT "migration-limit" #define PCMK_OPT_NO_QUORUM_POLICY "no-quorum-policy" #define PCMK_OPT_NODE_ACTION_LIMIT "node-action-limit" #define PCMK_OPT_NODE_HEALTH_BASE "node-health-base" #define PCMK_OPT_NODE_HEALTH_GREEN "node-health-green" #define PCMK_OPT_NODE_HEALTH_RED "node-health-red" #define PCMK_OPT_NODE_HEALTH_STRATEGY "node-health-strategy" #define PCMK_OPT_NODE_HEALTH_YELLOW "node-health-yellow" #define PCMK_OPT_NODE_PENDING_TIMEOUT "node-pending-timeout" #define PCMK_OPT_PE_ERROR_SERIES_MAX "pe-error-series-max" #define PCMK_OPT_PE_INPUT_SERIES_MAX "pe-input-series-max" #define PCMK_OPT_PE_WARN_SERIES_MAX "pe-warn-series-max" #define PCMK_OPT_PLACEMENT_STRATEGY "placement-strategy" #define PCMK_OPT_PRIORITY_FENCING_DELAY "priority-fencing-delay" #define PCMK_OPT_SHUTDOWN_ESCALATION "shutdown-escalation" #define PCMK_OPT_SHUTDOWN_LOCK "shutdown-lock" #define PCMK_OPT_SHUTDOWN_LOCK_LIMIT "shutdown-lock-limit" #define PCMK_OPT_START_FAILURE_IS_FATAL "start-failure-is-fatal" #define PCMK_OPT_STARTUP_FENCING "startup-fencing" #define PCMK_OPT_STONITH_ACTION "stonith-action" #define PCMK_OPT_STONITH_ENABLED "stonith-enabled" #define PCMK_OPT_STONITH_MAX_ATTEMPTS "stonith-max-attempts" #define PCMK_OPT_STONITH_TIMEOUT "stonith-timeout" #define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT "stonith-watchdog-timeout" #define PCMK_OPT_STOP_ALL_RESOURCES "stop-all-resources" #define PCMK_OPT_STOP_ORPHAN_ACTIONS "stop-orphan-actions" #define PCMK_OPT_STOP_ORPHAN_RESOURCES "stop-orphan-resources" #define PCMK_OPT_SYMMETRIC_CLUSTER "symmetric-cluster" #define PCMK_OPT_TRANSITION_DELAY "transition-delay" /* * Meta-attributes */ #define PCMK_META_ALLOW_MIGRATE "allow-migrate" #define PCMK_META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" #define PCMK_META_CLONE_MAX "clone-max" #define PCMK_META_CLONE_MIN "clone-min" #define PCMK_META_CLONE_NODE_MAX "clone-node-max" #define PCMK_META_CONTAINER_ATTRIBUTE_TARGET "container-attribute-target" #define PCMK_META_CRITICAL "critical" #define PCMK_META_ENABLED "enabled" #define PCMK_META_FAILURE_TIMEOUT "failure-timeout" #define PCMK_META_GLOBALLY_UNIQUE "globally-unique" #define PCMK_META_INTERLEAVE "interleave" #define PCMK_META_INTERVAL "interval" #define PCMK_META_IS_MANAGED "is-managed" #define PCMK_META_INTERVAL_ORIGIN "interval-origin" #define PCMK_META_MAINTENANCE "maintenance" #define PCMK_META_MIGRATION_THRESHOLD "migration-threshold" #define PCMK_META_MULTIPLE_ACTIVE "multiple-active" #define PCMK_META_NOTIFY "notify" #define PCMK_META_ON_FAIL "on-fail" #define PCMK_META_ORDERED "ordered" #define PCMK_META_PRIORITY "priority" #define PCMK_META_PROMOTABLE "promotable" #define PCMK_META_PROMOTED_MAX "promoted-max" #define PCMK_META_PROMOTED_NODE_MAX "promoted-node-max" #define PCMK_META_RECORD_PENDING "record-pending" #define PCMK_META_REMOTE_ADDR "remote-addr" #define PCMK_META_REMOTE_ALLOW_MIGRATE "remote-allow-migrate" #define PCMK_META_REMOTE_CONNECT_TIMEOUT "remote-connect-timeout" #define PCMK_META_REMOTE_NODE "remote-node" #define PCMK_META_REMOTE_PORT "remote-port" #define PCMK_META_REQUIRES "requires" #define PCMK_META_RESOURCE_STICKINESS "resource-stickiness" #define PCMK_META_START_DELAY "start-delay" #define PCMK_META_TARGET_ROLE "target-role" #define PCMK_META_TIMEOUT "timeout" #define PCMK_META_TIMESTAMP_FORMAT "timestamp-format" /* * Remote resource instance attributes */ #define PCMK_REMOTE_RA_ADDR "addr" #define PCMK_REMOTE_RA_PORT "port" #define PCMK_REMOTE_RA_RECONNECT_INTERVAL "reconnect_interval" #define PCMK_REMOTE_RA_SERVER "server" /* * Enumerated values */ #define PCMK_VALUE_ALWAYS "always" #define PCMK_VALUE_AND "and" #define PCMK_VALUE_BALANCED "balanced" #define PCMK_VALUE_BLOCK "block" #define PCMK_VALUE_BOOLEAN "boolean" #define PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS "cib-bootstrap-options" #define PCMK_VALUE_COROSYNC "corosync" +#define PCMK_VALUE_CRASH "crash" #define PCMK_VALUE_CREATE "create" #define PCMK_VALUE_CUSTOM "custom" #define PCMK_VALUE_DATE_SPEC "date_spec" #define PCMK_VALUE_DEFAULT "default" #define PCMK_VALUE_DEFINED "defined" #define PCMK_VALUE_DELETE "delete" #define PCMK_VALUE_DEMOTE "demote" #define PCMK_VALUE_DENY "deny" #define PCMK_VALUE_DURATION "duration" #define PCMK_VALUE_DYNAMIC_LIST "dynamic-list" #define PCMK_VALUE_EQ "eq" #define PCMK_VALUE_EXCLUSIVE "exclusive" #define PCMK_VALUE_FAILED "failed" #define PCMK_VALUE_FALSE "false" #define PCMK_VALUE_FENCE "fence" #define PCMK_VALUE_FENCING "fencing" #define PCMK_VALUE_FREEZE "freeze" #define PCMK_VALUE_GRANTED "granted" #define PCMK_VALUE_GREEN "green" #define PCMK_VALUE_GT "gt" #define PCMK_VALUE_GTE "gte" #define PCMK_VALUE_HOST "host" #define PCMK_VALUE_IGNORE "ignore" #define PCMK_VALUE_IN_RANGE "in_range" #define PCMK_VALUE_INFINITY "INFINITY" #define PCMK_VALUE_INTEGER "integer" #define PCMK_VALUE_LITERAL "literal" #define PCMK_VALUE_LT "lt" #define PCMK_VALUE_LTE "lte" #define PCMK_VALUE_MANDATORY "Mandatory" #define PCMK_VALUE_MEMBER "member" #define PCMK_VALUE_META "meta" #define PCMK_VALUE_MIGRATE_ON_RED "migrate-on-red" #define PCMK_VALUE_MINIMAL "minimal" #define PCMK_VALUE_MINUS_INFINITY "-" PCMK_VALUE_INFINITY #define PCMK_VALUE_MODIFY "modify" #define PCMK_VALUE_MOVE "move" #define PCMK_VALUE_NE "ne" #define PCMK_VALUE_NEVER "never" #define PCMK_VALUE_NONE "none" #define PCMK_VALUE_NONNEGATIVE_INTEGER "nonnegative_integer" #define PCMK_VALUE_NOT_DEFINED "not_defined" #define PCMK_VALUE_NOTHING "nothing" #define PCMK_VALUE_NUMBER "number" #define PCMK_VALUE_OFFLINE "offline" #define PCMK_VALUE_ONLINE "online" #define PCMK_VALUE_ONLY_GREEN "only-green" #define PCMK_VALUE_OPTIONAL "Optional" #define PCMK_VALUE_OR "or" #define PCMK_VALUE_PANIC "panic" #define PCMK_VALUE_PARAM "param" #define PCMK_VALUE_PENDING "pending" #define PCMK_VALUE_PERCENTAGE "percentage" #define PCMK_VALUE_PLUS_INFINITY "+" PCMK_VALUE_INFINITY #define PCMK_VALUE_PORT "port" #define PCMK_VALUE_PROGRESSIVE "progressive" #define PCMK_VALUE_QUORUM "quorum" #define PCMK_VALUE_READ "read" #define PCMK_VALUE_RED "red" #define PCMK_VALUE_REMOTE "remote" #define PCMK_VALUE_RESTART "restart" #define PCMK_VALUE_RESTART_CONTAINER "restart-container" #define PCMK_VALUE_REVOKED "revoked" #define PCMK_VALUE_SCORE "score" #define PCMK_VALUE_SELECT "select" #define PCMK_VALUE_SERIALIZE "Serialize" #define PCMK_VALUE_STANDBY "standby" #define PCMK_VALUE_STATIC_LIST "static-list" #define PCMK_VALUE_STATUS "status" #define PCMK_VALUE_STRING "string" #define PCMK_VALUE_STOP "stop" #define PCMK_VALUE_STOP_ONLY "stop_only" #define PCMK_VALUE_STOP_START "stop_start" #define PCMK_VALUE_STOP_UNEXPECTED "stop_unexpected" #define PCMK_VALUE_SUCCESS "success" #define PCMK_VALUE_TIMEOUT "timeout" #define PCMK_VALUE_TRUE "true" #define PCMK_VALUE_UNFENCING "unfencing" #define PCMK_VALUE_UNKNOWN "unknown" #define PCMK_VALUE_UTILIZATION "utilization" #define PCMK_VALUE_VERSION "version" #define PCMK_VALUE_WRITE "write" #define PCMK_VALUE_YELLOW "yellow" // @COMPAT This will become a deprecated alias for PCMK_VALUE_FENCE (see T279) #define PCMK_VALUE_FENCE_LEGACY "suicide" #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_OPTIONS__H diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index ade08fdaf9..c259b812a1 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,333 +1,333 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static pid_t sbd_pid = 0; /*! * \internal * \brief Trigger a sysrq command if supported on current platform * * \param[in] t Sysrq command to trigger */ static void sysrq_trigger(char t) { #if HAVE_LINUX_PROCFS // Root can always write here, regardless of kernel.sysrq value FILE *procf = fopen("/proc/sysrq-trigger", "a"); if (procf == NULL) { crm_warn("Could not open sysrq-trigger: %s", strerror(errno)); } else { fprintf(procf, "%c\n", t); fclose(procf); } #endif // HAVE_LINUX_PROCFS } /*! * \internal * \brief Tell pacemakerd to panic the local host * * \param[in] ppid Process ID of parent process */ static void panic_local_nonroot(pid_t ppid) { if (ppid > 1) { // pacemakerd is still our parent crm_emerg("Escalating panic to " PCMK__SERVER_PACEMAKERD "[%lld]", (long long) ppid); } else { // Signal (non-parent) pacemakerd if possible #if HAVE_LINUX_PROCFS ppid = pcmk__procfs_pid_of(PCMK__SERVER_PACEMAKERD); if (ppid > 0) { union sigval signal_value; crm_emerg("Signaling " PCMK__SERVER_PACEMAKERD "[%lld] to panic", (long long) ppid); memset(&signal_value, 0, sizeof(signal_value)); if (sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_emerg("Exiting after signal failure: %s", strerror(errno)); } } else { #endif crm_emerg("Exiting with no known " PCMK__SERVER_PACEMAKERD "process"); #if HAVE_LINUX_PROCFS } #endif } crm_exit(CRM_EX_PANIC); } /*! * \internal * \brief Panic the local host (if root) or tell pacemakerd to do so */ static void panic_local(void) { const char *full_panic_action = pcmk__env_option(PCMK__ENV_PANIC_ACTION); const char *panic_action = full_panic_action; int reboot_cmd = RB_AUTOBOOT; // Default panic action is reboot if (geteuid() != 0) { // Non-root caller such as the controller panic_local_nonroot(getppid()); return; } if (pcmk__starts_with(full_panic_action, "sync-")) { panic_action += sizeof("sync-") - 1; sync(); } if (pcmk__str_empty(full_panic_action) || pcmk__str_eq(panic_action, "reboot", pcmk__str_none)) { sysrq_trigger('b'); - } else if (pcmk__str_eq(panic_action, "crash", pcmk__str_none)) { + } else if (pcmk__str_eq(panic_action, PCMK_VALUE_CRASH, pcmk__str_none)) { sysrq_trigger('c'); } else if (pcmk__str_eq(panic_action, "off", pcmk__str_none)) { sysrq_trigger('o'); #ifdef RB_POWER_OFF reboot_cmd = RB_POWER_OFF; #elif defined(RB_POWEROFF) reboot_cmd = RB_POWEROFF; #endif } else { crm_warn("Using default 'reboot' for local option PCMK_" PCMK__ENV_PANIC_ACTION " because '%s' is not a valid value", full_panic_action); sysrq_trigger('b'); } // sysrq failed or is not supported on this platform, so fall back to reboot reboot(reboot_cmd); // Even reboot failed, nothing left to do but exit crm_emerg("Exiting after reboot failed: %s", strerror(errno)); if (getppid() > 1) { // pacemakerd is parent process crm_exit(CRM_EX_PANIC); } else { // This is pacemakerd, or an orphaned subdaemon crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Tell sbd to kill the local host, then exit */ static void panic_sbd(void) { union sigval signal_value; pid_t ppid = getppid(); crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid); memset(&signal_value, 0, sizeof(signal_value)); /* TODO: Arrange for a slightly less brutal option? */ if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate", (long long) sbd_pid); panic_local(); } if(ppid > 1) { /* child daemon */ crm_exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Panic the local host * * Panic the local host either by sbd (if running), directly, or by asking * pacemakerd. If trace logging this function, exit instead. * * \param[in] origin Function caller (for logging only) */ void pcmk__panic(const char *origin) { /* Ensure sbd_pid is set */ (void) pcmk__locate_sbd(); pcmk__if_tracing( { // getppid() == 1 means our original parent no longer exists crm_emerg("Shutting down instead of panicking the node " QB_XS " origin=%s sbd=%lld parent=%d", origin, (long long) sbd_pid, getppid()); crm_exit(CRM_EX_FATAL); return; }, {} ); if(sbd_pid > 1) { crm_emerg("Signaling sbd[%lld] to panic the system: %s", (long long) sbd_pid, origin); panic_sbd(); } else { crm_emerg("Panicking the system directly: %s", origin); panic_local(); } } /*! * \internal * \brief Return the process ID of sbd (or 0 if it is not running) */ pid_t pcmk__locate_sbd(void) { char *pidfile = NULL; char *sbd_path = NULL; int rc; if(sbd_pid > 1) { return sbd_pid; } /* Look for the pid file */ pidfile = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid"); sbd_path = crm_strdup_printf("%s/sbd", SBIN_DIR); /* Read the pid file */ rc = pcmk__pidfile_matches(pidfile, 0, sbd_path, &sbd_pid); if (rc == pcmk_rc_ok) { crm_trace("SBD detected at pid %lld (via PID file %s)", (long long) sbd_pid, pidfile); #if HAVE_LINUX_PROCFS } else { /* Fall back to /proc for systems that support it */ sbd_pid = pcmk__procfs_pid_of("sbd"); crm_trace("SBD detected at pid %lld (via procfs)", (long long) sbd_pid); #endif // HAVE_LINUX_PROCFS } if(sbd_pid < 0) { sbd_pid = 0; crm_trace("SBD not detected"); } free(pidfile); free(sbd_path); return sbd_pid; } long pcmk__get_sbd_watchdog_timeout(void) { static long sbd_timeout = -2; if (sbd_timeout == -2) { sbd_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT")); } return sbd_timeout; } bool pcmk__get_sbd_sync_resource_startup(void) { static int sync_resource_startup = PCMK__SBD_SYNC_DEFAULT; static bool checked_sync_resource_startup = false; if (!checked_sync_resource_startup) { const char *sync_env = getenv("SBD_SYNC_RESOURCE_STARTUP"); if (sync_env == NULL) { crm_trace("Defaulting to %sstart-up synchronization with sbd", (PCMK__SBD_SYNC_DEFAULT? "" : "no ")); } else if (crm_str_to_boolean(sync_env, &sync_resource_startup) < 0) { crm_warn("Defaulting to %sstart-up synchronization with sbd " "because environment value '%s' is invalid", (PCMK__SBD_SYNC_DEFAULT? "" : "no "), sync_env); } checked_sync_resource_startup = true; } return sync_resource_startup != 0; } long pcmk__auto_stonith_watchdog_timeout(void) { long sbd_timeout = pcmk__get_sbd_watchdog_timeout(); return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout); } bool pcmk__valid_stonith_watchdog_timeout(const char *value) { /* @COMPAT At a compatibility break, accept either negative values or a * specific string like "auto" (but not both) to mean "auto-calculate the * timeout." Reject other values that aren't parsable as timeouts. */ long st_timeout = value? crm_get_msec(value) : 0; if (st_timeout < 0) { st_timeout = pcmk__auto_stonith_watchdog_timeout(); crm_debug("Using calculated value %ld for " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " (%s)", st_timeout, value); } if (st_timeout == 0) { crm_debug("Watchdog may be enabled but " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is disabled (%s)", value? value : "default"); } else if (pcmk__locate_sbd() == 0) { crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " configured (%s) but SBD not active", pcmk__s(value, "auto")); crm_exit(CRM_EX_FATAL); return false; } else { long sbd_timeout = pcmk__get_sbd_watchdog_timeout(); if (st_timeout < sbd_timeout) { crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " (%s) too short (must be >%ldms)", value, sbd_timeout); crm_exit(CRM_EX_FATAL); return false; } crm_info("Watchdog configured with " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %s and SBD timeout %ldms", value, sbd_timeout); } return true; }