diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index 13a37e2623..f6bb4f8498 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -1,345 +1,345 @@ /* * Copyright 2015-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_INTERNAL__H #define PCMK__CRM_COMMON_INTERNAL__H #include // pid_t, getpid() #include // bool #include // uint8_t, uint64_t #include // PRIu64 #include // guint, GList, GHashTable #include // xmlNode #include // do_crm_log_unlikely(), etc. #include // mainloop_io_t, struct ipc_client_callbacks #include // crm_strdup_printf() #include #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* This says whether the current application is a Pacemaker daemon or not, * and is used to change default logging settings such as whether to log to * stderr, etc., as well as a few other details such as whether blackbox signal * handling is enabled. * * It is set when logging is initialized, and does not need to be set directly. 
*/ extern bool pcmk__is_daemon; // Number of elements in a statically defined array #define PCMK__NELEM(a) ((int) (sizeof(a)/sizeof(a[0])) ) #if PCMK__ENABLE_CIBSECRETS /* internal CIB utilities (from cib_secrets.c) */ int pcmk__substitute_secrets(const char *rsc_id, GHashTable *params); #endif /* internal main loop utilities (from mainloop.c) */ int pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, const struct ipc_client_callbacks *callbacks, mainloop_io_t **source); guint pcmk__mainloop_timer_get_period(const mainloop_timer_t *timer); /* internal name/value utilities (from nvpair.c) */ int pcmk__scan_nvpair(const gchar *input, gchar **name, gchar **value); char *pcmk__format_nvpair(const char *name, const char *value, const char *units); /* internal procfs utilities (from procfs.c) */ pid_t pcmk__procfs_pid_of(const char *name); unsigned int pcmk__procfs_num_cores(void); int pcmk__procfs_pid2path(pid_t pid, char **path); bool pcmk__procfs_has_pids(void); DIR *pcmk__procfs_fd_dir(void); void pcmk__sysrq_trigger(char t); bool pcmk__throttle_cib_load(const char *server, float *load); bool pcmk__throttle_load_avg(float *load); /* internal functions related to process IDs (from pid.c) */ /*! * \internal * \brief Check whether process exists (by PID and optionally executable path) * * \param[in] pid PID of process to check * \param[in] daemon If not NULL, path component to match with procfs entry * * \return Standard Pacemaker return code * \note Particular return codes of interest include pcmk_rc_ok for alive, * ESRCH for process is not alive (verified by kill and/or executable path * match), EACCES for caller unable or not allowed to check. A result of * "alive" is less reliable when \p daemon is not provided or procfs is * not available, since there is no guarantee that the PID has not been * recycled for another process. * \note This function cannot be used to verify \e authenticity of the process. 
*/ int pcmk__pid_active(pid_t pid, const char *daemon); -int pcmk__pidfile_matches(const char *filename, pid_t expected_pid, - const char *expected_name, pid_t *pid); +int pcmk__pidfile_matches(const char *filename, const char *expected_name, + pid_t *pid); // bitwise arithmetic utilities /*! * \internal * \brief Set specified flags in a flag group * * \param[in] function Function name of caller * \param[in] line Line number of caller * \param[in] log_level Log a message at this level * \param[in] flag_type Label describing this flag group (for logging) * \param[in] target Name of object whose flags these are (for logging) * \param[in] flag_group Flag group being manipulated * \param[in] flags Which flags in the group should be set * \param[in] flags_str Readable equivalent of \p flags (for logging) * * \return Possibly modified flag group */ static inline uint64_t pcmk__set_flags_as(const char *function, int line, uint8_t log_level, const char *flag_type, const char *target, uint64_t flag_group, uint64_t flags, const char *flags_str) { uint64_t result = flag_group | flags; if (result != flag_group) { do_crm_log_unlikely(log_level, "%s flags %#.8" PRIx64 " (%s) for %s set by %s:%d", pcmk__s(flag_type, "Group of"), flags, pcmk__s(flags_str, "flags"), pcmk__s(target, "target"), function, line); } return result; } /*! 
* \internal * \brief Clear specified flags in a flag group * * \param[in] function Function name of caller * \param[in] line Line number of caller * \param[in] log_level Log a message at this level * \param[in] flag_type Label describing this flag group (for logging) * \param[in] target Name of object whose flags these are (for logging) * \param[in] flag_group Flag group being manipulated * \param[in] flags Which flags in the group should be cleared * \param[in] flags_str Readable equivalent of \p flags (for logging) * * \return Possibly modified flag group */ static inline uint64_t pcmk__clear_flags_as(const char *function, int line, uint8_t log_level, const char *flag_type, const char *target, uint64_t flag_group, uint64_t flags, const char *flags_str) { uint64_t result = flag_group & ~flags; if (result != flag_group) { do_crm_log_unlikely(log_level, "%s flags %#.8" PRIx64 " (%s) for %s cleared by %s:%d", pcmk__s(flag_type, "Group of"), flags, pcmk__s(flags_str, "flags"), pcmk__s(target, "target"), function, line); } return result; } /*! * \internal * \brief Get readable string for whether specified flags are set * * \param[in] flag_group Group of flags to check * \param[in] flags Which flags in \p flag_group should be checked * * \return "true" if all \p flags are set in \p flag_group, otherwise "false" */ static inline const char * pcmk__flag_text(uint64_t flag_group, uint64_t flags) { return pcmk__btoa(pcmk_all_flags_set(flag_group, flags)); } // miscellaneous utilities (from utils.c) void pcmk__panic(const char *reason); pid_t pcmk__locate_sbd(void); void pcmk__sleep_ms(unsigned int ms); guint pcmk__create_timer(guint interval_ms, GSourceFunc fn, gpointer data); guint pcmk__timeout_ms2s(guint timeout_ms); extern int pcmk__score_red; extern int pcmk__score_green; extern int pcmk__score_yellow; /*! 
* \internal * \brief Allocate new zero-initialized memory, asserting on failure * * \param[in] file File where \p function is located * \param[in] function Calling function * \param[in] line Line within \p file * \param[in] nmemb Number of elements to allocate memory for * \param[in] size Size of each element * * \return Newly allocated memory of of size nmemb * size (guaranteed * not to be \c NULL) * * \note The caller is responsible for freeing the return value using \c free(). */ static inline void * pcmk__assert_alloc_as(const char *file, const char *function, uint32_t line, size_t nmemb, size_t size) { void *ptr = calloc(nmemb, size); if (ptr == NULL) { crm_abort(file, function, line, "Out of memory", FALSE, TRUE); crm_exit(CRM_EX_OSERR); } return ptr; } /*! * \internal * \brief Allocate new zero-initialized memory, asserting on failure * * \param[in] nmemb Number of elements to allocate memory for * \param[in] size Size of each element * * \return Newly allocated memory of of size nmemb * size (guaranteed * not to be \c NULL) * * \note The caller is responsible for freeing the return value using \c free(). */ #define pcmk__assert_alloc(nmemb, size) \ pcmk__assert_alloc_as(__FILE__, __func__, __LINE__, nmemb, size) /*! * \internal * \brief Resize a dynamically allocated memory block * * \param[in] ptr Memory block to resize (or NULL to allocate new memory) * \param[in] size New size of memory block in bytes (must be > 0) * * \return Pointer to resized memory block * * \note This asserts on error, so the result is guaranteed to be non-NULL * (which is the main advantage of this over directly using realloc()). 
*/ static inline void * pcmk__realloc(void *ptr, size_t size) { void *new_ptr; // realloc(p, 0) can replace free(p) but this wrapper can't pcmk__assert(size > 0); new_ptr = realloc(ptr, size); if (new_ptr == NULL) { free(ptr); abort(); } return new_ptr; } static inline char * pcmk__getpid_s(void) { return crm_strdup_printf("%lu", (unsigned long) getpid()); } // More efficient than g_list_length(list) == 1 static inline bool pcmk__list_of_1(GList *list) { return list && (list->next == NULL); } // More efficient than g_list_length(list) > 1 static inline bool pcmk__list_of_multiple(GList *list) { return list && (list->next != NULL); } /* convenience functions for failure-related node attributes */ #define PCMK__FAIL_COUNT_PREFIX "fail-count" #define PCMK__LAST_FAILURE_PREFIX "last-failure" /*! * \internal * \brief Generate a failure-related node attribute name for a resource * * \param[in] prefix Start of attribute name * \param[in] rsc_id Resource name * \param[in] op Operation name * \param[in] interval_ms Operation interval * * \return Newly allocated string with attribute name * * \note Failure attributes are named like PREFIX-RSC#OP_INTERVAL (for example, * "fail-count-myrsc#monitor_30000"). The '#' is used because it is not * a valid character in a resource ID, to reliably distinguish where the * operation name begins. The '_' is used simply to be more comparable to * action labels like "myrsc_monitor_30000". 
*/ static inline char * pcmk__fail_attr_name(const char *prefix, const char *rsc_id, const char *op, guint interval_ms) { CRM_CHECK(prefix && rsc_id && op, return NULL); return crm_strdup_printf("%s-%s#%s_%u", prefix, rsc_id, op, interval_ms); } static inline char * pcmk__failcount_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__FAIL_COUNT_PREFIX, rsc_id, op, interval_ms); } static inline char * pcmk__lastfailure_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__LAST_FAILURE_PREFIX, rsc_id, op, interval_ms); } // internal resource agent functions (from agents.c) int pcmk__effective_rc(int rc); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_INTERNAL__H diff --git a/lib/common/pid.c b/lib/common/pid.c index b95e266f57..69f936c03d 100644 --- a/lib/common/pid.c +++ b/lib/common/pid.c @@ -1,202 +1,193 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include int pcmk__pid_active(pid_t pid, const char *daemon) { static pid_t last_asked_pid = 0; /* log spam prevention */ int rc = 0; if (pid <= 0) { return EINVAL; } rc = kill(pid, 0); if ((rc < 0) && (errno == ESRCH)) { return ESRCH; /* no such PID detected */ } else if ((daemon == NULL) || !pcmk__procfs_has_pids()) { // The kill result is all we have, we can't check the name if (rc == 0) { return pcmk_rc_ok; } rc = errno; if (last_asked_pid != pid) { crm_info("Cannot examine PID %lld: %s", (long long) pid, pcmk_rc_str(rc)); last_asked_pid = pid; } return rc; /* errno != ESRCH */ } else { /* make sure PID hasn't been reused by another process XXX: might still be just a zombie, which could confuse decisions */ bool checked_through_kill = (rc == 0); bool paths_equal = false; char *exe_path = NULL; char *myexe_path = NULL; rc = pcmk__procfs_pid2path(pid, &exe_path); if (rc != pcmk_rc_ok) { if (rc != EACCES) { // Check again to filter out races if ((kill(pid, 0) < 0) && (errno == ESRCH)) { return ESRCH; } } if (last_asked_pid != pid) { if (rc == EACCES) { crm_info("Could not get executable for PID %lld: %s " QB_XS " rc=%d", (long long) pid, pcmk_rc_str(rc), rc); } else { crm_err("Could not get executable for PID %lld: %s " QB_XS " rc=%d", (long long) pid, pcmk_rc_str(rc), rc); } last_asked_pid = pid; } if (rc == EACCES) { // Trust kill if it was OK (we can't double-check via path) return checked_through_kill? pcmk_rc_ok : EACCES; } else { return ESRCH; /* most likely errno == ENOENT */ } } if (daemon[0] != '/') { myexe_path = crm_strdup_printf(CRM_DAEMON_DIR "/%s", daemon); } else { myexe_path = pcmk__str_copy(daemon); } paths_equal = pcmk__str_eq(exe_path, myexe_path, pcmk__str_none); free(exe_path); free(myexe_path); if (paths_equal) { return pcmk_rc_ok; } } return ESRCH; } #define LOCKSTRLEN 11 /*! 
* \internal * \brief Read a process ID from a file * * \param[in] filename Process ID file to read * \param[out] pid Where to put PID that was read * * \return Standard Pacemaker return code */ static int read_pidfile(const char *filename, pid_t *pid) { int fd; struct stat sbuf; int rc = pcmk_rc_ok; long long pid_read = 0; char buf[LOCKSTRLEN + 1]; CRM_CHECK((filename != NULL) && (pid != NULL), return EINVAL); fd = open(filename, O_RDONLY); if (fd < 0) { return errno; } if ((fstat(fd, &sbuf) >= 0) && (sbuf.st_size < LOCKSTRLEN)) { sleep(2); /* if someone was about to create one, * give'm a sec to do so */ } if (read(fd, buf, sizeof(buf)) < 1) { rc = errno; goto bail; } errno = 0; rc = sscanf(buf, "%lld", &pid_read); if (rc > 0) { if (pid_read <= 0) { rc = ESRCH; } else { rc = pcmk_rc_ok; *pid = (pid_t) pid_read; crm_trace("Read pid %lld from %s", pid_read, filename); } } else if (rc == 0) { rc = ENODATA; } else { rc = errno; } bail: close(fd); return rc; } /*! * \internal * \brief Check whether a process from a PID file matches expected values * * \param[in] filename Path of PID file - * \param[in] expected_pid If positive, compare to this PID * \param[in] expected_name If not NULL, the PID from the PID file is valid * only if it is active as a process with this name * \param[out] pid If not NULL, store PID found in PID file here * * \return Standard Pacemaker return code */ int -pcmk__pidfile_matches(const char *filename, pid_t expected_pid, - const char *expected_name, pid_t *pid) +pcmk__pidfile_matches(const char *filename, const char *expected_name, + pid_t *pid) { pid_t pidfile_pid = 0; int rc = read_pidfile(filename, &pidfile_pid); if (pid) { *pid = pidfile_pid; } if (rc != pcmk_rc_ok) { // Error reading PID file or invalid contents unlink(filename); rc = ENOENT; - } else if ((expected_pid > 0) && (pidfile_pid == expected_pid)) { - // PID in file matches what was expected - rc = pcmk_rc_ok; - } else if (pcmk__pid_active(pidfile_pid, expected_name) == ESRCH) { 
// Contains a stale value unlink(filename); rc = ENOENT; - - } else if ((expected_pid > 0) && (pidfile_pid != expected_pid)) { - // Locked by existing process - rc = EEXIST; } return rc; } diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index 2f464eed47..606a25d867 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,293 +1,293 @@ /* - * Copyright 2013-2024 the Pacemaker project contributors + * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static pid_t sbd_pid = 0; /*! * \internal * \brief Tell pacemakerd to panic the local host * * \param[in] ppid Process ID of parent process */ static void panic_local_nonroot(pid_t ppid) { if (ppid > 1) { // pacemakerd is still our parent crm_emerg("Escalating panic to " PCMK__SERVER_PACEMAKERD "[%lld]", (long long) ppid); } else { // Signal (non-parent) pacemakerd if possible ppid = pcmk__procfs_pid_of(PCMK__SERVER_PACEMAKERD); if (ppid > 0) { union sigval signal_value; crm_emerg("Signaling " PCMK__SERVER_PACEMAKERD "[%lld] to panic", (long long) ppid); memset(&signal_value, 0, sizeof(signal_value)); if (sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_emerg("Exiting after signal failure: %s", strerror(errno)); } } else { crm_emerg("Exiting with no known " PCMK__SERVER_PACEMAKERD "process"); } } crm_exit(CRM_EX_PANIC); } /*! 
* \internal
 * \brief Panic the local host (if root) or tell pacemakerd to do so
 *
 * The action is chosen by the PCMK__ENV_PANIC_ACTION environment option. An
 * optional "sync-" prefix requests a sync() first; the remainder selects a
 * sysrq trigger ('b', 'c' or 'o'). An empty or unrecognized value is treated
 * as a reboot. If execution continues past the sysrq trigger, reboot() is
 * tried, and if that returns as well, the process exits.
 */
static void
panic_local(void)
{
    const char *full_panic_action = pcmk__env_option(PCMK__ENV_PANIC_ACTION);
    const char *panic_action = full_panic_action;
    int reboot_cmd = RB_AUTOBOOT; // Default panic action is reboot

    if (geteuid() != 0) { // Non-root caller such as the controller
        panic_local_nonroot(getppid());
        return;
    }

    // Strip the optional "sync-" prefix and flush filesystems before acting
    if (pcmk__starts_with(full_panic_action, "sync-")) {
        panic_action += sizeof("sync-") - 1;
        sync();
    }

    if (pcmk__str_empty(full_panic_action)
        || pcmk__str_eq(panic_action, PCMK_VALUE_REBOOT, pcmk__str_none)) {
        pcmk__sysrq_trigger('b');

    } else if (pcmk__str_eq(panic_action, PCMK_VALUE_CRASH, pcmk__str_none)) {
        pcmk__sysrq_trigger('c');

    } else if (pcmk__str_eq(panic_action, PCMK_VALUE_OFF, pcmk__str_none)) {
        pcmk__sysrq_trigger('o');
        // Make the reboot() fallback power off too, where the platform can
#ifdef RB_POWER_OFF
        reboot_cmd = RB_POWER_OFF;
#elif defined(RB_POWEROFF)
        reboot_cmd = RB_POWEROFF;
#endif
    } else {
        crm_warn("Using default '" PCMK_VALUE_REBOOT "' for local option PCMK_"
                 PCMK__ENV_PANIC_ACTION " because '%s' is not a valid value",
                 full_panic_action);
        pcmk__sysrq_trigger('b');
    }

    // sysrq failed or is not supported on this platform, so fall back to reboot
    reboot(reboot_cmd);

    // Even reboot failed, nothing left to do but exit
    crm_emerg("Exiting after reboot failed: %s", strerror(errno));
    if (getppid() > 1) { // pacemakerd is parent process
        crm_exit(CRM_EX_PANIC);
    } else { // This is pacemakerd, or an orphaned subdaemon
        crm_exit(CRM_EX_FATAL);
    }
}

/*!
 * \internal
 * \brief Tell sbd to kill the local host, then exit
 *
 * sbd is sent SIGKILL at the cached \c sbd_pid. If the signal cannot be
 * queued, panic_local() is used instead.
 */
static void
panic_sbd(void)
{
    union sigval signal_value;
    pid_t ppid = getppid();

    memset(&signal_value, 0, sizeof(signal_value));
    /* TODO: Arrange for a slightly less brutal option? */
    if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) {
        crm_emerg("Panicking directly because couldn't signal sbd");
        panic_local();
    }

    // Exit code depends on whether a parent other than init is still present
    if(ppid > 1) {
        /* child daemon */
        crm_exit(CRM_EX_PANIC);
    } else {
        /* pacemakerd or orphan child */
        crm_exit(CRM_EX_FATAL);
    }
}

/*!
 * \internal
 * \brief Panic the local host
 *
 * Panic the local host either by sbd (if running), directly, or by asking
 * pacemakerd. If trace logging this function, exit instead.
 *
 * \param[in] reason  Why panic is needed (for logging only)
 */
void
pcmk__panic(const char *reason)
{
    // pcmk__locate_sbd() also refreshes the cached sbd_pid used below
    if (pcmk__locate_sbd() > 1) {
        crm_emerg("Signaling sbd[%lld] to panic the system: %s",
                  (long long) sbd_pid, reason);
        panic_sbd();

    } else {
        crm_emerg("Panicking the system directly: %s", reason);
        panic_local();
    }
}

/*!
 * \internal
 * \brief Return the process ID of sbd (or 0 if it is not running)
 *
 * A previously found PID greater than 1 is returned from the cache without
 * rechecking. Otherwise the PID file is consulted first, then procfs.
 */
pid_t
pcmk__locate_sbd(void)
{
    const char *pidfile = PCMK__RUN_DIR "/sbd.pid";
    int rc;

    if(sbd_pid > 1) {
        return sbd_pid;
    }

    /* Read the pid file */
-    rc = pcmk__pidfile_matches(pidfile, 0, SBIN_DIR "/sbd", &sbd_pid);
+    rc = pcmk__pidfile_matches(pidfile, SBIN_DIR "/sbd", &sbd_pid);
    if (rc == pcmk_rc_ok) {
        crm_trace("SBD detected at pid %lld (via PID file %s)",
                  (long long) sbd_pid, pidfile);

    } else {
        /* Fall back to /proc for systems that support it */
        sbd_pid = pcmk__procfs_pid_of("sbd");
        if (sbd_pid != 0) {
            crm_trace("SBD detected at pid %lld (via procfs)",
                      (long long) sbd_pid);
        }
    }

    // Normalize a negative lookup result to "not running"
    if(sbd_pid < 0) {
        sbd_pid = 0;
        crm_trace("SBD not detected");
    }

    return sbd_pid;
}

/* Return the SBD_WATCHDOG_TIMEOUT environment variable as parsed by
 * crm_get_msec(). The value is parsed on first use and kept in a static;
 * -2 marks "not parsed yet".
 */
long
pcmk__get_sbd_watchdog_timeout(void)
{
    static long sbd_timeout = -2;

    if (sbd_timeout == -2) {
        sbd_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT"));
    }
    return sbd_timeout;
}

/* Return whether start-up should be synchronized with sbd, according to the
 * SBD_SYNC_RESOURCE_STARTUP environment variable. PCMK__SBD_SYNC_DEFAULT is
 * the initial value, kept when the variable is unset; an unparsable value is
 * warned about. The environment is examined only on the first call.
 */
bool
pcmk__get_sbd_sync_resource_startup(void)
{
    static int sync_resource_startup = PCMK__SBD_SYNC_DEFAULT;
    static bool checked_sync_resource_startup = false;

    if (!checked_sync_resource_startup) {
        const char *sync_env = getenv("SBD_SYNC_RESOURCE_STARTUP");

        if (sync_env == NULL) {
            crm_trace("Defaulting to %sstart-up synchronization with sbd",
                      (PCMK__SBD_SYNC_DEFAULT? "" : "no "));

        } else if (crm_str_to_boolean(sync_env, &sync_resource_startup) < 0) {
            crm_warn("Defaulting to %sstart-up synchronization with sbd "
                     "because environment value '%s' is invalid",
                     (PCMK__SBD_SYNC_DEFAULT? "" : "no "), sync_env);
        }
        checked_sync_resource_startup = true;
    }
    return sync_resource_startup != 0;
}

/* Return the automatically calculated stonith watchdog timeout: twice the SBD
 * watchdog timeout, or 0 if that timeout is not positive.
 */
long
pcmk__auto_stonith_watchdog_timeout(void)
{
    long sbd_timeout = pcmk__get_sbd_watchdog_timeout();

    return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout);
}

/* Check a stonith watchdog timeout setting against the local SBD state.
 *
 * value  Configured setting (NULL is treated as a timeout of 0)
 *
 * Returns true if the setting is usable. When SBD is required but not
 * detected, or the timeout is shorter than the SBD watchdog timeout, this
 * logs an emergency message and calls crm_exit(CRM_EX_FATAL); the
 * "return false" statements are reached only if crm_exit() returns.
 */
bool
pcmk__valid_stonith_watchdog_timeout(const char *value)
{
    /* @COMPAT At a compatibility break, accept either negative values or a
     * specific string like "auto" (but not both) to mean "auto-calculate the
     * timeout." Reject other values that aren't parsable as timeouts.
     */
    long long st_timeout = 0;

    if (value != NULL) {
        /* @COMPAT So far it has been documented that a negative value is
         * valid. Parse it as an integer first to avoid the warning from
         * crm_get_msec().
         */
        int rc = pcmk__scan_ll(value, &st_timeout, PCMK__PARSE_INT_DEFAULT);

        if (rc != pcmk_rc_ok || st_timeout >= 0) {
            st_timeout = crm_get_msec(value);
        }
    }

    // A negative timeout requests the automatically calculated value
    if (st_timeout < 0) {
        st_timeout = pcmk__auto_stonith_watchdog_timeout();
        crm_debug("Using calculated value %lld for "
                  PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " (%s)",
                  st_timeout, value);
    }

    if (st_timeout == 0) {
        crm_debug("Watchdog may be enabled but "
                  PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is disabled (%s)",
                  value? value : "default");

    } else if (pcmk__locate_sbd() == 0) {
        crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
                  " configured (%s) but SBD not active",
                  pcmk__s(value, "auto"));
        crm_exit(CRM_EX_FATAL);
        return false;

    } else {
        long sbd_timeout = pcmk__get_sbd_watchdog_timeout();

        /* NOTE(review): the message below says "must be >", but a timeout
         * equal to the SBD timeout passes this check -- confirm which is
         * intended
         */
        if (st_timeout < sbd_timeout) {
            crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
                      " (%s) too short (must be >%ldms)",
                      value, sbd_timeout);
            crm_exit(CRM_EX_FATAL);
            return false;
        }
        crm_info("Watchdog configured with " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
                 " %s and SBD timeout %ldms",
                 value, sbd_timeout);
    }
    return true;
}