diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c index f24d14216d..3294ee7c70 100644 --- a/daemons/pacemakerd/pcmkd_subdaemons.c +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -1,947 +1,941 @@ /* * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #if SUPPORT_COROSYNC #include "pcmkd_corosync.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include enum child_daemon_flags { child_none = 0, child_respawn = 1 << 0, child_needs_cluster = 1 << 1, child_needs_retry = 1 << 2, child_active_before_startup = 1 << 3, child_shutting_down = 1 << 4, }; typedef struct pcmk_child_s { enum pcmk_ipc_server server; pid_t pid; int respawn_count; const char *uid; int check_count; uint32_t flags; } pcmk_child_t; #define PCMK_PROCESS_CHECK_INTERVAL 1000 /* 1s */ #define PCMK_PROCESS_CHECK_RETRIES 5 #define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ /* Index into the array below */ #define PCMK_CHILD_CONTROLD 5 static pcmk_child_t pcmk_children[] = { { pcmk_ipc_based, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_fenced, 0, 0, NULL, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_execd, 0, 0, NULL, 0, child_respawn }, { pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER, 0, child_respawn }, { pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, }; crm_trigger_t *shutdown_trigger = NULL; crm_trigger_t *startup_trigger = NULL; time_t subdaemon_check_progress = 0; // Whether we need root group access to talk to cluster layer static bool need_root_group = true; /* When contacted via pacemakerd-api by a client having sbd 
in * the name we assume it is sbd-daemon which wants to know * if pacemakerd shutdown gracefully. * Thus when everything is shutdown properly pacemakerd * waits till it has reported the graceful completion of * shutdown to sbd and just when sbd-client closes the * connection we can assume that the report has arrived * properly so that pacemakerd can finally exit. * Following two variables are used to track that handshake. */ unsigned int shutdown_complete_state_reported_to = 0; bool shutdown_complete_state_reported_client_closed = false; /* state we report when asked via pacemakerd-api status-ping */ const char *pacemakerd_state = PCMK__VALUE_INIT; bool running_with_sbd = false; GMainLoop *mainloop = NULL; static bool fatal_error = false; static int child_liveness(pcmk_child_t *child); static gboolean escalate_shutdown(gpointer data); static int start_child(pcmk_child_t * child); static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); static void pcmk_process_exit(pcmk_child_t * child); static gboolean pcmk_shutdown_worker(gpointer user_data); static void stop_child(pcmk_child_t *child, int signal); static void for_each_child(void (*fn)(pcmk_child_t *child)) { for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { fn(&pcmk_children[i]); } } /*! 
* \internal * \brief Get path to subdaemon executable * * \param[in] subdaemon Subdaemon to get path for * * \return Newly allocated string with path to subdaemon executable * \note It is the caller's responsibility to free() the return value */ static inline char * subdaemon_path(pcmk_child_t *subdaemon) { return crm_strdup_printf(CRM_DAEMON_DIR "/%s", pcmk__server_name(subdaemon->server)); } static bool pcmkd_cluster_connected(void) { #if SUPPORT_COROSYNC return pcmkd_corosync_connected(); #else return true; #endif } static gboolean check_next_subdaemon(gpointer user_data) { static int next_child = 0; pcmk_child_t *child = &(pcmk_children[next_child]); const char *name = pcmk__server_name(child->server); const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid); int rc = child_liveness(child); crm_trace("Checked subdaemon %s[%lld]: %s (%d)", name, pid, pcmk_rc_str(rc), rc); switch (rc) { case pcmk_rc_ok: child->check_count = 0; subdaemon_check_progress = time(NULL); break; case pcmk_rc_ipc_pid_only: // Child was previously OK if (pcmk_is_set(child->flags, child_shutting_down)) { crm_notice("Subdaemon %s[%lld] has stopped accepting IPC " "connections during shutdown", name, pid); } else if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) { // cts-lab looks for this message crm_crit("Subdaemon %s[%lld] is unresponsive to IPC " "after %d attempt%s and will now be killed", name, pid, child->check_count, pcmk__plural_s(child->check_count)); stop_child(child, SIGKILL); if (pcmk_is_set(child->flags, child_respawn)) { // Respawn limit hasn't been reached, so retry another round child->check_count = 0; } } else { crm_notice("Subdaemon %s[%lld] is unresponsive to IPC " "after %d attempt%s (will recheck later)", name, pid, child->check_count, pcmk__plural_s(child->check_count)); if (pcmk_is_set(child->flags, child_respawn)) { /* as long as the respawn-limit isn't reached and we haven't run out of connect retries we account this as progress we are willing to tell to sbd 
*/ subdaemon_check_progress = time(NULL); } } /* go to the next child and see if we can make progress there */ break; case pcmk_rc_ipc_unresponsive: if (!pcmk_is_set(child->flags, child_respawn)) { /* if a subdaemon is down and we don't want it to be restarted this is a success during shutdown. if it isn't restarted anymore due to MAX_RESPAWN it is rather no success. */ if (child->respawn_count <= MAX_RESPAWN) { subdaemon_check_progress = time(NULL); } } if (!pcmk_is_set(child->flags, child_active_before_startup)) { crm_trace("Subdaemon %s[%lld] terminated", name, pid); break; } if (pcmk_is_set(child->flags, child_respawn)) { // cts-lab looks for this message crm_err("Subdaemon %s[%lld] terminated", name, pid); } else { /* orderly shutdown */ crm_notice("Subdaemon %s[%lld] terminated", name, pid); } pcmk_process_exit(child); break; default: crm_exit(CRM_EX_FATAL); break; /* static analysis/noreturn */ } if (++next_child >= PCMK__NELEM(pcmk_children)) { next_child = 0; } return G_SOURCE_CONTINUE; } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid == PCMK__SPECIAL_PID) { pcmk_process_exit(child); } else if (child->pid != 0) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Subdaemon %s not terminating in a timely manner, forcing", pcmk__server_name(child->server)); stop_child(child, SIGSEGV); } return G_SOURCE_REMOVE; } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo) { // cts-lab looks for this message do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), "%s[%d] terminated with signal %d (%s)%s", name, pid, signo, strsignal(signo), (core? 
" and dumped core" : "")); pcmk_process_exit(child); return; } switch(exitcode) { case CRM_EX_OK: crm_info("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; case CRM_EX_FATAL: crm_warn("Shutting cluster down because %s[%d] had fatal failure", name, pid); child->flags &= ~child_respawn; fatal_error = true; pcmk_shutdown(SIGTERM); break; case CRM_EX_PANIC: { char *msg = NULL; child->flags &= ~child_respawn; fatal_error = true; msg = crm_strdup_printf("Subdaemon %s[%d] requested panic", name, pid); pcmk__panic(msg); // Should never get here free(msg); pcmk_shutdown(SIGTERM); } break; default: // cts-lab looks for this message crm_err("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; } pcmk_process_exit(child); } static void pcmk_process_exit(pcmk_child_t * child) { const char *name = pcmk__server_name(child->server); child->pid = 0; child->flags &= ~child_active_before_startup; child->check_count = 0; child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Subdaemon %s exceeded maximum respawn count", name); child->flags &= ~child_respawn; } if (shutdown_trigger) { /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ mainloop_set_trigger(shutdown_trigger); } else if (!pcmk_is_set(child->flags, child_respawn)) { /* nothing to do */ } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { pcmk__panic("Subdaemon failed"); } else if (child_liveness(child) == pcmk_rc_ok) { crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK", name, pcmk__server_ipc_name(child->server)); } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) { crm_notice("Not respawning subdaemon %s until cluster returns", name); child->flags |= child_needs_retry; } else { // cts-lab looks for this message crm_notice("Respawning subdaemon %s after unexpected exit", name); start_child(child); } } static gboolean 
pcmk_shutdown_worker(gpointer user_data) { static int phase = PCMK__NELEM(pcmk_children) - 1; static time_t next_log = 0; if (phase == PCMK__NELEM(pcmk_children) - 1) { crm_notice("Shutting down Pacemaker"); pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN; } for (; phase >= 0; phase--) { pcmk_child_t *child = &(pcmk_children[phase]); const char *name = pcmk__server_name(child->server); time_t now = 0; if (child->pid == 0) { /* cleanup */ crm_debug("Subdaemon %s confirmed stopped", name); child->pid = 0; continue; } now = time(NULL); if (pcmk_is_set(child->flags, child_respawn)) { if (child->pid == PCMK__SPECIAL_PID) { crm_warn("Subdaemon %s cannot be terminated (shutdown " "will be escalated after %ld seconds if it does " "not terminate on its own; set PCMK_" PCMK__ENV_FAIL_FAST "=1 to exit immediately " "instead)", name, (long) SHUTDOWN_ESCALATION_PERIOD); } next_log = now + 30; child->flags &= ~child_respawn; stop_child(child, SIGTERM); if (phase < PCMK_CHILD_CONTROLD) { pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD, escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for subdaemon %s to terminate " QB_XS " pid=%lld", name, (long long) child->pid); } return G_SOURCE_CONTINUE; } crm_notice("Shutdown complete"); pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE; if (!fatal_error && running_with_sbd && pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { crm_notice("Waiting for SBD to pick up shutdown-complete-state."); return G_SOURCE_CONTINUE; } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Shutting down and staying down after fatal error"); #if SUPPORT_COROSYNC pcmkd_shutdown_corosync(); #endif crm_exit(CRM_EX_FATAL); } return G_SOURCE_CONTINUE; } /* TODO once libqb is taught to juggle with IPC end-points carried over as bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) it shall hand over these descriptors here if/once they are successfully 
pre-opened in (presumably) child_liveness(), to avoid any remaining room for races */ // \return Standard Pacemaker return code static int start_child(pcmk_child_t * child) { uid_t uid = 0; gid_t gid = 0; bool use_valgrind = false; bool use_callgrind = false; const char *name = pcmk__server_name(child->server); const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED); child->flags &= ~(child_active_before_startup | child_shutting_down); child->check_count = 0; if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = true; use_valgrind = true; } else if ((env_callgrind != NULL) && (strstr(env_callgrind, name) != NULL)) { use_callgrind = true; use_valgrind = true; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = true; } else if ((env_valgrind != NULL) && (strstr(env_valgrind, name) != NULL)) { use_valgrind = true; } if (use_valgrind && strlen(PCMK__VALGRIND_EXEC) == 0) { crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found", name); use_valgrind = false; } if ((child->uid != NULL) && (crm_user_lookup(child->uid, &uid, &gid) < 0)) { crm_err("Invalid user (%s) for subdaemon %s: not found", child->uid, name); return EACCES; } child->pid = fork(); pcmk__assert(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit); if (use_valgrind) { crm_info("Forked process %lld using user %lu (%s) and group %lu " "for subdaemon %s (valgrind enabled: %s)", (long long) child->pid, (unsigned long) uid, pcmk__s(child->uid, "root"), (unsigned long) gid, name, PCMK__VALGRIND_EXEC); } else { crm_info("Forked process %lld using user %lu (%s) and group %lu " "for subdaemon %s", (long long) child->pid, (unsigned long) uid, pcmk__s(child->uid, "root"), (unsigned long) gid, name); } return pcmk_rc_ok; } else { char *path = subdaemon_path(child); /* Start a new session */ setsid(); 
if(gid) { // Drop root group access if not needed if (!need_root_group && (setgid(gid) < 0)) { crm_warn("Could not set subdaemon %s group to %lu: %s", name, (unsigned long) gid, strerror(errno)); } /* Initialize supplementary groups to only those always granted to * the user, plus haclient (so we can access IPC). */ if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize system groups for subdaemon %s: %s " QB_XS " errno=%d", name, pcmk_rc_str(errno), errno); } } if (uid && setuid(uid) < 0) { crm_warn("Could not set subdaemon %s user to %s: %s " QB_XS " uid=%lu errno=%d", name, strerror(errno), child->uid, (unsigned long) uid, errno); } pcmk__close_fds_in_child(); - - close(STDIN_FILENO); - pcmk__open_devnull(O_RDONLY); // stdin (fd 0) - close(STDOUT_FILENO); - pcmk__open_devnull(O_WRONLY); // stdout (fd 1) - close(STDERR_FILENO); - pcmk__open_devnull(O_WRONLY); // stderr (fd 2) + pcmk__null_std_streams(); if (use_callgrind) { char *out_file = pcmk__str_copy("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.opt.%p"); execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, "--tool=callgrind", out_file, path, (char *) NULL); free(out_file); } else if (use_valgrind) { execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, path, (char *) NULL); } else { execlp(path, path, (char *) NULL); } free(path); crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno)); crm_exit(CRM_EX_FATAL); } return pcmk_rc_ok; /* never reached */ } /*! * \internal * \brief Check the liveness of the child based on IPC name and PID if tracked * * \param[in,out] child Child tracked data * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that * the child is up by PID but not IPC end-point (possibly starting). 
* \note This function doesn't modify any of \p child members but \c pid, * and is not actively toying with processes as such but invoking * \c stop_child in one particular case (there's for some reason * a different authentic holder of the IPC end-point). */ static int child_liveness(pcmk_child_t *child) { uid_t cl_uid = 0; gid_t cl_gid = 0; const uid_t root_uid = 0; const gid_t root_gid = 0; const uid_t *ref_uid; const gid_t *ref_gid; const char *name = pcmk__server_name(child->server); int rc = pcmk_rc_ipc_unresponsive; int pid_active = pcmk_rc_ok; int legacy_rc = pcmk_ok; pid_t ipc_pid = 0; if (child->uid == NULL) { ref_uid = &root_uid; ref_gid = &root_gid; } else { ref_uid = &cl_uid; ref_gid = &cl_gid; legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); } if (legacy_rc < 0) { rc = pcmk_legacy2rc(legacy_rc); crm_err("Could not find user and group IDs for user %s: %s " QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); } else { const char *ipc_name = pcmk__server_ipc_name(child->server); rc = pcmk__ipc_is_authentic_process_active(ipc_name, *ref_uid, *ref_gid, &ipc_pid); if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { if (child->pid <= 0) { /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this * initializes a new child. If rc is * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will * investigate further. */ child->pid = ipc_pid; } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { /* An unexpected (but authorized) process is responding to * IPC. Investigate further. */ rc = pcmk_rc_ipc_unresponsive; } } } if (rc != pcmk_rc_ipc_unresponsive) { return rc; } /* If we get here, a child without IPC is being tracked, no IPC liveness * has been detected, or IPC liveness has been detected with an * unexpected (but authorized) process. This is safe on FreeBSD since * the only change possible from a proper child's PID into "special" PID * of 1 behind more loosely related process. 
*/ pid_active = pcmk__pid_active(child->pid, name); if ((ipc_pid != 0) && ((pid_active != pcmk_rc_ok) || (ipc_pid == PCMK__SPECIAL_PID) || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) { /* An unexpected (but authorized) process was detected at the IPC * endpoint, and either it is active, or the child we're tracking is * not. */ if (pid_active == pcmk_rc_ok) { /* The child we're tracking is active. Kill it, and adopt the * detected process. This assumes that our children don't fork * (thus getting a different PID owning the IPC), but rather the * tracking got out of sync because of some means external to * Pacemaker, and adopting the detected process is better than * killing it and possibly having to spawn a new child. */ /* not possessing IPC, afterall (what about corosync CPG?) */ stop_child(child, SIGKILL); } rc = pcmk_rc_ok; child->pid = ipc_pid; } else if (pid_active == pcmk_rc_ok) { // Our tracked child's PID was found active, but not its IPC rc = pcmk_rc_ipc_pid_only; } else if ((child->pid == 0) && (pid_active == EINVAL)) { // FreeBSD can return EINVAL rc = pcmk_rc_ipc_unresponsive; } else if (pid_active == EACCES) { rc = pcmk_rc_ipc_unauthorized; } else if (pid_active == ESRCH) { rc = pcmk_rc_ipc_unresponsive; } else { rc = pid_active; } return rc; } static void reset_respawn_count(pcmk_child_t *child) { /* Restore pristine state */ child->respawn_count = 0; } #define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ static int child_up_but_no_ipc(pcmk_child_t *child) { const char *ipc_name = pcmk__server_ipc_name(child->server); if (child->respawn_count == WAIT_TRIES) { crm_crit("%s IPC endpoint for existing process %lld did not (re)appear", ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); return pcmk_rc_ipc_pid_only; } crm_warn("Cannot find %s IPC endpoint for existing process %ld, could still " "reappear in %d attempts", ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid), WAIT_TRIES - child->respawn_count); return 
EAGAIN; } static int child_alive(pcmk_child_t *child) { const char *name = pcmk__server_name(child->server); if (child->pid == PCMK__SPECIAL_PID) { if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_crit("Cannot track pre-existing process for %s IPC on this " "platform and PCMK_" PCMK__ENV_FAIL_FAST " requested", name); return EOPNOTSUPP; } else if (child->respawn_count == WAIT_TRIES) { /* Because PCMK__ENV_FAIL_FAST wasn't requested, we can't bail * out. Instead, switch to IPC liveness monitoring which is not * very suitable for heavy system load. */ crm_notice("Cannot track pre-existing process for %s IPC on this " "platform but assuming it is stable and using liveness " "monitoring", name); crm_warn("The process for %s IPC cannot be terminated, so " "shutdown will be delayed by %d s to allow time for it " "to terminate on its own", name, SHUTDOWN_ESCALATION_PERIOD); } else { crm_warn("Cannot track pre-existing process for %s IPC on this " "platform; checking %d more times", name, WAIT_TRIES - child->respawn_count); return EAGAIN; } } crm_notice("Tracking existing %s process (pid=%lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); child->respawn_count = -1; /* 0~keep watching */ child->flags |= child_active_before_startup; return pcmk_rc_ok; } static int find_and_track_child(pcmk_child_t *child, int rounds, bool *wait_in_progress) { int rc = pcmk_rc_ok; const char *name = pcmk__server_name(child->server); if (child->respawn_count < 0) { return EAGAIN; } rc = child_liveness(child); if (rc == pcmk_rc_ipc_unresponsive) { /* As a speculation, don't give up if there are more rounds to * come for other reasons, but don't artificially wait just * because of this, since we would preferably start ASAP. 
*/ return EAGAIN; } child->respawn_count = rounds; if (rc == pcmk_rc_ok) { rc = child_alive(child); if (rc == EAGAIN) { *wait_in_progress = true; } } else if (rc == pcmk_rc_ipc_pid_only) { rc = child_up_but_no_ipc(child); if (rc == EAGAIN) { *wait_in_progress = true; } } else { crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); } return rc; } /*! * \internal * \brief Initial one-off check of the pre-existing "child" processes * * With "child" process, we mean the subdaemon that defines an API end-point * (all of them do as of the comment) -- the possible complement is skipped * as it is deemed it has no such shared resources to cause conflicts about, * hence it can presumably be started anew without hesitation. * If that won't hold true in the future, the concept of a shared resource * will have to be generalized beyond the API end-point. * * For boundary cases that the "child" is still starting (IPC end-point is yet * to be witnessed), or more rarely (practically FreeBSD only), when there's * a pre-existing "untrackable" authentic process, we give the situation some * time to possibly unfold in the right direction, meaning that said socket * will appear or the unattainable process will disappear per the observable * IPC, respectively. * * \return Standard Pacemaker return code * * \note Since this gets run at the very start, \c respawn_count fields * for particular children get temporarily overloaded with "rounds * of waiting" tracking, restored once we are about to finish with * success (i.e. returning value >=0) and will remain unrestored * otherwise. One way to suppress liveness detection logic for * particular child is to set the said value to a negative number. 
*/ int find_and_track_existing_processes(void) { bool wait_in_progress; size_t i, rounds; for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { wait_in_progress = false; for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { int rc = find_and_track_child(&pcmk_children[i], rounds, &wait_in_progress); if (rc == pcmk_rc_ok) { break; } else if (rc != EAGAIN) { return rc; } } if (!wait_in_progress) { break; } pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen } for_each_child(reset_respawn_count); pcmk__create_timer(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, NULL); return pcmk_rc_ok; } static void start_subdaemon(pcmk_child_t *child) { if (child->pid != 0) { /* We are already tracking this process */ return; } start_child(child); } gboolean init_children_processes(void *user_data) { if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { /* Corosync clusters can drop root group access, because we set * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect * to corosync. */ need_root_group = false; } /* start any children that have not been detected */ for_each_child(start_subdaemon); /* From this point on, any daemons being started will be due to * respawning rather than node start. 
* * This may be useful for the daemons to know */ pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false); pacemakerd_state = PCMK__VALUE_RUNNING; return G_SOURCE_CONTINUE; } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } static void restart_subdaemon(pcmk_child_t *child) { if (!pcmk_is_set(child->flags, child_needs_retry) || child->pid != 0) { return; } crm_notice("Respawning cluster-based subdaemon %s", pcmk__server_name(child->server)); if (start_child(child)) { child->flags &= ~child_needs_retry; } } void restart_cluster_subdaemons(void) { for_each_child(restart_subdaemon); } static void stop_child(pcmk_child_t *child, int signal) { const char *name = pcmk__server_name(child->server); if (signal == 0) { signal = SIGTERM; } /* why to skip PID of 1? - FreeBSD ~ how untrackable process behind IPC is masqueraded as - elsewhere: how "init" task is designated; in particular, in systemd arrangement of socket-based activation, this is pretty real */ if (child->pid == PCMK__SPECIAL_PID) { crm_debug("Nothing to do to stop subdaemon %s[%lld]", name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); return; } if (child->pid <= 0) { crm_trace("Nothing to do to stop subdaemon %s: Not running", name); return; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping subdaemon %s " QB_XS " via signal %d to process %lld", name, signal, (long long) child->pid); child->flags |= child_shutting_down; } else { crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s", name, (long long) child->pid, signal, strerror(errno)); } } diff --git a/include/crm/common/io_internal.h b/include/crm/common/io_internal.h index db9eced65a..05ed97d01d 100644 --- a/include/crm/common/io_internal.h +++ b/include/crm/common/io_internal.h @@ -1,63 +1,76 @@ /* * Copyright 2022-2025 the Pacemaker project contributors * * The version 
control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_IO_INTERNAL__H #define PCMK__CRM_COMMON_IO_INTERNAL__H #include // open() #include // bool +#include // freopen() #include // uid_t, gid_t #ifdef __cplusplus extern "C" { #endif int pcmk__real_path(const char *path, char **resolved_path); char *pcmk__series_filename(const char *directory, const char *series, unsigned int sequence, bool bzip); int pcmk__read_series_sequence(const char *directory, const char *series, unsigned int *seq); void pcmk__write_series_sequence(const char *directory, const char *series, unsigned int sequence, int max); int pcmk__chown_series_sequence(const char *directory, const char *series, uid_t uid, gid_t gid); int pcmk__build_path(const char *path_c, mode_t mode); char *pcmk__full_path(const char *filename, const char *dirname); bool pcmk__daemon_can_write(const char *dir, const char *file); void pcmk__sync_directory(const char *name); int pcmk__file_contents(const char *filename, char **contents); int pcmk__write_sync(int fd, const char *contents); int pcmk__set_nonblocking(int fd); const char *pcmk__get_tmpdir(void); void pcmk__close_fds_in_child(void); /*! * \internal - * \brief Open /dev/null to consume next available file descriptor + * \brief Reopen the standard streams using \c /dev/null * - * Open /dev/null, disregarding the result. This is intended when daemonizing to - * be able to null stdin, stdout, and stderr. - * - * \param[in] flags O_RDONLY (stdin) or O_WRONLY (stdout and stderr) + * This is intended for use when daemonizing, to null \c stdin, \c stdout, + * and \c stderr. Failures are ignored. */ static inline void -pcmk__open_devnull(int flags) +pcmk__null_std_streams(void) { - (void) open("/dev/null", flags); + /* The "(void) !" is to suppress an obnoxious gcc warning. 
At least on some + * systems, freopen() has the attribute __warn_unused_result__. "(void)" + * alone does not suppress the warning. This policy is controversial but + * intentional and longstanding. + * + * https://stackoverflow.com/questions/40576003/ignoring-warning-wunused-result + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425#c34 + * + * @TODO Consider replacing our uses of fork() with g_subprocess_*() or + * g_spawn_*(). These interfaces have arguments for redirecting standard + * streams to /dev/null. + */ + (void) !freopen("/dev/null", "r", stdin); + (void) !freopen("/dev/null", "w", stdout); + (void) !freopen("/dev/null", "w", stderr); } #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_IO_INTERNAL__H diff --git a/lib/common/utils.c b/lib/common/utils.c index 3da7f946dc..a63e22a54a 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1,483 +1,475 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // xmlCleanupParser() #include #include "crmcommon_private.h" CRM_TRACE_INIT_DATA(common); bool pcmk__config_has_error = false; bool pcmk__config_has_warning = false; char *crm_system_name = NULL; /*! * \brief Free all memory used by libcrmcommon * * Free all global memory allocated by the libcrmcommon library. This should be * called before exiting a process that uses the library, and the process should * not call any libcrmcommon or libxml2 APIs after calling this one. 
*/ void pcmk_common_cleanup(void) { // @TODO This isn't really everything, move all cleanup here mainloop_cleanup(); pcmk__schema_cleanup(); pcmk__free_common_logger(); free(crm_system_name); crm_system_name = NULL; // Clean up external library global state qb_log_fini(); // Don't log anything after this point xmlCleanupParser(); } bool pcmk__is_user_in_group(const char *user, const char *group) { struct group *grent; char **gr_mem; if (user == NULL || group == NULL) { return false; } setgrent(); while ((grent = getgrent()) != NULL) { if (grent->gr_mem == NULL) { continue; } if(strcmp(group, grent->gr_name) != 0) { continue; } gr_mem = grent->gr_mem; while (*gr_mem != NULL) { if (!strcmp(user, *gr_mem++)) { endgrent(); return true; } } } endgrent(); return false; } int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) { int rc = pcmk_ok; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; buffer = calloc(1, PCMK__PW_BUFFER_LEN); if (buffer == NULL) { return -ENOMEM; } rc = getpwnam_r(name, &pwd, buffer, PCMK__PW_BUFFER_LEN, &pwentry); if (pwentry) { if (uid) { *uid = pwentry->pw_uid; } if (gid) { *gid = pwentry->pw_gid; } crm_trace("User %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid); } else { rc = rc? -rc : -EINVAL; crm_info("User %s lookup: %s", name, pcmk_strerror(rc)); } free(buffer); return rc; } /*! * \brief Get user and group IDs of pacemaker daemon user * * \param[out] uid If non-NULL, where to store daemon user ID * \param[out] gid If non-NULL, where to store daemon group ID * * \return pcmk_ok on success, -errno otherwise */ int pcmk_daemon_user(uid_t *uid, gid_t *gid) { static uid_t daemon_uid; static gid_t daemon_gid; static bool found = false; int rc = pcmk_ok; if (!found) { rc = crm_user_lookup(CRM_DAEMON_USER, &daemon_uid, &daemon_gid); if (rc == pcmk_ok) { found = true; } } if (found) { if (uid) { *uid = daemon_uid; } if (gid) { *gid = daemon_gid; } } return rc; } /*! 
* \internal
 * \brief Return the integer equivalent of a portion of a string
 *
 * \param[in]  text      Pointer to beginning of string portion
 * \param[out] end_text  This will point to next character after integer
 *
 * \return Value parsed by strtol() in base 10 (converted to int), or -1 if
 *         \p text is NULL or empty, or if strtol() set errno to EINVAL
 *
 * \note \p end_text is left untouched when \p text is NULL or empty.
 */
static int
version_helper(const char *text, const char **end_text)
{
    int atoi_result = -1;

    pcmk__assert(end_text != NULL);

    errno = 0;

    if (text != NULL && text[0] != 0) {
        /* seemingly sacrificing const-correctness -- because while strtol
           doesn't modify the input, it doesn't want to artificially taint the
           "end_text" pointer-to-pointer-to-first-char-in-string with constness
           in case the input wasn't actually constant -- by semantic definition
           not a single character will get modified so it shall be perfectly
           safe to make compiler happy with dropping "const" qualifier here */
        atoi_result = (int) strtol(text, (char **) end_text, 10);

        if (errno == EINVAL) {
            crm_err("Conversion of '%s' %c failed", text, text[0]);
            atoi_result = -1;
        }
    }
    return atoi_result;
}

/*!
 * \brief Compare two version strings made of '.'-separated integers
 *
 * Components are compared numerically from left to right. Once one string is
 * exhausted, its remaining components count as 0. A NULL version sorts
 * before any non-NULL version, and two NULL versions are equal.
 *
 * \param[in] version1  First version string (may be NULL)
 * \param[in] version2  Second version string (may be NULL)
 *
 * \return -1 if version1 < version2, 0 if version1 = version2,
 *         1 if version1 > version2
 */
int
compare_version(const char *version1, const char *version2)
{
    int rc = 0;
    int lpc = 0;    // Number of loop passes, used only in the trace messages
    const char *ver1_iter, *ver2_iter;

    if (version1 == NULL && version2 == NULL) {
        return 0;
    } else if (version1 == NULL) {
        return -1;
    } else if (version2 == NULL) {
        return 1;
    }

    ver1_iter = version1;
    ver2_iter = version2;

    while (1) {
        // A string that is already exhausted contributes 0
        int digit1 = 0;
        int digit2 = 0;

        lpc++;

        /* Equal iterators mean either both strings are exhausted (both NULL)
         * or the same pointer was passed for both arguments.
         */
        if (ver1_iter == ver2_iter) {
            break;
        }

        if (ver1_iter != NULL) {
            digit1 = version_helper(ver1_iter, &ver1_iter);
        }

        if (ver2_iter != NULL) {
            digit2 = version_helper(ver2_iter, &ver2_iter);
        }

        if (digit1 < digit2) {
            rc = -1;
            break;

        } else if (digit1 > digit2) {
            rc = 1;
            break;
        }

        // Step over the separator, and mark the string exhausted at its end
        if (ver1_iter != NULL && *ver1_iter == '.') {
            ver1_iter++;
        }
        if (ver1_iter != NULL && *ver1_iter == '\0') {
            ver1_iter = NULL;
        }

        if (ver2_iter != NULL && *ver2_iter == '.') {
            ver2_iter++;
        }
        if (ver2_iter != NULL && *ver2_iter == 0) {
            ver2_iter = NULL;
        }
    }

    if (rc == 0) {
        crm_trace("%s == %s (%d)", version1, version2, lpc);
    } else if (rc < 0) {
        crm_trace("%s < %s (%d)", version1, version2, lpc);
    } else if (rc > 0) {
        crm_trace("%s > %s (%d)", version1, version2, lpc);
    }

    return rc;
}

/*!
 * \internal
 * \brief Convert the current process to a daemon process
 *
 * Fork a child process, exit the parent, and close the standard
 * input/output/error file descriptors.
 *
 * \param[in] name  Daemon executable name
 */
void
pcmk__daemonize(const char *name)
{
    pid_t pid = fork();

    if (pid < 0) {
        fprintf(stderr, "%s: could not start daemon\n", name);
        crm_perror(LOG_ERR, "fork");
        crm_exit(CRM_EX_OSERR);

    } else if (pid > 0) {
        // Parent: exit so that only the forked child continues
        crm_exit(CRM_EX_OK);
    }

    umask(S_IWGRP | S_IWOTH | S_IROTH);
-
-    close(STDIN_FILENO);
-    pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
-
-    close(STDOUT_FILENO);
-    pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
-
-    close(STDERR_FILENO);
-    pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
+    pcmk__null_std_streams();
}

#ifdef HAVE_UUID_UUID_H
# include
#endif

/*!
 * \brief Generate a new UUID in its textual form
 *
 * \return Newly allocated 37-byte buffer (including the NUL byte) filled in
 *         by uuid_unparse()
 * \note The buffer comes from malloc(), so the caller should free() it.
 */
char *
crm_generate_uuid(void)
{
    unsigned char uuid[16];
    char *buffer = malloc(37);  /* Including NUL byte */

    pcmk__mem_assert(buffer);
    uuid_generate(uuid);
    uuid_unparse(uuid, buffer);
    return buffer;
}

/*!
 * \internal
 * \brief Sleep for given milliseconds
 *
 * \param[in] ms  Time to sleep
 *
 * \note The full time might not be slept if a signal is received.
*/ void pcmk__sleep_ms(unsigned int ms) { // @TODO Impose a sane maximum sleep to avoid hanging a process for long //CRM_CHECK(ms <= MAX_SLEEP, ms = MAX_SLEEP); // Use sleep() for any whole seconds if (ms >= 1000) { sleep(ms / 1000); ms -= ms / 1000; } if (ms == 0) { return; } #if defined(HAVE_NANOSLEEP) // nanosleep() is POSIX-2008, so prefer that { struct timespec req = { .tv_sec = 0, .tv_nsec = (long) (ms * 1000000) }; nanosleep(&req, NULL); } #elif defined(HAVE_USLEEP) // usleep() is widely available, though considered obsolete usleep((useconds_t) ms); #else // Otherwise use a trick with select() timeout { struct timeval tv = { .tv_sec = 0, .tv_usec = (suseconds_t) ms }; select(0, NULL, NULL, NULL, &tv); } #endif } /*! * \internal * \brief Add a timer * * \param[in] interval_ms The interval for the function to be called, in ms * \param[in] fn The function to be called * \param[in] data Data to be passed to fn (can be NULL) * * \return The ID of the event source */ guint pcmk__create_timer(guint interval_ms, GSourceFunc fn, gpointer data) { pcmk__assert(interval_ms != 0 && fn != NULL); if (interval_ms % 1000 == 0) { /* In case interval_ms is 0, the call to pcmk__timeout_ms2s ensures * an interval of one second. */ return g_timeout_add_seconds(pcmk__timeout_ms2s(interval_ms), fn, data); } else { return g_timeout_add(interval_ms, fn, data); } } /*! * \internal * \brief Convert milliseconds to seconds * * \param[in] timeout_ms The interval, in ms * * \return If \p timeout_ms is 0, return 0. Otherwise, return the number of * seconds, rounded to the nearest integer, with a minimum of 1. */ guint pcmk__timeout_ms2s(guint timeout_ms) { guint quot, rem; if (timeout_ms == 0) { return 0; } else if (timeout_ms < 1000) { return 1; } quot = timeout_ms / 1000; rem = timeout_ms % 1000; if (rem >= 500) { quot += 1; } return quot; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include // gnutls_global_init(), etc. 
#include static void _gnutls_log_func(int level, const char *msg) { crm_trace("%s", msg); } void crm_gnutls_global_init(void) { signal(SIGPIPE, SIG_IGN); gnutls_global_init(); gnutls_global_set_log_level(8); gnutls_global_set_log_function(_gnutls_log_func); } /*! * \brief Check whether string represents a client name used by cluster daemons * * \param[in] name String to check * * \return true if name is standard client name used by daemons, false otherwise * * \note This is provided by the client, and so cannot be used by itself as a * secure means of authentication. */ bool crm_is_daemon_name(const char *name) { return pcmk__str_any_of(name, "attrd", CRM_SYSTEM_CIB, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, CRM_SYSTEM_LRMD, CRM_SYSTEM_MCP, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, "pacemaker-attrd", "pacemaker-based", "pacemaker-controld", "pacemaker-execd", "pacemaker-fenced", "pacemaker-remoted", "pacemaker-schedulerd", "stonith-ng", "stonithd", NULL); } // LCOV_EXCL_STOP // End deprecated API