diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c index 1056783ecf..999946abf9 100644 --- a/daemons/pacemakerd/pcmkd_subdaemons.c +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -1,881 +1,886 @@ /* * Copyright 2010-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #if SUPPORT_COROSYNC #include "pcmkd_corosync.h" #endif #include #include #include #include #include #include #include #include #include #include #include enum child_daemon_flags { child_none = 0, child_respawn = 1 << 0, child_needs_cluster = 1 << 1, child_needs_retry = 1 << 2, child_active_before_startup = 1 << 3, }; typedef struct pcmk_child_s { enum pcmk_ipc_server server; pid_t pid; int respawn_count; const char *uid; int check_count; uint32_t flags; } pcmk_child_t; #define PCMK_PROCESS_CHECK_INTERVAL 1 #define PCMK_PROCESS_CHECK_RETRIES 5 #define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ /* Index into the array below */ #define PCMK_CHILD_CONTROLD 5 static pcmk_child_t pcmk_children[] = { { pcmk_ipc_based, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_fenced, 0, 0, NULL, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_execd, 0, 0, NULL, 0, child_respawn }, { pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER, 0, child_respawn }, { pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, }; static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; crm_trigger_t *shutdown_trigger = NULL; crm_trigger_t *startup_trigger = NULL; time_t subdaemon_check_progress = 0; // Whether we need root group access to talk to cluster layer static bool need_root_group = true; /* When contacted via pacemakerd-api by a client having sbd in * the name we assume it is sbd-daemon which wants to know * if pacemakerd shutdown gracefully. * Thus when everything is shutdown properly pacemakerd * waits till it has reported the graceful completion of * shutdown to sbd and just when sbd-client closes the * connection we can assume that the report has arrived * properly so that pacemakerd can finally exit. * Following two variables are used to track that handshake. */ unsigned int shutdown_complete_state_reported_to = 0; gboolean shutdown_complete_state_reported_client_closed = FALSE; /* state we report when asked via pacemakerd-api status-ping */ const char *pacemakerd_state = PCMK__VALUE_INIT; gboolean running_with_sbd = FALSE; /* local copy */ GMainLoop *mainloop = NULL; static gboolean fatal_error = FALSE; static int child_liveness(pcmk_child_t *child); static gboolean escalate_shutdown(gpointer data); static int start_child(pcmk_child_t * child); static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); static void pcmk_process_exit(pcmk_child_t * child); static gboolean pcmk_shutdown_worker(gpointer user_data); static gboolean stop_child(pcmk_child_t * child, int signal); /*! * \internal * \brief Get path to subdaemon executable * * \param[in] subdaemon Subdaemon to get path for * * \return Newly allocated string with path to subdaemon executable * \note It is the caller's responsibility to free() the return value */ static inline char * subdaemon_path(pcmk_child_t *subdaemon) { return crm_strdup_printf(CRM_DAEMON_DIR "/%s", pcmk__server_name(subdaemon->server)); } static bool pcmkd_cluster_connected(void) { #if SUPPORT_COROSYNC return pcmkd_corosync_connected(); #else return true; #endif } static gboolean check_next_subdaemon(gpointer user_data) { static int next_child = 0; pcmk_child_t *child = &(pcmk_children[next_child]); const char *name = pcmk__server_name(child->server); const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid); int rc = child_liveness(child); crm_trace("Checked subdaemon %s[%lld]: %s (%d)", name, pid, pcmk_rc_str(rc), rc); switch (rc) { case pcmk_rc_ok: child->check_count = 0; subdaemon_check_progress = time(NULL); break; case pcmk_rc_ipc_pid_only: // Child was previously OK if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) { + // cts-lab looks for this message crm_crit("Subdaemon %s[%lld] is unresponsive to IPC " "after %d attempt%s and will now be killed", name, pid, child->check_count, pcmk__plural_s(child->check_count)); stop_child(child, SIGKILL); if (pcmk_is_set(child->flags, child_respawn)) { // Respawn limit hasn't been reached, so retry another round child->check_count = 0; } } else { crm_notice("Subdaemon %s[%lld] is unresponsive to IPC " "after %d attempt%s (will recheck later)", name, pid, child->check_count, pcmk__plural_s(child->check_count)); if (pcmk_is_set(child->flags, child_respawn)) { /* as long as the respawn-limit isn't reached and we haven't run out of connect retries we account this as progress we are willing to tell to sbd */ subdaemon_check_progress = time(NULL); } } /* go to the next child and see if we can make progress there */ break; case pcmk_rc_ipc_unresponsive: if (!pcmk_is_set(child->flags, child_respawn)) { /* if a subdaemon is down and we don't want it to be restarted this is a success during shutdown. if it isn't restarted anymore due to MAX_RESPAWN it is rather no success. */ if (child->respawn_count <= MAX_RESPAWN) { subdaemon_check_progress = time(NULL); } } if (!pcmk_is_set(child->flags, child_active_before_startup)) { crm_trace("Subdaemon %s[%lld] terminated", name, pid); break; } if (pcmk_is_set(child->flags, child_respawn)) { + // cts-lab looks for this message crm_err("Subdaemon %s[%lld] terminated", name, pid); } else { /* orderly shutdown */ crm_notice("Subdaemon %s[%lld] terminated", name, pid); } pcmk_process_exit(child); break; default: crm_exit(CRM_EX_FATAL); break; /* static analysis/noreturn */ } if (++next_child >= PCMK__NELEM(pcmk_children)) { next_child = 0; } return G_SOURCE_CONTINUE; } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid == PCMK__SPECIAL_PID) { pcmk_process_exit(child); } else if (child->pid != 0) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Subdaemon %s not terminating in a timely manner, forcing", pcmk__server_name(child->server)); stop_child(child, SIGSEGV); } return FALSE; } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo) { + // cts-lab looks for this message do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), "%s[%d] terminated with signal %d (%s)%s", name, pid, signo, strsignal(signo), (core? " and dumped core" : "")); } else { switch(exitcode) { case CRM_EX_OK: crm_info("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; case CRM_EX_FATAL: crm_warn("Shutting cluster down because %s[%d] had fatal failure", name, pid); child->flags &= ~child_respawn; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case CRM_EX_PANIC: crm_emerg("%s[%d] instructed the machine to reset", name, pid); child->flags &= ~child_respawn; fatal_error = TRUE; pcmk__panic(__func__); pcmk_shutdown(SIGTERM); break; default: + // cts-lab looks for this message crm_err("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; } } pcmk_process_exit(child); } static void pcmk_process_exit(pcmk_child_t * child) { const char *name = pcmk__server_name(child->server); child->pid = 0; child->flags &= ~child_active_before_startup; child->check_count = 0; child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Subdaemon %s exceeded maximum respawn count", name); child->flags &= ~child_respawn; } if (shutdown_trigger) { /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ mainloop_set_trigger(shutdown_trigger); } else if (!pcmk_is_set(child->flags, child_respawn)) { /* nothing to do */ } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_err("Rebooting system because of subdaemon %s failure", name); pcmk__panic(__func__); } else if (child_liveness(child) == pcmk_rc_ok) { crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK", name, pcmk__server_ipc_name(child->server)); } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) { crm_notice("Not respawning subdaemon %s until cluster returns", name); child->flags |= child_needs_retry; } else { + // cts-lab looks for this message crm_notice("Respawning subdaemon %s after unexpected exit", name); start_child(child); } } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = PCMK__NELEM(pcmk_children) - 1; static time_t next_log = 0; if (phase == PCMK__NELEM(pcmk_children) - 1) { crm_notice("Shutting down Pacemaker"); pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN; } for (; phase >= 0; phase--) { pcmk_child_t *child = &(pcmk_children[phase]); const char *name = pcmk__server_name(child->server); if (child->pid != 0) { time_t now = time(NULL); if (pcmk_is_set(child->flags, child_respawn)) { if (child->pid == PCMK__SPECIAL_PID) { crm_warn("Subdaemon %s cannot be terminated (shutdown " "will be escalated after %ld seconds if it does " "not terminate on its own; set PCMK_" PCMK__ENV_FAIL_FAST "=1 to exit immediately " "instead)", name, (long) SHUTDOWN_ESCALATION_PERIOD); } next_log = now + 30; child->flags &= ~child_respawn; stop_child(child, SIGTERM); if (phase < PCMK_CHILD_CONTROLD) { g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for subdaemon %s to terminate " QB_XS " pid=%lld", name, (long long) child->pid); } return TRUE; } /* cleanup */ crm_debug("Subdaemon %s confirmed stopped", name); child->pid = 0; } crm_notice("Shutdown complete"); pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE; if (!fatal_error && running_with_sbd && pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { crm_notice("Waiting for SBD to pick up shutdown-complete-state."); return TRUE; } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Shutting down and staying down after fatal error"); #ifdef SUPPORT_COROSYNC pcmkd_shutdown_corosync(); #endif crm_exit(CRM_EX_FATAL); } return TRUE; } /* TODO once libqb is taught to juggle with IPC end-points carried over as bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) it shall hand over these descriptors here if/once they are successfully pre-opened in (presumably) child_liveness(), to avoid any remaining room for races */ // \return Standard Pacemaker return code static int start_child(pcmk_child_t * child) { uid_t uid = 0; gid_t gid = 0; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *name = pcmk__server_name(child->server); const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED); child->flags &= ~child_active_before_startup; child->check_count = 0; if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if ((env_callgrind != NULL) && (strstr(env_callgrind, name) != NULL)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if ((env_valgrind != NULL) && (strstr(env_valgrind, name) != NULL)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found", name); use_valgrind = FALSE; } if (child->uid) { if (crm_user_lookup(child->uid, &uid, &gid) < 0) { crm_err("Invalid user (%s) for subdaemon %s: not found", child->uid, name); return EACCES; } crm_info("Using uid %lu and group %lu for subdaemon %s", (unsigned long) uid, (unsigned long) gid, name); } child->pid = fork(); CRM_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit); crm_info("Forked process %lld for subdaemon %s%s", (long long) child->pid, name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); return pcmk_rc_ok; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrays */ opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = pcmk__str_copy("--tool=callgrind"); opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = subdaemon_path(child); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = subdaemon_path(child); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = subdaemon_path(child); if(gid) { // Drop root group access if not needed if (!need_root_group && (setgid(gid) < 0)) { crm_warn("Could not set subdaemon %s group to %lu: %s", name, (unsigned long) gid, strerror(errno)); } /* Initialize supplementary groups to only those always granted to * the user, plus haclient (so we can access IPC). */ if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize system groups for subdaemon %s: %s " QB_XS " errno=%d", name, pcmk_rc_str(errno), errno); } } if (uid && setuid(uid) < 0) { crm_warn("Could not set subdaemon %s user to %s: %s " QB_XS " uid=%lu errno=%d", name, strerror(errno), child->uid, (unsigned long) uid, errno); } pcmk__close_fds_in_child(true); pcmk__open_devnull(O_RDONLY); // stdin (fd 0) pcmk__open_devnull(O_WRONLY); // stdout (fd 1) pcmk__open_devnull(O_WRONLY); // stderr (fd 2) if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { char *path = subdaemon_path(child); (void) execvp(path, opts_default); free(path); } crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno)); crm_exit(CRM_EX_FATAL); } return pcmk_rc_ok; /* never reached */ } /*! * \internal * \brief Check the liveness of the child based on IPC name and PID if tracked * * \param[in,out] child Child tracked data * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that * the child is up by PID but not IPC end-point (possibly starting). * \note This function doesn't modify any of \p child members but \c pid, * and is not actively toying with processes as such but invoking * \c stop_child in one particular case (there's for some reason * a different authentic holder of the IPC end-point). */ static int child_liveness(pcmk_child_t *child) { uid_t cl_uid = 0; gid_t cl_gid = 0; const uid_t root_uid = 0; const gid_t root_gid = 0; const uid_t *ref_uid; const gid_t *ref_gid; const char *name = pcmk__server_name(child->server); int rc = pcmk_rc_ipc_unresponsive; int legacy_rc = pcmk_ok; pid_t ipc_pid = 0; if (child->uid == NULL) { ref_uid = &root_uid; ref_gid = &root_gid; } else { ref_uid = &cl_uid; ref_gid = &cl_gid; legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); } if (legacy_rc < 0) { rc = pcmk_legacy2rc(legacy_rc); crm_err("Could not find user and group IDs for user %s: %s " QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); } else { const char *ipc_name = pcmk__server_ipc_name(child->server); rc = pcmk__ipc_is_authentic_process_active(ipc_name, *ref_uid, *ref_gid, &ipc_pid); if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { if (child->pid <= 0) { /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this * initializes a new child. If rc is * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will * investigate further. */ child->pid = ipc_pid; } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { /* An unexpected (but authorized) process is responding to * IPC. Investigate further. */ rc = pcmk_rc_ipc_unresponsive; } } } if (rc == pcmk_rc_ipc_unresponsive) { /* If we get here, a child without IPC is being tracked, no IPC liveness * has been detected, or IPC liveness has been detected with an * unexpected (but authorized) process. This is safe on FreeBSD since * the only change possible from a proper child's PID into "special" PID * of 1 behind more loosely related process. */ int ret = pcmk__pid_active(child->pid, name); if (ipc_pid && ((ret != pcmk_rc_ok) || ipc_pid == PCMK__SPECIAL_PID || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) { /* An unexpected (but authorized) process was detected at the IPC * endpoint, and either it is active, or the child we're tracking is * not. */ if (ret == pcmk_rc_ok) { /* The child we're tracking is active. Kill it, and adopt the * detected process. This assumes that our children don't fork * (thus getting a different PID owning the IPC), but rather the * tracking got out of sync because of some means external to * Pacemaker, and adopting the detected process is better than * killing it and possibly having to spawn a new child. */ /* not possessing IPC, afterall (what about corosync CPG?) */ stop_child(child, SIGKILL); } rc = pcmk_rc_ok; child->pid = ipc_pid; } else if (ret == pcmk_rc_ok) { // Our tracked child's PID was found active, but not its IPC rc = pcmk_rc_ipc_pid_only; } else if ((child->pid == 0) && (ret == EINVAL)) { // FreeBSD can return EINVAL rc = pcmk_rc_ipc_unresponsive; } else { switch (ret) { case EACCES: rc = pcmk_rc_ipc_unauthorized; break; case ESRCH: rc = pcmk_rc_ipc_unresponsive; break; default: rc = ret; break; } } } return rc; } /*! * \internal * \brief Initial one-off check of the pre-existing "child" processes * * With "child" process, we mean the subdaemon that defines an API end-point * (all of them do as of the comment) -- the possible complement is skipped * as it is deemed it has no such shared resources to cause conflicts about, * hence it can presumably be started anew without hesitation. * If that won't hold true in the future, the concept of a shared resource * will have to be generalized beyond the API end-point. * * For boundary cases that the "child" is still starting (IPC end-point is yet * to be witnessed), or more rarely (practically FreeBSD only), when there's * a pre-existing "untrackable" authentic process, we give the situation some * time to possibly unfold in the right direction, meaning that said socket * will appear or the unattainable process will disappear per the observable * IPC, respectively. * * \return Standard Pacemaker return code * * \note Since this gets run at the very start, \c respawn_count fields * for particular children get temporarily overloaded with "rounds * of waiting" tracking, restored once we are about to finish with * success (i.e. returning value >=0) and will remain unrestored * otherwise. One way to suppress liveness detection logic for * particular child is to set the said value to a negative number. */ #define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ int find_and_track_existing_processes(void) { bool wait_in_progress; int rc; size_t i, rounds; for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { wait_in_progress = false; for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { const char *name = pcmk__server_name(pcmk_children[i].server); const char *ipc_name = NULL; if (pcmk_children[i].respawn_count < 0) { continue; } rc = child_liveness(&pcmk_children[i]); if (rc == pcmk_rc_ipc_unresponsive) { /* As a speculation, don't give up if there are more rounds to * come for other reasons, but don't artificially wait just * because of this, since we would preferably start ASAP. */ continue; } // @TODO Functionize more of this to reduce nesting ipc_name = pcmk__server_ipc_name(pcmk_children[i].server); pcmk_children[i].respawn_count = rounds; switch (rc) { case pcmk_rc_ok: if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_crit("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform and PCMK_" PCMK__ENV_FAIL_FAST " requested", ipc_name); return EOPNOTSUPP; } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_notice("Assuming pre-existing authentic, though" " on this platform untrackable, process" " behind %s IPC is stable (was in %d" " previous samples) so rather than" " bailing out (PCMK_" PCMK__ENV_FAIL_FAST " not requested), we just switch to a" " less optimal IPC liveness monitoring" " (not very suitable for heavy load)", name, WAIT_TRIES - 1); crm_warn("The process behind %s IPC cannot be" " terminated, so the overall shutdown" " will get delayed implicitly (%ld s)," " which serves as a graceful period for" " its native termination if it vitally" " depends on some other daemons going" " down in a controlled way already", name, (long) SHUTDOWN_ESCALATION_PERIOD); } else { wait_in_progress = true; crm_warn("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform, can still disappear in %d" " attempt(s)", ipc_name, WAIT_TRIES - pcmk_children[i].respawn_count); continue; } } crm_notice("Tracking existing %s process (pid=%lld)", name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); pcmk_children[i].respawn_count = -1; /* 0~keep watching */ pcmk_children[i].flags |= child_active_before_startup; break; case pcmk_rc_ipc_pid_only: if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_crit("%s IPC endpoint for existing authentic" " process %lld did not (re)appear", ipc_name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); return rc; } wait_in_progress = true; crm_warn("Cannot find %s IPC endpoint for existing" " authentic process %lld, can still (re)appear" " in %d attempts (?)", ipc_name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid), WAIT_TRIES - pcmk_children[i].respawn_count); continue; default: crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); return rc; } } if (!wait_in_progress) { break; } pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen } for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { pcmk_children[i].respawn_count = 0; /* restore pristine state */ } g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, NULL); return pcmk_rc_ok; } gboolean init_children_processes(void *user_data) { if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { /* Corosync clusters can drop root group access, because we set * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect * to corosync. */ need_root_group = false; } /* start any children that have not been detected */ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (pcmk_children[i].pid != 0) { /* we are already tracking it */ continue; } start_child(&(pcmk_children[i])); } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false); pacemakerd_state = PCMK__VALUE_RUNNING; return TRUE; } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } void restart_cluster_subdaemons(void) { for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) { continue; } crm_notice("Respawning cluster-based subdaemon %s", pcmk__server_name(pcmk_children[i].server)); if (start_child(&pcmk_children[i])) { pcmk_children[i].flags &= ~child_needs_retry; } } } static gboolean stop_child(pcmk_child_t * child, int signal) { const char *name = pcmk__server_name(child->server); if (signal == 0) { signal = SIGTERM; } /* why to skip PID of 1? - FreeBSD ~ how untrackable process behind IPC is masqueraded as - elsewhere: how "init" task is designated; in particular, in systemd arrangement of socket-based activation, this is pretty real */ if (child->pid == PCMK__SPECIAL_PID) { crm_debug("Nothing to do to stop subdaemon %s[%lld]", name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); return TRUE; } if (child->pid <= 0) { crm_trace("Nothing to do to stop subdaemon %s: Not running", name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping subdaemon %s " QB_XS " via signal %d to process %lld", name, signal, (long long) child->pid); } else { crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s", name, (long long) child->pid, signal, strerror(errno)); } return TRUE; } diff --git a/python/pacemaker/_cts/patterns.py b/python/pacemaker/_cts/patterns.py index 03a405c708..4950e95686 100644 --- a/python/pacemaker/_cts/patterns.py +++ b/python/pacemaker/_cts/patterns.py @@ -1,396 +1,397 @@ """Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS).""" __all__ = ["PatternSelector"] __copyright__ = "Copyright 2008-2024 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+)" import argparse from pacemaker.buildoptions import BuildOptions class BasePatterns: """ The base class for holding a stack-specific set of command and log file/stdout patterns. Stack-specific classes need to be built on top of this one. """ def __init__(self): """Create a new BasePatterns instance which holds a very minimal set of basic patterns.""" self._bad_news = [] self._components = {} self._name = "crm-base" self._ignore = [ "avoid confusing Valgrind", # Logging bug in some versions of libvirtd r"libvirtd.*: internal error: Failed to parse PCI config address", # pcs can log this when node is fenced, but fencing is OK in some # tests (and we will catch it in pacemaker logs when not OK) r"pcs.daemon:No response from: .* request: get_configs, error:", # This is overbroad, but there's no way to say that only certain # transition errors are acceptable. We have to rely on causes of a # transition error logging their own error message, which should # always be the case. r"pacemaker-schedulerd.* Calculated transition .*/pe-error", ] self._commands = { "StatusCmd": "crmadmin -t 60 -S %s 2>/dev/null", "CibQuery": "cibadmin -Ql", "CibAddXml": "cibadmin --modify -c --xml-text %s", "CibDelXpath": "cibadmin --delete --xpath %s", "RscRunning": BuildOptions.DAEMON_DIR + "/cts-exec-helper -R -r %s", "CIBfile": "%s:" + BuildOptions.CIB_DIR + "/cib.xml", "TmpDir": "/tmp", "BreakCommCmd": "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd": "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", "MaintenanceModeOn": "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff": "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd": "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd": "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null", } self._search = { "Pat:DC_IDLE": r"pacemaker-controld.*State transition.*-> S_IDLE", # This won't work if we have multiple partitions "Pat:Local_started": r"%s\W.*controller successfully started", "Pat:NonDC_started": r"%s\W.*State transition.*-> S_NOT_DC", "Pat:DC_started": r"%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped": r"%s\W.*OVERRIDE THIS PATTERN", "Pat:They_stopped": r"%s\W.*LOST:.* %s ", "Pat:They_dead": r"node %s.*: is dead", "Pat:They_up": r"%s %s\W.*OVERRIDE THIS PATTERN", "Pat:TransitionComplete": "Transition status: Complete: complete", "Pat:Fencing_start": r"Requesting peer fencing .* targeting %s", "Pat:Fencing_ok": r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK", "Pat:Fencing_recover": r"pacemaker-schedulerd.*: Recover\s+%s", "Pat:Fencing_active": r"stonith resource .* is active on 2 nodes (attempting recovery)", "Pat:Fencing_probe": r"pacemaker-controld.* Result of probe operation for %s on .*: Error", "Pat:RscOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok", "Pat:RscOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of %s ", "Pat:CloneOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of (%s|%s) ", "Pat:RscRemoteOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok", "Pat:NodeFenced": r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK", } def get_component(self, key): """ Return the patterns for a single component as a list, given by key. This is typically the name of some subprogram (pacemaker-based, pacemaker-fenced, etc.) or various special purpose keys. If key is unknown, return an empty list. """ if key in self._components: return self._components[key] print("Unknown component '%s' for %s" % (key, self._name)) return [] def get_patterns(self, key): """ Return various patterns supported by this object, given by key. Depending on the key, this could either be a list or a hash. If key is unknown, return None. """ if key == "BadNews": return self._bad_news if key == "BadNewsIgnore": return self._ignore if key == "Commands": return self._commands if key == "Search": return self._search if key == "Components": return self._components print("Unknown pattern '%s' for %s" % (key, self._name)) return None def __getitem__(self, key): if key == "Name": return self._name if key in self._commands: return self._commands[key] if key in self._search: return self._search[key] print("Unknown template '%s' for %s" % (key, self._name)) return None class Corosync2Patterns(BasePatterns): """Patterns for Corosync version 2 cluster manager class.""" def __init__(self): BasePatterns.__init__(self) self._name = "crm-corosync" self._commands.update({ "StartCmd": "service corosync start && service pacemaker start", "StopCmd": "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop", "EpochCmd": "crm_node -e", "QuorumCmd": "crm_node -q", "PartitionCmd": "crm_node -p", }) self._search.update({ # Close enough ... "Corosync Cluster Engine exiting normally" isn't # printed reliably. "Pat:We_stopped": r"%s\W.*Unloading all Corosync service engines", "Pat:They_stopped": r"%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:They_dead": r"pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:They_up": r"\W%s\W.*pacemaker-controld.*Node %s state is now member", "Pat:ChildExit": r"\[[0-9]+\] exited with status [0-9]+ \(", # "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes() "Pat:ChildKilled": r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated( with signal 9|$)", - "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning %s subdaemon after unexpected exit", + "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning subdaemon %s after unexpected exit", "Pat:InfraUp": r"%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp": r"%s\W.*pacemakerd.*Starting Pacemaker", }) self._ignore += [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error.*: Operation 'reboot' .* using FencingFail returned ", r"getinfo response error: 1$", r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)", ] self._bad_news = [ r"[^(]error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting", r"schedulerd.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", - r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)", + r"pacemakerd.*\[[0-9]+\] terminated( with signal|$)", + r"pacemakerd.*\[[0-9]+\] .* will now be killed", r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)", r"rsyslogd.* lost .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"(Blackbox dump requested|Problem detected)", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", r"pacemaker-controld:.*Transition failed: terminated", r"Local CIB .* differs from .*:", r"warn.*:\s*Continuing but .* will NOT be used", r"warn.*:\s*Cluster configuration file .* is corrupt", r"Election storm", r"stalled the FSA with pending inputs", ] self._components["common-ignore"] = [ r"Pending action:", r"resource( was|s were) active at shutdown", r"pending LRM operations at shutdown", r"Lost connection to the CIB manager", r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported", r"pacemaker-controld.*:\s*Exiting now due to errors", r".*:\s*Requesting fencing \([^)]+\) targeting node ", r"(Blackbox dump requested|Problem detected)", ] self._components["corosync-ignore"] = [ r"Could not connect to Corosync CFG: CS_ERR_LIBRARY", r"error:.*Connection to the CPG API failed: Library error", r"\[[0-9]+\] exited with status [0-9]+ \(", r"\[[0-9]+\] terminated with signal 15", r"pacemaker-based.*error:.*Corosync connection lost", r"pacemaker-fenced.*error:.*Corosync connection terminated", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state", r"pacemaker-controld.*error:.*Could not recover from internal error", r"error:.*Connection to cib_(shm|rw).* (failed|closed)", r"error:.*cib_(shm|rw) IPC provider disconnected while waiting", r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"error: Lost fencer connection", ] self._components["corosync"] = [ # We expect each daemon to lose its cluster connection. # However, if the CIB manager loses its connection first, # it's possible for another daemon to lose that connection and # exit before losing the cluster connection. r"pacemakerd.*:\s*warning:.*Lost connection to cluster layer", r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (Corosync process group|the CIB manager)", r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer", r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"schedulerd.*Scheduling node .* for fencing", r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK", ] self._components["pacemaker-based"] = [ r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102", r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1", - r"pacemakerd.* Respawning pacemaker-attrd subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-based subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-fenced subdaemon after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-attrd after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-based after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-fenced after unexpected exit", r"pacemaker-.* Connection to cib_.* (failed|closed)", r"pacemaker-attrd.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*I_ERROR.*handle_cib_disconnect", r"pacemaker-controld.* State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", ] self._components["pacemaker-based-ignore"] = [ r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", r"pacemaker-controld.*:Could not connect to attrd: Connection refused", ] self._components["pacemaker-execd"] = [ r"pacemaker-controld.*Lost connection to local executor", r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1", - r"pacemakerd.* Respawning pacemaker-execd subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-execd after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit", ] self._components["pacemaker-execd-ignore"] = [ r"pacemaker-(attrd|controld).*Connection to lrmd.* (failed|closed)", r"pacemaker-(attrd|controld).*Could not execute alert", ] self._components["pacemaker-controld"] = [ r"State transition .* -> S_IDLE", ] self._components["pacemaker-controld-ignore"] = [] self._components["pacemaker-attrd"] = [] self._components["pacemaker-attrd-ignore"] = [] self._components["pacemaker-schedulerd"] = [ r"State transition .* S_RECOVERY", - r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit", r"pacemaker-controld\[[0-9]+\] exited with status 1 \(", r"pacemaker-controld.*Lost connection to the scheduler", r"pacemaker-controld.*I_ERROR.*save_cib_contents", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", ] self._components["pacemaker-schedulerd-ignore"] = [ r"Connection to pengine.* (failed|closed)", ] self._components["pacemaker-fenced"] = [ r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"Lost fencer connection", r"pacemaker-controld.*Fencer successfully connected", ] self._components["pacemaker-fenced-ignore"] = [ r"(error|warning):.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"error:.*Lost fencer connection", r"error:.*Fencer connection failed \(will retry\)", r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", ] self._components["pacemaker-fenced-ignore"].extend(self._components["common-ignore"]) patternVariants = { "crm-base": BasePatterns, "crm-corosync": Corosync2Patterns } class PatternSelector: """Choose from among several Pattern objects and return the information from that object.""" def __init__(self, name="crm-corosync"): """ Create a new PatternSelector object. Instantiate whatever class is given by name. Defaults to Corosync2Patterns for "crm-corosync" or None. While other objects could be supported in the future, only this and the base object are supported at this time. """ self._name = name # If no name was given, use the default. Otherwise, look up the appropriate # class in patternVariants, instantiate it, and use that. if not name: self._base = Corosync2Patterns() else: self._base = patternVariants[name]() def get_patterns(self, kind): """Call get_patterns on the previously instantiated pattern object.""" return self._base.get_patterns(kind) def get_template(self, key): """ Return a single pattern from the previously instantiated pattern object. If no pattern exists for the given key, return None. """ return self._base[key] def get_component(self, kind): """Call get_component on the previously instantiated pattern object.""" return self._base.get_component(kind) def __getitem__(self, key): """Return the pattern for the given key, or None if it does not exist.""" return self.get_template(key) # PYTHONPATH=python python python/pacemaker/_cts/patterns.py -k crm-corosync -t StartCmd if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("-k", "--kind", metavar="KIND") parser.add_argument("-t", "--template", metavar="TEMPLATE") args = parser.parse_args() print(PatternSelector(args.kind)[args.template])