diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
index 1056783ecf..999946abf9 100644
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
@@ -1,881 +1,886 @@
/*
* Copyright 2010-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#if SUPPORT_COROSYNC
#include "pcmkd_corosync.h"
#endif
#include <errno.h>
#include <grp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <crm/cluster.h>
#include <crm/common/xml.h>
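/* Per-subdaemon flags (kept in pcmk_child_t:flags):
* - child_respawn: restart the subdaemon if it exits unexpectedly
* - child_needs_cluster: subdaemon requires a cluster-layer connection, so a
*   respawn is deferred while the cluster layer is down
* - child_needs_retry: a respawn was deferred and should be retried once the
*   cluster layer returns (see restart_cluster_subdaemons())
* - child_active_before_startup: the process was already running when
*   pacemakerd started and was adopted rather than spawned
*/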
enum child_daemon_flags {
child_none = 0,
child_respawn = 1 << 0,
child_needs_cluster = 1 << 1,
child_needs_retry = 1 << 2,
child_active_before_startup = 1 << 3,
};
typedef struct pcmk_child_s {
enum pcmk_ipc_server server;
pid_t pid;
int respawn_count;
const char *uid;
int check_count;
uint32_t flags;
} pcmk_child_t;
#define PCMK_PROCESS_CHECK_INTERVAL 1
#define PCMK_PROCESS_CHECK_RETRIES 5
#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
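/* check_next_subdaemon() runs every PCMK_PROCESS_CHECK_INTERVAL seconds and
* examines one subdaemon per invocation; a subdaemon whose process is alive
* but whose IPC stays unresponsive for PCMK_PROCESS_CHECK_RETRIES consecutive
* checks is killed (and respawned if its respawn limit allows).
*/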
/* Index into the array below */
#define PCMK_CHILD_CONTROLD 5
static pcmk_child_t pcmk_children[] = {
{
pcmk_ipc_based, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_fenced, 0, 0, NULL,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_execd, 0, 0, NULL,
0, child_respawn
},
{
pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER,
0, child_respawn
},
{
pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
};
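/* Subdaemons are started in the array order above (init_children_processes())
* and stopped in reverse order (pcmk_shutdown_worker()); PCMK_CHILD_CONTROLD
* must match the controller's position in the array.
*/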
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
crm_trigger_t *shutdown_trigger = NULL;
crm_trigger_t *startup_trigger = NULL;
time_t subdaemon_check_progress = 0;
// Whether we need root group access to talk to cluster layer
static bool need_root_group = true;
/* When contacted via the pacemakerd API by a client with "sbd" in its name,
* we assume it is the sbd daemon, which wants to know whether pacemakerd
* shut down gracefully. So once everything has shut down properly, pacemakerd
* waits until it has reported the graceful completion of the shutdown to sbd,
* and only when the sbd client closes the connection can we assume the report
* arrived, allowing pacemakerd to finally exit.
* The following two variables are used to track that handshake.
*/
unsigned int shutdown_complete_state_reported_to = 0;
gboolean shutdown_complete_state_reported_client_closed = FALSE;
/* state we report when asked via pacemakerd-api status-ping */
const char *pacemakerd_state = PCMK__VALUE_INIT;
gboolean running_with_sbd = FALSE; /* local copy */
GMainLoop *mainloop = NULL;
static gboolean fatal_error = FALSE;
static int child_liveness(pcmk_child_t *child);
static gboolean escalate_shutdown(gpointer data);
static int start_child(pcmk_child_t * child);
static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
static void pcmk_process_exit(pcmk_child_t * child);
static gboolean pcmk_shutdown_worker(gpointer user_data);
static gboolean stop_child(pcmk_child_t * child, int signal);
/*!
* \internal
* \brief Get path to subdaemon executable
*
* \param[in] subdaemon Subdaemon to get path for
*
* \return Newly allocated string with path to subdaemon executable
* \note It is the caller's responsibility to free() the return value
*/
static inline char *
subdaemon_path(pcmk_child_t *subdaemon)
{
return crm_strdup_printf(CRM_DAEMON_DIR "/%s",
pcmk__server_name(subdaemon->server));
}
static bool
pcmkd_cluster_connected(void)
{
#if SUPPORT_COROSYNC
return pcmkd_corosync_connected();
#else
return true;
#endif
}
static gboolean
check_next_subdaemon(gpointer user_data)
{
static int next_child = 0;
pcmk_child_t *child = &(pcmk_children[next_child]);
const char *name = pcmk__server_name(child->server);
const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
int rc = child_liveness(child);
crm_trace("Checked subdaemon %s[%lld]: %s (%d)",
name, pid, pcmk_rc_str(rc), rc);
switch (rc) {
case pcmk_rc_ok:
child->check_count = 0;
subdaemon_check_progress = time(NULL);
break;
case pcmk_rc_ipc_pid_only: // Child was previously OK
if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
+ // cts-lab looks for this message
crm_crit("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s and will now be killed",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
stop_child(child, SIGKILL);
if (pcmk_is_set(child->flags, child_respawn)) {
// Respawn limit hasn't been reached, so retry another round
child->check_count = 0;
}
} else {
crm_notice("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s (will recheck later)",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
if (pcmk_is_set(child->flags, child_respawn)) {
/* As long as the respawn limit hasn't been reached and we haven't
* run out of connection retries, count this as progress we are
* willing to report to sbd.
*/
subdaemon_check_progress = time(NULL);
}
}
/* Go to the next child and see if we can make progress there */
break;
case pcmk_rc_ipc_unresponsive:
if (!pcmk_is_set(child->flags, child_respawn)) {
/* If a subdaemon is down and we don't want it restarted, that is a
* success during shutdown. If it is no longer being restarted only
* because MAX_RESPAWN was exceeded, it is not a success.
*/
if (child->respawn_count <= MAX_RESPAWN) {
subdaemon_check_progress = time(NULL);
}
}
if (!pcmk_is_set(child->flags, child_active_before_startup)) {
crm_trace("Subdaemon %s[%lld] terminated", name, pid);
break;
}
if (pcmk_is_set(child->flags, child_respawn)) {
+ // cts-lab looks for this message
crm_err("Subdaemon %s[%lld] terminated", name, pid);
} else {
/* orderly shutdown */
crm_notice("Subdaemon %s[%lld] terminated", name, pid);
}
pcmk_process_exit(child);
break;
default:
crm_exit(CRM_EX_FATAL);
break; /* static analysis/noreturn */
}
if (++next_child >= PCMK__NELEM(pcmk_children)) {
next_child = 0;
}
return G_SOURCE_CONTINUE;
}
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
if (child->pid == PCMK__SPECIAL_PID) {
pcmk_process_exit(child);
} else if (child->pid != 0) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Subdaemon %s not terminating in a timely manner, forcing",
pcmk__server_name(child->server));
stop_child(child, SIGSEGV);
}
return FALSE;
}
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
pcmk_child_t *child = mainloop_child_userdata(p);
const char *name = mainloop_child_name(p);
if (signo) {
+ // cts-lab looks for this message
do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
"%s[%d] terminated with signal %d (%s)%s",
name, pid, signo, strsignal(signo),
(core? " and dumped core" : ""));
} else {
switch(exitcode) {
case CRM_EX_OK:
crm_info("%s[%d] exited with status %d (%s)",
name, pid, exitcode, crm_exit_str(exitcode));
break;
case CRM_EX_FATAL:
crm_warn("Shutting cluster down because %s[%d] had fatal failure",
name, pid);
child->flags &= ~child_respawn;
fatal_error = TRUE;
pcmk_shutdown(SIGTERM);
break;
case CRM_EX_PANIC:
crm_emerg("%s[%d] instructed the machine to reset", name, pid);
child->flags &= ~child_respawn;
fatal_error = TRUE;
pcmk__panic(__func__);
pcmk_shutdown(SIGTERM);
break;
default:
+ // cts-lab looks for this message
crm_err("%s[%d] exited with status %d (%s)",
name, pid, exitcode, crm_exit_str(exitcode));
break;
}
}
pcmk_process_exit(child);
}
static void
pcmk_process_exit(pcmk_child_t * child)
{
const char *name = pcmk__server_name(child->server);
child->pid = 0;
child->flags &= ~child_active_before_startup;
child->check_count = 0;
child->respawn_count += 1;
if (child->respawn_count > MAX_RESPAWN) {
crm_err("Subdaemon %s exceeded maximum respawn count", name);
child->flags &= ~child_respawn;
}
if (shutdown_trigger) {
/* resume step-wise shutdown (returned TRUE yields no parallelizing) */
mainloop_set_trigger(shutdown_trigger);
} else if (!pcmk_is_set(child->flags, child_respawn)) {
/* nothing to do */
} else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
crm_err("Rebooting system because of subdaemon %s failure", name);
pcmk__panic(__func__);
} else if (child_liveness(child) == pcmk_rc_ok) {
crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK",
name, pcmk__server_ipc_name(child->server));
} else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
crm_notice("Not respawning subdaemon %s until cluster returns", name);
child->flags |= child_needs_retry;
} else {
+ // cts-lab looks for this message
crm_notice("Respawning subdaemon %s after unexpected exit", name);
start_child(child);
}
}
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = PCMK__NELEM(pcmk_children) - 1;
static time_t next_log = 0;
if (phase == PCMK__NELEM(pcmk_children) - 1) {
crm_notice("Shutting down Pacemaker");
pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
}
for (; phase >= 0; phase--) {
pcmk_child_t *child = &(pcmk_children[phase]);
const char *name = pcmk__server_name(child->server);
if (child->pid != 0) {
time_t now = time(NULL);
if (pcmk_is_set(child->flags, child_respawn)) {
if (child->pid == PCMK__SPECIAL_PID) {
crm_warn("Subdaemon %s cannot be terminated (shutdown "
"will be escalated after %ld seconds if it does "
"not terminate on its own; set PCMK_"
PCMK__ENV_FAIL_FAST "=1 to exit immediately "
"instead)",
name, (long) SHUTDOWN_ESCALATION_PERIOD);
}
next_log = now + 30;
child->flags &= ~child_respawn;
stop_child(child, SIGTERM);
if (phase < PCMK_CHILD_CONTROLD) {
g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
escalate_shutdown, child);
}
} else if (now >= next_log) {
next_log = now + 30;
crm_notice("Still waiting for subdaemon %s to terminate "
QB_XS " pid=%lld", name, (long long) child->pid);
}
return TRUE;
}
/* cleanup */
crm_debug("Subdaemon %s confirmed stopped", name);
child->pid = 0;
}
crm_notice("Shutdown complete");
pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
if (!fatal_error && running_with_sbd &&
pcmk__get_sbd_sync_resource_startup() &&
!shutdown_complete_state_reported_client_closed) {
crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
return TRUE;
}
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Shutting down and staying down after fatal error");
#ifdef SUPPORT_COROSYNC
pcmkd_shutdown_corosync();
#endif
crm_exit(CRM_EX_FATAL);
}
return TRUE;
}
/* TODO: Once libqb can pass IPC endpoints around as bare file descriptors
* (https://github.com/ClusterLabs/libqb/issues/325), the descriptors
* pre-opened in (presumably) child_liveness() should be handed over here,
* to close any remaining window for races.
*/
// \return Standard Pacemaker return code
static int
start_child(pcmk_child_t * child)
{
uid_t uid = 0;
gid_t gid = 0;
gboolean use_valgrind = FALSE;
gboolean use_callgrind = FALSE;
const char *name = pcmk__server_name(child->server);
const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
child->flags &= ~child_active_before_startup;
child->check_count = 0;
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if ((env_callgrind != NULL)
&& (strstr(env_callgrind, name) != NULL)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
use_valgrind = TRUE;
} else if ((env_valgrind != NULL)
&& (strstr(env_valgrind, name) != NULL)) {
use_valgrind = TRUE;
}
if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found",
name);
use_valgrind = FALSE;
}
if (child->uid) {
if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
crm_err("Invalid user (%s) for subdaemon %s: not found",
child->uid, name);
return EACCES;
}
crm_info("Using uid %lu and group %lu for subdaemon %s",
(unsigned long) uid, (unsigned long) gid, name);
}
child->pid = fork();
CRM_ASSERT(child->pid != -1);
if (child->pid > 0) {
/* parent */
mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit);
crm_info("Forked process %lld for subdaemon %s%s",
(long long) child->pid, name,
use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
return pcmk_rc_ok;
} else {
/* Start a new session */
(void)setsid();
/* Setup the two alternate arg arrays */
opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN);
if (use_callgrind) {
opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
CRM_STATE_DIR "/callgrind.out.%p");
opts_vgrind[3] = subdaemon_path(child);
opts_vgrind[4] = NULL;
} else {
opts_vgrind[1] = subdaemon_path(child);
opts_vgrind[2] = NULL;
opts_vgrind[3] = NULL;
opts_vgrind[4] = NULL;
}
opts_default[0] = subdaemon_path(child);
if(gid) {
// Drop root group access if not needed
if (!need_root_group && (setgid(gid) < 0)) {
crm_warn("Could not set subdaemon %s group to %lu: %s",
name, (unsigned long) gid, strerror(errno));
}
/* Initialize supplementary groups to only those always granted to
* the user, plus haclient (so we can access IPC).
*/
if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initialize system groups for subdaemon %s: %s "
QB_XS " errno=%d",
name, pcmk_rc_str(errno), errno);
}
}
if (uid && setuid(uid) < 0) {
crm_warn("Could not set subdaemon %s user to %s: %s "
QB_XS " uid=%lu errno=%d",
name, strerror(errno), child->uid, (unsigned long) uid,
errno);
}
pcmk__close_fds_in_child(true);
pcmk__open_devnull(O_RDONLY); // stdin (fd 0)
pcmk__open_devnull(O_WRONLY); // stdout (fd 1)
pcmk__open_devnull(O_WRONLY); // stderr (fd 2)
if (use_valgrind) {
(void)execvp(VALGRIND_BIN, opts_vgrind);
} else {
char *path = subdaemon_path(child);
(void) execvp(path, opts_default);
free(path);
}
crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno));
crm_exit(CRM_EX_FATAL);
}
return pcmk_rc_ok; /* never reached */
}
/*!
* \internal
* \brief Check the liveness of the child based on IPC name and PID if tracked
*
* \param[in,out] child Child tracked data
*
* \return Standard Pacemaker return code
*
* \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
* indicating that no trace of IPC liveness was detected,
* pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
* an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
* the child is up by PID but not IPC end-point (possibly starting).
* \note This function modifies no members of \p child other than \c pid, and
* does not act on processes itself, apart from invoking \c stop_child()
* in one particular case (when, for some reason, a different authentic
* process holds the IPC endpoint).
*/
static int
child_liveness(pcmk_child_t *child)
{
uid_t cl_uid = 0;
gid_t cl_gid = 0;
const uid_t root_uid = 0;
const gid_t root_gid = 0;
const uid_t *ref_uid;
const gid_t *ref_gid;
const char *name = pcmk__server_name(child->server);
int rc = pcmk_rc_ipc_unresponsive;
int legacy_rc = pcmk_ok;
pid_t ipc_pid = 0;
if (child->uid == NULL) {
ref_uid = &root_uid;
ref_gid = &root_gid;
} else {
ref_uid = &cl_uid;
ref_gid = &cl_gid;
legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
}
if (legacy_rc < 0) {
rc = pcmk_legacy2rc(legacy_rc);
crm_err("Could not find user and group IDs for user %s: %s "
QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
} else {
const char *ipc_name = pcmk__server_ipc_name(child->server);
rc = pcmk__ipc_is_authentic_process_active(ipc_name,
*ref_uid, *ref_gid,
&ipc_pid);
if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
if (child->pid <= 0) {
/* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
* initializes a new child. If rc is
* pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
* investigate further.
*/
child->pid = ipc_pid;
} else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
/* An unexpected (but authorized) process is responding to
* IPC. Investigate further.
*/
rc = pcmk_rc_ipc_unresponsive;
}
}
}
if (rc == pcmk_rc_ipc_unresponsive) {
/* If we get here, either a child without IPC is being tracked, no IPC
* liveness has been detected, or IPC liveness has been detected with an
* unexpected (but authorized) process. This is safe even on FreeBSD, where
* the only possible change from a proper child's PID is to the "special"
* PID of 1 standing in for a more loosely related process.
*/
int ret = pcmk__pid_active(child->pid, name);
if (ipc_pid && ((ret != pcmk_rc_ok)
|| ipc_pid == PCMK__SPECIAL_PID
|| (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) {
/* An unexpected (but authorized) process was detected at the IPC
* endpoint, and either it is active, or the child we're tracking is
* not.
*/
if (ret == pcmk_rc_ok) {
/* The child we're tracking is active. Kill it, and adopt the
* detected process. This assumes that our children don't fork
* (thus getting a different PID owning the IPC), but rather the
* tracking got out of sync because of some means external to
* Pacemaker, and adopting the detected process is better than
* killing it and possibly having to spawn a new child.
*/
/* not possessing IPC, after all (what about corosync CPG?) */
stop_child(child, SIGKILL);
}
rc = pcmk_rc_ok;
child->pid = ipc_pid;
} else if (ret == pcmk_rc_ok) {
// Our tracked child's PID was found active, but not its IPC
rc = pcmk_rc_ipc_pid_only;
} else if ((child->pid == 0) && (ret == EINVAL)) {
// FreeBSD can return EINVAL
rc = pcmk_rc_ipc_unresponsive;
} else {
switch (ret) {
case EACCES:
rc = pcmk_rc_ipc_unauthorized;
break;
case ESRCH:
rc = pcmk_rc_ipc_unresponsive;
break;
default:
rc = ret;
break;
}
}
}
return rc;
}
/*!
* \internal
* \brief Initial one-off check of pre-existing "child" processes
*
* By "child" process we mean a subdaemon that defines an IPC API endpoint
* (as of this writing, all of them do). Any subdaemon that did not would be
* skipped, since it has no such shared resource to conflict over and can
* presumably be started anew without hesitation. If that ever stops holding
* true, the concept of a shared resource will have to be generalized beyond
* the API endpoint.
*
* For the boundary cases where a "child" is still starting (its IPC endpoint
* has not yet been witnessed), or, more rarely (practically FreeBSD only),
* where a pre-existing "untrackable" authentic process exists, we give the
* situation some time to resolve itself: either the socket appears, or the
* unattainable process disappears as far as the observable IPC is concerned.
*
* \return Standard Pacemaker return code
*
* \note Since this runs at the very start, the \c respawn_count field of each
* child is temporarily overloaded to track "rounds of waiting", and is
* restored once we are about to finish successfully; otherwise it remains
* unrestored. One way to suppress the liveness-detection logic for a
* particular child is to set that field to a negative number.
*/
#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
int
find_and_track_existing_processes(void)
{
bool wait_in_progress;
int rc;
size_t i, rounds;
for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
wait_in_progress = false;
for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
const char *name = pcmk__server_name(pcmk_children[i].server);
const char *ipc_name = NULL;
if (pcmk_children[i].respawn_count < 0) {
continue;
}
rc = child_liveness(&pcmk_children[i]);
if (rc == pcmk_rc_ipc_unresponsive) {
/* As a speculation, don't give up if there are more rounds to
* come for other reasons, but don't artificially wait just
* because of this, since we would preferably start ASAP.
*/
continue;
}
// @TODO Functionize more of this to reduce nesting
ipc_name = pcmk__server_ipc_name(pcmk_children[i].server);
pcmk_children[i].respawn_count = rounds;
switch (rc) {
case pcmk_rc_ok:
if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
crm_crit("Cannot reliably track pre-existing"
" authentic process behind %s IPC on this"
" platform and PCMK_" PCMK__ENV_FAIL_FAST
" requested", ipc_name);
return EOPNOTSUPP;
} else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
crm_notice("Assuming pre-existing authentic, though"
" on this platform untrackable, process"
" behind %s IPC is stable (was in %d"
" previous samples) so rather than"
" bailing out (PCMK_" PCMK__ENV_FAIL_FAST
" not requested), we just switch to a"
" less optimal IPC liveness monitoring"
" (not very suitable for heavy load)",
name, WAIT_TRIES - 1);
crm_warn("The process behind %s IPC cannot be"
" terminated, so the overall shutdown"
" will get delayed implicitly (%ld s),"
" which serves as a graceful period for"
" its native termination if it vitally"
" depends on some other daemons going"
" down in a controlled way already",
name, (long) SHUTDOWN_ESCALATION_PERIOD);
} else {
wait_in_progress = true;
crm_warn("Cannot reliably track pre-existing"
" authentic process behind %s IPC on this"
" platform, can still disappear in %d"
" attempt(s)", ipc_name,
WAIT_TRIES - pcmk_children[i].respawn_count);
continue;
}
}
crm_notice("Tracking existing %s process (pid=%lld)",
name,
(long long) PCMK__SPECIAL_PID_AS_0(
pcmk_children[i].pid));
pcmk_children[i].respawn_count = -1; /* 0~keep watching */
pcmk_children[i].flags |= child_active_before_startup;
break;
case pcmk_rc_ipc_pid_only:
if (pcmk_children[i].respawn_count == WAIT_TRIES) {
crm_crit("%s IPC endpoint for existing authentic"
" process %lld did not (re)appear",
ipc_name,
(long long) PCMK__SPECIAL_PID_AS_0(
pcmk_children[i].pid));
return rc;
}
wait_in_progress = true;
crm_warn("Cannot find %s IPC endpoint for existing"
" authentic process %lld, can still (re)appear"
" in %d attempts (?)",
ipc_name,
(long long) PCMK__SPECIAL_PID_AS_0(
pcmk_children[i].pid),
WAIT_TRIES - pcmk_children[i].respawn_count);
continue;
default:
crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d",
name, pcmk_rc_str(rc), rc);
return rc;
}
}
if (!wait_in_progress) {
break;
}
pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
}
for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
pcmk_children[i].respawn_count = 0; /* restore pristine state */
}
g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
NULL);
return pcmk_rc_ok;
}
gboolean
init_children_processes(void *user_data)
{
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
/* Corosync clusters can drop root group access, because we set
* uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
* to corosync.
*/
need_root_group = false;
}
/* start any children that have not been detected */
for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
if (pcmk_children[i].pid != 0) {
/* we are already tracking it */
continue;
}
start_child(&(pcmk_children[i]));
}
/* From this point on, any daemons being started will be due to
* respawning rather than node start.
*
* This may be useful for the daemons to know
*/
pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
pacemakerd_state = PCMK__VALUE_RUNNING;
return TRUE;
}
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
void
restart_cluster_subdaemons(void)
{
for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
continue;
}
crm_notice("Respawning cluster-based subdaemon %s",
pcmk__server_name(pcmk_children[i].server));
if (start_child(&pcmk_children[i])) {
pcmk_children[i].flags &= ~child_needs_retry;
}
}
}
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
const char *name = pcmk__server_name(child->server);
if (signal == 0) {
signal = SIGTERM;
}
/* Why skip a PID of 1?
* - FreeBSD: it is how an untrackable process behind an IPC endpoint is
*   masqueraded
* - elsewhere: it designates the "init" task; in particular, with systemd's
*   socket-based activation, this is a real possibility */
if (child->pid == PCMK__SPECIAL_PID) {
crm_debug("Nothing to do to stop subdaemon %s[%lld]",
name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
return TRUE;
}
if (child->pid <= 0) {
crm_trace("Nothing to do to stop subdaemon %s: Not running", name);
return TRUE;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping subdaemon %s "
QB_XS " via signal %d to process %lld",
name, signal, (long long) child->pid);
} else {
crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s",
name, (long long) child->pid, signal, strerror(errno));
}
return TRUE;
}
diff --git a/python/pacemaker/_cts/patterns.py b/python/pacemaker/_cts/patterns.py
index 03a405c708..4950e95686 100644
--- a/python/pacemaker/_cts/patterns.py
+++ b/python/pacemaker/_cts/patterns.py
@@ -1,396 +1,397 @@
"""Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS)."""
__all__ = ["PatternSelector"]
__copyright__ = "Copyright 2008-2024 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+)"
import argparse
from pacemaker.buildoptions import BuildOptions
class BasePatterns:
"""
The base class for holding a stack-specific set of command and log file/stdout patterns.
Stack-specific classes need to be built on top of this one.
"""
def __init__(self):
"""Create a new BasePatterns instance which holds a very minimal set of basic patterns."""
self._bad_news = []
self._components = {}
self._name = "crm-base"
self._ignore = [
"avoid confusing Valgrind",
# Logging bug in some versions of libvirtd
r"libvirtd.*: internal error: Failed to parse PCI config address",
# pcs can log this when node is fenced, but fencing is OK in some
# tests (and we will catch it in pacemaker logs when not OK)
r"pcs.daemon:No response from: .* request: get_configs, error:",
# This is overbroad, but there's no way to say that only certain
# transition errors are acceptable. We have to rely on causes of a
# transition error logging their own error message, which should
# always be the case.
r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self._commands = {
"StatusCmd": "crmadmin -t 60 -S %s 2>/dev/null",
"CibQuery": "cibadmin -Ql",
"CibAddXml": "cibadmin --modify -c --xml-text %s",
"CibDelXpath": "cibadmin --delete --xpath %s",
"RscRunning": BuildOptions.DAEMON_DIR + "/cts-exec-helper -R -r %s",
"CIBfile": "%s:" + BuildOptions.CIB_DIR + "/cib.xml",
"TmpDir": "/tmp",
"BreakCommCmd": "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
"FixCommCmd": "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
"MaintenanceModeOn": "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'",
"MaintenanceModeOff": "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",
"StandbyCmd": "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null",
"StandbyQueryCmd": "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null",
}
self._search = {
"Pat:DC_IDLE": r"pacemaker-controld.*State transition.*-> S_IDLE",
# This won't work if we have multiple partitions
"Pat:Local_started": r"%s\W.*controller successfully started",
"Pat:NonDC_started": r"%s\W.*State transition.*-> S_NOT_DC",
"Pat:DC_started": r"%s\W.*State transition.*-> S_IDLE",
"Pat:We_stopped": r"%s\W.*OVERRIDE THIS PATTERN",
"Pat:They_stopped": r"%s\W.*LOST:.* %s ",
"Pat:They_dead": r"node %s.*: is dead",
"Pat:They_up": r"%s %s\W.*OVERRIDE THIS PATTERN",
"Pat:TransitionComplete": "Transition status: Complete: complete",
"Pat:Fencing_start": r"Requesting peer fencing .* targeting %s",
"Pat:Fencing_ok": r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK",
"Pat:Fencing_recover": r"pacemaker-schedulerd.*: Recover\s+%s",
"Pat:Fencing_active": r"stonith resource .* is active on 2 nodes (attempting recovery)",
"Pat:Fencing_probe": r"pacemaker-controld.* Result of probe operation for %s on .*: Error",
"Pat:RscOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok",
"Pat:RscOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of %s ",
"Pat:CloneOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of (%s|%s) ",
"Pat:RscRemoteOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok",
"Pat:NodeFenced": r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK",
}
def get_component(self, key):
"""
Return the patterns for a single component as a list, given by key.
This is typically the name of some subprogram (pacemaker-based,
pacemaker-fenced, etc.) or various special purpose keys. If key is
unknown, return an empty list.
"""
if key in self._components:
return self._components[key]
print("Unknown component '%s' for %s" % (key, self._name))
return []
def get_patterns(self, key):
"""
Return various patterns supported by this object, given by key.
Depending on the key, this could either be a list or a hash. If key is
unknown, return None.
"""
if key == "BadNews":
return self._bad_news
if key == "BadNewsIgnore":
return self._ignore
if key == "Commands":
return self._commands
if key == "Search":
return self._search
if key == "Components":
return self._components
print("Unknown pattern '%s' for %s" % (key, self._name))
return None
def __getitem__(self, key):
if key == "Name":
return self._name
if key in self._commands:
return self._commands[key]
if key in self._search:
return self._search[key]
print("Unknown template '%s' for %s" % (key, self._name))
return None
class Corosync2Patterns(BasePatterns):
"""Patterns for Corosync version 2 cluster manager class."""
def __init__(self):
BasePatterns.__init__(self)
self._name = "crm-corosync"
self._commands.update({
"StartCmd": "service corosync start && service pacemaker start",
"StopCmd": "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop",
"EpochCmd": "crm_node -e",
"QuorumCmd": "crm_node -q",
"PartitionCmd": "crm_node -p",
})
self._search.update({
# Close enough ... "Corosync Cluster Engine exiting normally" isn't
# printed reliably.
"Pat:We_stopped": r"%s\W.*Unloading all Corosync service engines",
"Pat:They_stopped": r"%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:They_dead": r"pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:They_up": r"\W%s\W.*pacemaker-controld.*Node %s state is now member",
"Pat:ChildExit": r"\[[0-9]+\] exited with status [0-9]+ \(",
# "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes()
"Pat:ChildKilled": r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated( with signal 9|$)",
- "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning %s subdaemon after unexpected exit",
+ "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning subdaemon %s after unexpected exit",
"Pat:InfraUp": r"%s\W.*corosync.*Initializing transport",
"Pat:PacemakerUp": r"%s\W.*pacemakerd.*Starting Pacemaker",
})
self._ignore += [
r"crm_mon:",
r"crmadmin:",
r"update_trace_data",
r"async_notify:.*strange, client not found",
r"Parse error: Ignoring unknown option .*nodename",
r"error.*: Operation 'reboot' .* using FencingFail returned ",
r"getinfo response error: 1$",
r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE",
r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)",
]
self._bad_news = [
r"[^(]error:",
r"crit:",
r"ERROR:",
r"CRIT:",
r"Shutting down...NOW",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting",
r"schedulerd.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r":global_timer_callback",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
- r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)",
+ r"pacemakerd.*\[[0-9]+\] terminated( with signal|$)",
+ r"pacemakerd.*\[[0-9]+\] .* will now be killed",
r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)",
r"rsyslogd.* lost .* due to rate-limiting",
r"Peer is not part of our cluster",
r"We appear to be in an election loop",
r"Unknown node -> we will not deliver message",
r"(Blackbox dump requested|Problem detected)",
r"pacemakerd.*Could not connect to Cluster Configuration Database API",
r"Receiving messages from a node we think is dead",
r"share the same cluster nodeid",
r"share the same name",
r"pacemaker-controld:.*Transition failed: terminated",
r"Local CIB .* differs from .*:",
r"warn.*:\s*Continuing but .* will NOT be used",
r"warn.*:\s*Cluster configuration file .* is corrupt",
r"Election storm",
r"stalled the FSA with pending inputs",
]
self._components["common-ignore"] = [
r"Pending action:",
r"resource( was|s were) active at shutdown",
r"pending LRM operations at shutdown",
r"Lost connection to the CIB manager",
r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported",
r"pacemaker-controld.*:\s*Exiting now due to errors",
r".*:\s*Requesting fencing \([^)]+\) targeting node ",
r"(Blackbox dump requested|Problem detected)",
]
self._components["corosync-ignore"] = [
r"Could not connect to Corosync CFG: CS_ERR_LIBRARY",
r"error:.*Connection to the CPG API failed: Library error",
r"\[[0-9]+\] exited with status [0-9]+ \(",
r"\[[0-9]+\] terminated with signal 15",
r"pacemaker-based.*error:.*Corosync connection lost",
r"pacemaker-fenced.*error:.*Corosync connection terminated",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state",
r"pacemaker-controld.*error:.*Could not recover from internal error",
r"error:.*Connection to cib_(shm|rw).* (failed|closed)",
r"error:.*cib_(shm|rw) IPC provider disconnected while waiting",
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"error: Lost fencer connection",
]
self._components["corosync"] = [
# We expect each daemon to lose its cluster connection.
# However, if the CIB manager loses its connection first,
# it's possible for another daemon to lose that connection and
# exit before losing the cluster connection.
r"pacemakerd.*:\s*warning:.*Lost connection to cluster layer",
r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (Corosync process group|the CIB manager)",
r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer",
r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"schedulerd.*Scheduling node .* for fencing",
r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK",
]
self._components["pacemaker-based"] = [
r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102",
r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1",
- r"pacemakerd.* Respawning pacemaker-attrd subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-based subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-fenced subdaemon after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-attrd after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-based after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-fenced after unexpected exit",
r"pacemaker-.* Connection to cib_.* (failed|closed)",
r"pacemaker-attrd.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*I_ERROR.*handle_cib_disconnect",
r"pacemaker-controld.* State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
]
self._components["pacemaker-based-ignore"] = [
r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
r"pacemaker-controld.*:Could not connect to attrd: Connection refused",
]
self._components["pacemaker-execd"] = [
r"pacemaker-controld.*Lost connection to local executor",
r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1",
- r"pacemakerd.* Respawning pacemaker-execd subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-execd after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit",
]
self._components["pacemaker-execd-ignore"] = [
r"pacemaker-(attrd|controld).*Connection to lrmd.* (failed|closed)",
r"pacemaker-(attrd|controld).*Could not execute alert",
]
self._components["pacemaker-controld"] = [
r"State transition .* -> S_IDLE",
]
self._components["pacemaker-controld-ignore"] = []
self._components["pacemaker-attrd"] = []
self._components["pacemaker-attrd-ignore"] = []
self._components["pacemaker-schedulerd"] = [
r"State transition .* S_RECOVERY",
- r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit",
r"pacemaker-controld\[[0-9]+\] exited with status 1 \(",
r"pacemaker-controld.*Lost connection to the scheduler",
r"pacemaker-controld.*I_ERROR.*save_cib_contents",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
]
self._components["pacemaker-schedulerd-ignore"] = [
r"Connection to pengine.* (failed|closed)",
]
self._components["pacemaker-fenced"] = [
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"Lost fencer connection",
r"pacemaker-controld.*Fencer successfully connected",
]
self._components["pacemaker-fenced-ignore"] = [
r"(error|warning):.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"error:.*Lost fencer connection",
r"error:.*Fencer connection failed \(will retry\)",
r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
]
self._components["pacemaker-fenced-ignore"].extend(self._components["common-ignore"])
patternVariants = {
"crm-base": BasePatterns,
"crm-corosync": Corosync2Patterns
}
class PatternSelector:
"""Choose from among several Pattern objects and return the information from that object."""
def __init__(self, name="crm-corosync"):
"""
Create a new PatternSelector object.
Instantiate whatever class is given by name. Defaults to Corosync2Patterns
for "crm-corosync" or None. While other objects could be supported in the
future, only this and the base object are supported at this time.
"""
self._name = name
# If no name was given, use the default. Otherwise, look up the appropriate
# class in patternVariants, instantiate it, and use that.
if not name:
self._base = Corosync2Patterns()
else:
self._base = patternVariants[name]()
def get_patterns(self, kind):
"""Call get_patterns on the previously instantiated pattern object."""
return self._base.get_patterns(kind)
def get_template(self, key):
"""
Return a single pattern from the previously instantiated pattern object.
If no pattern exists for the given key, return None.
"""
return self._base[key]
def get_component(self, kind):
"""Call get_component on the previously instantiated pattern object."""
return self._base.get_component(kind)
def __getitem__(self, key):
"""Return the pattern for the given key, or None if it does not exist."""
return self.get_template(key)
# PYTHONPATH=python python python/pacemaker/_cts/patterns.py -k crm-corosync -t StartCmd
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("-k", "--kind", metavar="KIND")
parser.add_argument("-t", "--template", metavar="TEMPLATE")
args = parser.parse_args()
print(PatternSelector(args.kind)[args.template])
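
The Python patterns above must stay in sync with the C log messages marked "cts-lab looks for this message" in the pacemakerd diff. As a minimal sketch of how one of these templates resolves (the node and subdaemon names below are illustrative placeholders, and it assumes the repository's python/ directory is on PYTHONPATH):

from pacemaker._cts.patterns import PatternSelector

patterns = PatternSelector("crm-corosync")
# "Pat:ChildRespawn" takes a node name and a subdaemon name, in that order;
# both values here are placeholders for illustration
respawn_re = patterns["Pat:ChildRespawn"] % ("node1", "pacemaker-based")
print(respawn_re)  # regex matching "Respawning subdaemon pacemaker-based after unexpected exit" logged by node1's pacemakerd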
