diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
index 1056783ecf..999946abf9 100644
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
@@ -1,881 +1,886 @@
/*
* Copyright 2010-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#if SUPPORT_COROSYNC
#include "pcmkd_corosync.h"
#endif
#include <errno.h>
#include <grp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <crm/cluster.h>
#include <crm/common/xml.h>
enum child_daemon_flags {
child_none = 0,
child_respawn = 1 << 0,
child_needs_cluster = 1 << 1,
child_needs_retry = 1 << 2,
child_active_before_startup = 1 << 3,
};
typedef struct pcmk_child_s {
enum pcmk_ipc_server server;
pid_t pid;
int respawn_count;
const char *uid;
int check_count;
uint32_t flags;
} pcmk_child_t;
#define PCMK_PROCESS_CHECK_INTERVAL 1
#define PCMK_PROCESS_CHECK_RETRIES 5
#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
/* Index into the array below */
#define PCMK_CHILD_CONTROLD 5
static pcmk_child_t pcmk_children[] = {
{
pcmk_ipc_based, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_fenced, 0, 0, NULL,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_execd, 0, 0, NULL,
0, child_respawn
},
{
pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER,
0, child_respawn
},
{
pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
};
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
crm_trigger_t *shutdown_trigger = NULL;
crm_trigger_t *startup_trigger = NULL;
time_t subdaemon_check_progress = 0;
// Whether we need root group access to talk to cluster layer
static bool need_root_group = true;
/* When contacted via the pacemakerd API by a client with "sbd" in
* its name, we assume it is the sbd daemon, which wants to know
* whether pacemakerd shut down gracefully.
* Thus, when everything has been shut down properly, pacemakerd
* waits until it has reported the graceful completion of shutdown
* to sbd; only when the sbd client closes the connection can we
* assume that the report arrived properly, so that pacemakerd can
* finally exit.
* The following two variables are used to track that handshake.
*/
unsigned int shutdown_complete_state_reported_to = 0;
gboolean shutdown_complete_state_reported_client_closed = FALSE;
/* state we report when asked via pacemakerd-api status-ping */
const char *pacemakerd_state = PCMK__VALUE_INIT;
gboolean running_with_sbd = FALSE; /* local copy */
GMainLoop *mainloop = NULL;
static gboolean fatal_error = FALSE;
static int child_liveness(pcmk_child_t *child);
static gboolean escalate_shutdown(gpointer data);
static int start_child(pcmk_child_t * child);
static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
static void pcmk_process_exit(pcmk_child_t * child);
static gboolean pcmk_shutdown_worker(gpointer user_data);
static gboolean stop_child(pcmk_child_t * child, int signal);
/*!
* \internal
* \brief Get path to subdaemon executable
*
* \param[in] subdaemon Subdaemon to get path for
*
* \return Newly allocated string with path to subdaemon executable
* \note It is the caller's responsibility to free() the return value
*/
static inline char *
subdaemon_path(pcmk_child_t *subdaemon)
{
return crm_strdup_printf(CRM_DAEMON_DIR "/%s",
pcmk__server_name(subdaemon->server));
}
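/*!
* \internal
* \brief Check whether pacemakerd is connected to the cluster layer
*
* \return \c true if the Corosync connection is active (or Corosync support
* is not compiled in), otherwise \c false
*/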
static bool
pcmkd_cluster_connected(void)
{
#if SUPPORT_COROSYNC
return pcmkd_corosync_connected();
#else
return true;
#endif
}
static gboolean
check_next_subdaemon(gpointer user_data)
{
static int next_child = 0;
pcmk_child_t *child = &(pcmk_children[next_child]);
const char *name = pcmk__server_name(child->server);
const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
int rc = child_liveness(child);
crm_trace("Checked subdaemon %s[%lld]: %s (%d)",
name, pid, pcmk_rc_str(rc), rc);
switch (rc) {
case pcmk_rc_ok:
child->check_count = 0;
subdaemon_check_progress = time(NULL);
break;
case pcmk_rc_ipc_pid_only: // Child was previously OK
if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
+ // cts-lab looks for this message
crm_crit("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s and will now be killed",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
stop_child(child, SIGKILL);
if (pcmk_is_set(child->flags, child_respawn)) {
// Respawn limit hasn't been reached, so retry another round
child->check_count = 0;
}
} else {
crm_notice("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s (will recheck later)",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
if (pcmk_is_set(child->flags, child_respawn)) {
/* As long as the respawn limit hasn't been reached
and we haven't run out of connect retries,
we count this as progress that we are
willing to report to sbd
*/
subdaemon_check_progress = time(NULL);
}
}
/* Go to the next child and see if we can make progress there */
break;
case pcmk_rc_ipc_unresponsive:
if (!pcmk_is_set(child->flags, child_respawn)) {
/* If a subdaemon is down and we don't want it
restarted, that is a success during shutdown.
However, if it is no longer being restarted
because MAX_RESPAWN was exceeded, it is
rather a failure.
*/
if (child->respawn_count <= MAX_RESPAWN) {
subdaemon_check_progress = time(NULL);
}
}
if (!pcmk_is_set(child->flags, child_active_before_startup)) {
crm_trace("Subdaemon %s[%lld] terminated", name, pid);
break;
}
if (pcmk_is_set(child->flags, child_respawn)) {
+ // cts-lab looks for this message
crm_err("Subdaemon %s[%lld] terminated", name, pid);
} else {
/* orderly shutdown */
crm_notice("Subdaemon %s[%lld] terminated", name, pid);
}
pcmk_process_exit(child);
break;
default:
crm_exit(CRM_EX_FATAL);
break; /* static analysis/noreturn */
}
if (++next_child >= PCMK__NELEM(pcmk_children)) {
next_child = 0;
}
return G_SOURCE_CONTINUE;
}
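/*!
* \internal
* \brief Forcefully stop a subdaemon that did not terminate in time
*
* \param[in,out] data Subdaemon to stop (pcmk_child_t *)
*
* \return FALSE (to run the timeout only once)
*/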
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
if (child->pid == PCMK__SPECIAL_PID) {
pcmk_process_exit(child);
} else if (child->pid != 0) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Subdaemon %s not terminating in a timely manner, forcing",
pcmk__server_name(child->server));
stop_child(child, SIGSEGV);
}
return FALSE;
}
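/*!
* \internal
* \brief Handle a subdaemon process exiting (mainloop callback)
*
* \param[in,out] p Mainloop entry for the tracked subdaemon
* \param[in] pid Process ID of the subdaemon that exited
* \param[in] core Whether the process dumped core
* \param[in] signo Signal that terminated the process (0 if none)
* \param[in] exitcode Exit status of the process, if not signaled
*/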
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
pcmk_child_t *child = mainloop_child_userdata(p);
const char *name = mainloop_child_name(p);
if (signo) {
+ // cts-lab looks for this message
do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
"%s[%d] terminated with signal %d (%s)%s",
name, pid, signo, strsignal(signo),
(core? " and dumped core" : ""));
} else {
switch(exitcode) {
case CRM_EX_OK:
crm_info("%s[%d] exited with status %d (%s)",
name, pid, exitcode, crm_exit_str(exitcode));
break;
case CRM_EX_FATAL:
crm_warn("Shutting cluster down because %s[%d] had fatal failure",
name, pid);
child->flags &= ~child_respawn;
fatal_error = TRUE;
pcmk_shutdown(SIGTERM);
break;
case CRM_EX_PANIC:
crm_emerg("%s[%d] instructed the machine to reset", name, pid);
child->flags &= ~child_respawn;
fatal_error = TRUE;
pcmk__panic(__func__);
pcmk_shutdown(SIGTERM);
break;
default:
+ // cts-lab looks for this message
crm_err("%s[%d] exited with status %d (%s)",
name, pid, exitcode, crm_exit_str(exitcode));
break;
}
}
pcmk_process_exit(child);
}
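/*!
* \internal
* \brief Clean up after a subdaemon exits, respawning it if appropriate
*
* \param[in,out] child Subdaemon that exited
*/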
static void
pcmk_process_exit(pcmk_child_t * child)
{
const char *name = pcmk__server_name(child->server);
child->pid = 0;
child->flags &= ~child_active_before_startup;
child->check_count = 0;
child->respawn_count += 1;
if (child->respawn_count > MAX_RESPAWN) {
crm_err("Subdaemon %s exceeded maximum respawn count", name);
child->flags &= ~child_respawn;
}
if (shutdown_trigger) {
/* Resume stepwise shutdown (returning TRUE yields no parallelizing) */
mainloop_set_trigger(shutdown_trigger);
} else if (!pcmk_is_set(child->flags, child_respawn)) {
/* nothing to do */
} else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
crm_err("Rebooting system because of subdaemon %s failure", name);
pcmk__panic(__func__);
} else if (child_liveness(child) == pcmk_rc_ok) {
crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK",
name, pcmk__server_ipc_name(child->server));
} else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
crm_notice("Not respawning subdaemon %s until cluster returns", name);
child->flags |= child_needs_retry;
} else {
+ // cts-lab looks for this message
crm_notice("Respawning subdaemon %s after unexpected exit", name);
start_child(child);
}
}
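/*!
* \internal
* \brief Stop subdaemons one at a time, in reverse start order (trigger job)
*
* \param[in] user_data Ignored
*
* \return TRUE (so the trigger stays usable for reinvocation)
*/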
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = PCMK__NELEM(pcmk_children) - 1;
static time_t next_log = 0;
if (phase == PCMK__NELEM(pcmk_children) - 1) {
crm_notice("Shutting down Pacemaker");
pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
}
for (; phase >= 0; phase--) {
pcmk_child_t *child = &(pcmk_children[phase]);
const char *name = pcmk__server_name(child->server);
if (child->pid != 0) {
time_t now = time(NULL);
if (pcmk_is_set(child->flags, child_respawn)) {
if (child->pid == PCMK__SPECIAL_PID) {
crm_warn("Subdaemon %s cannot be terminated (shutdown "
"will be escalated after %ld seconds if it does "
"not terminate on its own; set PCMK_"
PCMK__ENV_FAIL_FAST "=1 to exit immediately "
"instead)",
name, (long) (SHUTDOWN_ESCALATION_PERIOD / 1000)); // period is in ms
}
next_log = now + 30;
child->flags &= ~child_respawn;
stop_child(child, SIGTERM);
if (phase < PCMK_CHILD_CONTROLD) {
g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
escalate_shutdown, child);
}
} else if (now >= next_log) {
next_log = now + 30;
crm_notice("Still waiting for subdaemon %s to terminate "
QB_XS " pid=%lld", name, (long long) child->pid);
}
return TRUE;
}
/* cleanup */
crm_debug("Subdaemon %s confirmed stopped", name);
child->pid = 0;
}
crm_notice("Shutdown complete");
pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
if (!fatal_error && running_with_sbd &&
pcmk__get_sbd_sync_resource_startup() &&
!shutdown_complete_state_reported_client_closed) {
crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
return TRUE;
}
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Shutting down and staying down after fatal error");
#ifdef SUPPORT_COROSYNC
pcmkd_shutdown_corosync();
#endif
crm_exit(CRM_EX_FATAL);
}
return TRUE;
}
/* TODO: Once libqb is taught to juggle IPC endpoints carried over as
bare file descriptors (https://github.com/ClusterLabs/libqb/issues/325),
it should hand those descriptors over here if/once they are successfully
pre-opened in (presumably) child_liveness(), to avoid any remaining
room for races */
// \return Standard Pacemaker return code
static int
start_child(pcmk_child_t * child)
{
uid_t uid = 0;
gid_t gid = 0;
gboolean use_valgrind = FALSE;
gboolean use_callgrind = FALSE;
const char *name = pcmk__server_name(child->server);
const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
child->flags &= ~child_active_before_startup;
child->check_count = 0;
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if ((env_callgrind != NULL)
&& (strstr(env_callgrind, name) != NULL)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
use_valgrind = TRUE;
} else if ((env_valgrind != NULL)
&& (strstr(env_valgrind, name) != NULL)) {
use_valgrind = TRUE;
}
if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found",
name);
use_valgrind = FALSE;
}
if (child->uid) {
if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
crm_err("Invalid user (%s) for subdaemon %s: not found",
child->uid, name);
return EACCES;
}
crm_info("Using uid %lu and group %lu for subdaemon %s",
(unsigned long) uid, (unsigned long) gid, name);
}
child->pid = fork();
CRM_ASSERT(child->pid != -1);
if (child->pid > 0) {
/* parent */
mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit);
crm_info("Forked process %lld for subdaemon %s%s",
(long long) child->pid, name,
use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
return pcmk_rc_ok;
} else {
/* Start a new session */
(void)setsid();
/* Setup the two alternate arg arrays */
opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN);
if (use_callgrind) {
opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
CRM_STATE_DIR "/callgrind.out.%p");
opts_vgrind[3] = subdaemon_path(child);
opts_vgrind[4] = NULL;
} else {
opts_vgrind[1] = subdaemon_path(child);
opts_vgrind[2] = NULL;
opts_vgrind[3] = NULL;
opts_vgrind[4] = NULL;
}
opts_default[0] = subdaemon_path(child);
if(gid) {
// Drop root group access if not needed
if (!need_root_group && (setgid(gid) < 0)) {
crm_warn("Could not set subdaemon %s group to %lu: %s",
name, (unsigned long) gid, strerror(errno));
}
/* Initialize supplementary groups to only those always granted to
* the user, plus haclient (so we can access IPC).
*/
if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initialize system groups for subdaemon %s: %s "
QB_XS " errno=%d",
name, pcmk_rc_str(errno), errno);
}
}
if (uid && setuid(uid) < 0) {
crm_warn("Could not set subdaemon %s user to %s: %s "
QB_XS " uid=%lu errno=%d",
name, strerror(errno), child->uid, (unsigned long) uid,
errno);
}
pcmk__close_fds_in_child(true);
pcmk__open_devnull(O_RDONLY); // stdin (fd 0)
pcmk__open_devnull(O_WRONLY); // stdout (fd 1)
pcmk__open_devnull(O_WRONLY); // stderr (fd 2)
if (use_valgrind) {
(void)execvp(VALGRIND_BIN, opts_vgrind);
} else {
char *path = subdaemon_path(child);
(void) execvp(path, opts_default);
free(path);
}
crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno));
crm_exit(CRM_EX_FATAL);
}
return pcmk_rc_ok; /* never reached */
}
/*!
* \internal
* \brief Check the liveness of the child based on IPC name and PID if tracked
*
* \param[in,out] child Child tracked data
*
* \return Standard Pacemaker return code
*
* \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
* indicating that no trace of IPC liveness was detected,
* pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
* an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
* the child is up by PID but not IPC end-point (possibly starting).
* \note This function doesn't modify any members of \p child other than
* \c pid, and does not actively manipulate processes as such, beyond
* invoking \c stop_child in one particular case (when, for some reason,
* there is a different authentic holder of the IPC endpoint).
*/
static int
child_liveness(pcmk_child_t *child)
{
uid_t cl_uid = 0;
gid_t cl_gid = 0;
const uid_t root_uid = 0;
const gid_t root_gid = 0;
const uid_t *ref_uid;
const gid_t *ref_gid;
const char *name = pcmk__server_name(child->server);
int rc = pcmk_rc_ipc_unresponsive;
int legacy_rc = pcmk_ok;
pid_t ipc_pid = 0;
if (child->uid == NULL) {
ref_uid = &root_uid;
ref_gid = &root_gid;
} else {
ref_uid = &cl_uid;
ref_gid = &cl_gid;
legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
}
if (legacy_rc < 0) {
rc = pcmk_legacy2rc(legacy_rc);
crm_err("Could not find user and group IDs for user %s: %s "
QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
} else {
const char *ipc_name = pcmk__server_ipc_name(child->server);
rc = pcmk__ipc_is_authentic_process_active(ipc_name,
*ref_uid, *ref_gid,
&ipc_pid);
if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
if (child->pid <= 0) {
/* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
* initializes a new child. If rc is
* pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
* investigate further.
*/
child->pid = ipc_pid;
} else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
/* An unexpected (but authorized) process is responding to
* IPC. Investigate further.
*/
rc = pcmk_rc_ipc_unresponsive;
}
}
}
if (rc == pcmk_rc_ipc_unresponsive) {
/* If we get here, a child without IPC is being tracked, no IPC liveness
* has been detected, or IPC liveness has been detected with an
* unexpected (but authorized) process. This is safe on FreeBSD, since
* the only possible change is from a proper child's PID to the "special"
* PID of 1 standing in for a more loosely related process.
*/
int ret = pcmk__pid_active(child->pid, name);
if (ipc_pid && ((ret != pcmk_rc_ok)
|| ipc_pid == PCMK__SPECIAL_PID
|| (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) {
/* An unexpected (but authorized) process was detected at the IPC
* endpoint, and either it is active, or the child we're tracking is
* not.
*/
if (ret == pcmk_rc_ok) {
/* The child we're tracking is active. Kill it, and adopt the
* detected process. This assumes that our children don't fork
* (thus getting a different PID owning the IPC), but rather the
* tracking got out of sync because of some means external to
* Pacemaker, and adopting the detected process is better than
* killing it and possibly having to spawn a new child.
*/
/* not possessing IPC, after all (what about corosync CPG?) */
stop_child(child, SIGKILL);
}
rc = pcmk_rc_ok;
child->pid = ipc_pid;
} else if (ret == pcmk_rc_ok) {
// Our tracked child's PID was found active, but not its IPC
rc = pcmk_rc_ipc_pid_only;
} else if ((child->pid == 0) && (ret == EINVAL)) {
// FreeBSD can return EINVAL
rc = pcmk_rc_ipc_unresponsive;
} else {
switch (ret) {
case EACCES:
rc = pcmk_rc_ipc_unauthorized;
break;
case ESRCH:
rc = pcmk_rc_ipc_unresponsive;
break;
default:
rc = ret;
break;
}
}
}
return rc;
}
/*!
* \internal
* \brief Initial one-off check of the pre-existing "child" processes
*
* With "child" process, we mean the subdaemon that defines an API end-point
* (all of them do as of the comment) -- the possible complement is skipped
* as it is deemed it has no such shared resources to cause conflicts about,
* hence it can presumably be started anew without hesitation.
* If that won't hold true in the future, the concept of a shared resource
* will have to be generalized beyond the API end-point.
*
* For boundary cases that the "child" is still starting (IPC end-point is yet
* to be witnessed), or more rarely (practically FreeBSD only), when there's
* a pre-existing "untrackable" authentic process, we give the situation some
* time to possibly unfold in the right direction, meaning that said socket
* will appear or the unattainable process will disappear per the observable
* IPC, respectively.
*
* \return Standard Pacemaker return code
*
* \note Since this gets run at the very start, \c respawn_count fields
* for particular children get temporarily overloaded with "rounds
* of waiting" tracking, restored once we are about to finish with
* success (i.e. returning value >=0) and will remain unrestored
* otherwise. One way to suppress liveness detection logic for
* particular child is to set the said value to a negative number.
*/
#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
int
find_and_track_existing_processes(void)
{
bool wait_in_progress;
int rc;
size_t i, rounds;
for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
wait_in_progress = false;
for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
const char *name = pcmk__server_name(pcmk_children[i].server);
const char *ipc_name = NULL;
if (pcmk_children[i].respawn_count < 0) {
continue;
}
rc = child_liveness(&pcmk_children[i]);
if (rc == pcmk_rc_ipc_unresponsive) {
/* Speculatively, don't give up if there are more rounds to
* come for other reasons, but don't artificially wait just
* because of this, since we would prefer to start ASAP.
*/
continue;
}
// @TODO Functionize more of this to reduce nesting
ipc_name = pcmk__server_ipc_name(pcmk_children[i].server);
pcmk_children[i].respawn_count = rounds;
switch (rc) {
case pcmk_rc_ok:
if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
crm_crit("Cannot reliably track pre-existing"
" authentic process behind %s IPC on this"
" platform and PCMK_" PCMK__ENV_FAIL_FAST
" requested", ipc_name);
return EOPNOTSUPP;
} else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
crm_notice("Assuming pre-existing authentic, though"
" on this platform untrackable, process"
" behind %s IPC is stable (was in %d"
" previous samples) so rather than"
" bailing out (PCMK_" PCMK__ENV_FAIL_FAST
" not requested), we just switch to a"
" less optimal IPC liveness monitoring"
" (not very suitable for heavy load)",
name, WAIT_TRIES - 1);
crm_warn("The process behind %s IPC cannot be"
" terminated, so the overall shutdown"
" will get delayed implicitly (%ld s),"
" which serves as a graceful period for"
" its native termination if it vitally"
" depends on some other daemons going"
" down in a controlled way already",
name, (long) (SHUTDOWN_ESCALATION_PERIOD / 1000)); // ms to s
} else {
wait_in_progress = true;
crm_warn("Cannot reliably track pre-existing"
" authentic process behind %s IPC on this"
" platform, can still disappear in %d"
" attempt(s)", ipc_name,
WAIT_TRIES - pcmk_children[i].respawn_count);
continue;
}
}
crm_notice("Tracking existing %s process (pid=%lld)",
name,
(long long) PCMK__SPECIAL_PID_AS_0(
pcmk_children[i].pid));
pcmk_children[i].respawn_count = -1; /* 0~keep watching */
pcmk_children[i].flags |= child_active_before_startup;
break;
case pcmk_rc_ipc_pid_only:
if (pcmk_children[i].respawn_count == WAIT_TRIES) {
crm_crit("%s IPC endpoint for existing authentic"
" process %lld did not (re)appear",
ipc_name,
(long long) PCMK__SPECIAL_PID_AS_0(
pcmk_children[i].pid));
return rc;
}
wait_in_progress = true;
crm_warn("Cannot find %s IPC endpoint for existing"
" authentic process %lld, can still (re)appear"
" in %d attempts (?)",
ipc_name,
(long long) PCMK__SPECIAL_PID_AS_0(
pcmk_children[i].pid),
WAIT_TRIES - pcmk_children[i].respawn_count);
continue;
default:
crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d",
name, pcmk_rc_str(rc), rc);
return rc;
}
}
if (!wait_in_progress) {
break;
}
pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
}
for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
pcmk_children[i].respawn_count = 0; /* restore pristine state */
}
g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
NULL);
return pcmk_rc_ok;
}
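/*!
* \internal
* \brief Start all subdaemons that are not already being tracked
*
* \param[in] user_data Ignored
*
* \return TRUE
*/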
gboolean
init_children_processes(void *user_data)
{
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
/* Corosync clusters can drop root group access, because we set
* uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
* to corosync.
*/
need_root_group = false;
}
/* start any children that have not been detected */
for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
if (pcmk_children[i].pid != 0) {
/* we are already tracking it */
continue;
}
start_child(&(pcmk_children[i]));
}
/* From this point on, any daemons being started will be due to
* respawning rather than node start.
*
* This may be useful for the daemons to know
*/
pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
pacemakerd_state = PCMK__VALUE_RUNNING;
return TRUE;
}
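/*!
* \internal
* \brief Begin a graceful shutdown by activating the shutdown trigger
*
* \param[in] nsig Signal that caused invocation (ignored)
*/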
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
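/*!
* \internal
* \brief Respawn subdaemons that were waiting for the cluster to return
*/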
void
restart_cluster_subdaemons(void)
{
for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
continue;
}
crm_notice("Respawning cluster-based subdaemon %s",
pcmk__server_name(pcmk_children[i].server));
if (start_child(&pcmk_children[i])) {
pcmk_children[i].flags &= ~child_needs_retry;
}
}
}
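/*!
* \internal
* \brief Send a signal to a subdaemon process
*
* \param[in,out] child Subdaemon to signal
* \param[in] signal Signal to send (0 means SIGTERM)
*
* \return TRUE
*/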
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
const char *name = pcmk__server_name(child->server);
if (signal == 0) {
signal = SIGTERM;
}
/* Why skip a PID of 1?
- On FreeBSD, it is how an untrackable process behind IPC is masqueraded
- Elsewhere, it is how the "init" task is designated; in particular, in a
systemd arrangement of socket-based activation, this is quite real */
if (child->pid == PCMK__SPECIAL_PID) {
crm_debug("Nothing to do to stop subdaemon %s[%lld]",
name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
return TRUE;
}
if (child->pid <= 0) {
crm_trace("Nothing to do to stop subdaemon %s: Not running", name);
return TRUE;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping subdaemon %s "
QB_XS " via signal %d to process %lld",
name, signal, (long long) child->pid);
} else {
crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s",
name, (long long) child->pid, signal, strerror(errno));
}
return TRUE;
}
diff --git a/python/pacemaker/_cts/patterns.py b/python/pacemaker/_cts/patterns.py
index 03a405c708..4950e95686 100644
--- a/python/pacemaker/_cts/patterns.py
+++ b/python/pacemaker/_cts/patterns.py
@@ -1,396 +1,397 @@
"""Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS)."""
__all__ = ["PatternSelector"]
__copyright__ = "Copyright 2008-2024 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+)"
import argparse
from pacemaker.buildoptions import BuildOptions
class BasePatterns:
"""
The base class for holding a stack-specific set of command and log file/stdout patterns.
Stack-specific classes need to be built on top of this one.
"""
def __init__(self):
"""Create a new BasePatterns instance which holds a very minimal set of basic patterns."""
self._bad_news = []
self._components = {}
self._name = "crm-base"
self._ignore = [
"avoid confusing Valgrind",
# Logging bug in some versions of libvirtd
r"libvirtd.*: internal error: Failed to parse PCI config address",
# pcs can log this when node is fenced, but fencing is OK in some
# tests (and we will catch it in pacemaker logs when not OK)
r"pcs.daemon:No response from: .* request: get_configs, error:",
# This is overbroad, but there's no way to say that only certain
# transition errors are acceptable. We have to rely on causes of a
# transition error logging their own error message, which should
# always be the case.
r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self._commands = {
"StatusCmd": "crmadmin -t 60 -S %s 2>/dev/null",
"CibQuery": "cibadmin -Ql",
"CibAddXml": "cibadmin --modify -c --xml-text %s",
"CibDelXpath": "cibadmin --delete --xpath %s",
"RscRunning": BuildOptions.DAEMON_DIR + "/cts-exec-helper -R -r %s",
"CIBfile": "%s:" + BuildOptions.CIB_DIR + "/cib.xml",
"TmpDir": "/tmp",
"BreakCommCmd": "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
"FixCommCmd": "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
"MaintenanceModeOn": "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'",
"MaintenanceModeOff": "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",
"StandbyCmd": "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null",
"StandbyQueryCmd": "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null",
}
self._search = {
"Pat:DC_IDLE": r"pacemaker-controld.*State transition.*-> S_IDLE",
# This won't work if we have multiple partitions
"Pat:Local_started": r"%s\W.*controller successfully started",
"Pat:NonDC_started": r"%s\W.*State transition.*-> S_NOT_DC",
"Pat:DC_started": r"%s\W.*State transition.*-> S_IDLE",
"Pat:We_stopped": r"%s\W.*OVERRIDE THIS PATTERN",
"Pat:They_stopped": r"%s\W.*LOST:.* %s ",
"Pat:They_dead": r"node %s.*: is dead",
"Pat:They_up": r"%s %s\W.*OVERRIDE THIS PATTERN",
"Pat:TransitionComplete": "Transition status: Complete: complete",
"Pat:Fencing_start": r"Requesting peer fencing .* targeting %s",
"Pat:Fencing_ok": r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK",
"Pat:Fencing_recover": r"pacemaker-schedulerd.*: Recover\s+%s",
"Pat:Fencing_active": r"stonith resource .* is active on 2 nodes (attempting recovery)",
"Pat:Fencing_probe": r"pacemaker-controld.* Result of probe operation for %s on .*: Error",
"Pat:RscOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok",
"Pat:RscOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of %s ",
"Pat:CloneOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of (%s|%s) ",
"Pat:RscRemoteOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok",
"Pat:NodeFenced": r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK",
}
def get_component(self, key):
"""
Return the patterns for a single component as a list, given by key.
This is typically the name of some subprogram (pacemaker-based,
pacemaker-fenced, etc.) or various special purpose keys. If key is
unknown, return an empty list.
"""
if key in self._components:
return self._components[key]
print("Unknown component '%s' for %s" % (key, self._name))
return []
def get_patterns(self, key):
"""
Return various patterns supported by this object, given by key.
Depending on the key, this could either be a list or a hash. If key is
unknown, return None.
"""
if key == "BadNews":
return self._bad_news
if key == "BadNewsIgnore":
return self._ignore
if key == "Commands":
return self._commands
if key == "Search":
return self._search
if key == "Components":
return self._components
print("Unknown pattern '%s' for %s" % (key, self._name))
return None
def __getitem__(self, key):
if key == "Name":
return self._name
if key in self._commands:
return self._commands[key]
if key in self._search:
return self._search[key]
print("Unknown template '%s' for %s" % (key, self._name))
return None
class Corosync2Patterns(BasePatterns):
"""Patterns for Corosync version 2 cluster manager class."""
def __init__(self):
BasePatterns.__init__(self)
self._name = "crm-corosync"
self._commands.update({
"StartCmd": "service corosync start && service pacemaker start",
"StopCmd": "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop",
"EpochCmd": "crm_node -e",
"QuorumCmd": "crm_node -q",
"PartitionCmd": "crm_node -p",
})
self._search.update({
# Close enough ... "Corosync Cluster Engine exiting normally" isn't
# printed reliably.
"Pat:We_stopped": r"%s\W.*Unloading all Corosync service engines",
"Pat:They_stopped": r"%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:They_dead": r"pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:They_up": r"\W%s\W.*pacemaker-controld.*Node %s state is now member",
"Pat:ChildExit": r"\[[0-9]+\] exited with status [0-9]+ \(",
# "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes()
"Pat:ChildKilled": r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated( with signal 9|$)",
- "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning %s subdaemon after unexpected exit",
+ "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning subdaemon %s after unexpected exit",
"Pat:InfraUp": r"%s\W.*corosync.*Initializing transport",
"Pat:PacemakerUp": r"%s\W.*pacemakerd.*Starting Pacemaker",
})
self._ignore += [
r"crm_mon:",
r"crmadmin:",
r"update_trace_data",
r"async_notify:.*strange, client not found",
r"Parse error: Ignoring unknown option .*nodename",
r"error.*: Operation 'reboot' .* using FencingFail returned ",
r"getinfo response error: 1$",
r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE",
r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)",
]
self._bad_news = [
r"[^(]error:",
r"crit:",
r"ERROR:",
r"CRIT:",
r"Shutting down...NOW",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting",
r"schedulerd.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r":global_timer_callback",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
- r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)",
+ r"pacemakerd.*\[[0-9]+\] terminated( with signal|$)",
+ r"pacemakerd.*\[[0-9]+\] .* will now be killed",
r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)",
r"rsyslogd.* lost .* due to rate-limiting",
r"Peer is not part of our cluster",
r"We appear to be in an election loop",
r"Unknown node -> we will not deliver message",
r"(Blackbox dump requested|Problem detected)",
r"pacemakerd.*Could not connect to Cluster Configuration Database API",
r"Receiving messages from a node we think is dead",
r"share the same cluster nodeid",
r"share the same name",
r"pacemaker-controld:.*Transition failed: terminated",
r"Local CIB .* differs from .*:",
r"warn.*:\s*Continuing but .* will NOT be used",
r"warn.*:\s*Cluster configuration file .* is corrupt",
r"Election storm",
r"stalled the FSA with pending inputs",
]
self._components["common-ignore"] = [
r"Pending action:",
r"resource( was|s were) active at shutdown",
r"pending LRM operations at shutdown",
r"Lost connection to the CIB manager",
r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported",
r"pacemaker-controld.*:\s*Exiting now due to errors",
r".*:\s*Requesting fencing \([^)]+\) targeting node ",
r"(Blackbox dump requested|Problem detected)",
]
self._components["corosync-ignore"] = [
r"Could not connect to Corosync CFG: CS_ERR_LIBRARY",
r"error:.*Connection to the CPG API failed: Library error",
r"\[[0-9]+\] exited with status [0-9]+ \(",
r"\[[0-9]+\] terminated with signal 15",
r"pacemaker-based.*error:.*Corosync connection lost",
r"pacemaker-fenced.*error:.*Corosync connection terminated",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state",
r"pacemaker-controld.*error:.*Could not recover from internal error",
r"error:.*Connection to cib_(shm|rw).* (failed|closed)",
r"error:.*cib_(shm|rw) IPC provider disconnected while waiting",
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"error: Lost fencer connection",
]
self._components["corosync"] = [
# We expect each daemon to lose its cluster connection.
# However, if the CIB manager loses its connection first,
# it's possible for another daemon to lose that connection and
# exit before losing the cluster connection.
r"pacemakerd.*:\s*warning:.*Lost connection to cluster layer",
r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (Corosync process group|the CIB manager)",
r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer",
r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"schedulerd.*Scheduling node .* for fencing",
r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK",
]
self._components["pacemaker-based"] = [
r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102",
r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1",
- r"pacemakerd.* Respawning pacemaker-attrd subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-based subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-fenced subdaemon after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-attrd after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-based after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-fenced after unexpected exit",
r"pacemaker-.* Connection to cib_.* (failed|closed)",
r"pacemaker-attrd.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*I_ERROR.*handle_cib_disconnect",
r"pacemaker-controld.* State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
]
self._components["pacemaker-based-ignore"] = [
r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
r"pacemaker-controld.*:Could not connect to attrd: Connection refused",
]
self._components["pacemaker-execd"] = [
r"pacemaker-controld.*Lost connection to local executor",
r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1",
- r"pacemakerd.* Respawning pacemaker-execd subdaemon after unexpected exit",
- r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-execd after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit",
]
self._components["pacemaker-execd-ignore"] = [
r"pacemaker-(attrd|controld).*Connection to lrmd.* (failed|closed)",
r"pacemaker-(attrd|controld).*Could not execute alert",
]
self._components["pacemaker-controld"] = [
r"State transition .* -> S_IDLE",
]
self._components["pacemaker-controld-ignore"] = []
self._components["pacemaker-attrd"] = []
self._components["pacemaker-attrd-ignore"] = []
self._components["pacemaker-schedulerd"] = [
r"State transition .* S_RECOVERY",
- r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
+ r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit",
r"pacemaker-controld\[[0-9]+\] exited with status 1 \(",
r"pacemaker-controld.*Lost connection to the scheduler",
r"pacemaker-controld.*I_ERROR.*save_cib_contents",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
]
self._components["pacemaker-schedulerd-ignore"] = [
r"Connection to pengine.* (failed|closed)",
]
self._components["pacemaker-fenced"] = [
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"Lost fencer connection",
r"pacemaker-controld.*Fencer successfully connected",
]
self._components["pacemaker-fenced-ignore"] = [
r"(error|warning):.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"error:.*Lost fencer connection",
r"error:.*Fencer connection failed \(will retry\)",
r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
]
self._components["pacemaker-fenced-ignore"].extend(self._components["common-ignore"])
patternVariants = {
"crm-base": BasePatterns,
"crm-corosync": Corosync2Patterns
}
class PatternSelector:
"""Choose from among several Pattern objects and return the information from that object."""
def __init__(self, name="crm-corosync"):
"""
Create a new PatternSelector object.
Instantiate whatever class is given by name. If no name is given, default
to Corosync2Patterns. While other objects could be supported in the
future, only this and the base object are supported at this time.
"""
self._name = name
# If no name was given, use the default. Otherwise, look up the appropriate
# class in patternVariants, instantiate it, and use that.
if not name:
self._base = Corosync2Patterns()
else:
self._base = patternVariants[name]()
def get_patterns(self, kind):
"""Call get_patterns on the previously instantiated pattern object."""
return self._base.get_patterns(kind)
def get_template(self, key):
"""
Return a single pattern from the previously instantiated pattern object.
If no pattern exists for the given key, return None.
"""
return self._base[key]
def get_component(self, kind):
"""Call get_component on the previously instantiated pattern object."""
return self._base.get_component(kind)
def __getitem__(self, key):
"""Return the pattern for the given key, or None if it does not exist."""
return self.get_template(key)
# PYTHONPATH=python python python/pacemaker/_cts/patterns.py -k crm-corosync -t StartCmd
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("-k", "--kind", metavar="KIND")
parser.add_argument("-t", "--template", metavar="TEMPLATE")
args = parser.parse_args()
print(PatternSelector(args.kind)[args.template])