Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index b85e7eaf6c..217ec35741 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -1,486 +1,486 @@
/*
- * Copyright 2010-2024 the Pacemaker project contributors
+ * Copyright 2010-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#if SUPPORT_COROSYNC
#include "pcmkd_corosync.h"
#endif
#include <pwd.h>
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/crm.h> /* indirectly: CRM_EX_* */
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc_pacemakerd.h>
#include <crm/common/output_internal.h>
#include <crm/cluster/internal.h>
#include <crm/cluster.h>
#define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons"
/* Values of the command-line options. The GLib option parser fills these in
 * through the pointers registered in the entries[] table, except for standby,
 * which is set by standby_cb().
 */
struct {
// --features / -F
gboolean features;
// --foreground / -f (described as ignored in its help text)
gboolean foreground;
// --shutdown / -S
gboolean shutdown;
// --standby / -s
gboolean standby;
} options;
static pcmk__output_t *out = NULL;
// Output formats accepted by this program (none, text, XML); NULL-terminated
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief Default formatter for the "features" message
 *
 * Emits the Pacemaker version, build identifier, feature set and the list of
 * compiled-in features as one informational message.
 *
 * \param[in,out] out   Output object to write to
 * \param[in]     args  Message arguments (unused)
 *
 * \return pcmk_rc_ok
 */
PCMK__OUTPUT_ARGS("features")
static int
pacemakerd_features(pcmk__output_t *out, va_list args)
{
    out->info(out,
              "Pacemaker %s (Build: %s)\n Supporting v%s: %s",
              PACEMAKER_VERSION, BUILD_VERSION,
              CRM_FEATURE_SET, CRM_FEATURES);

    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief XML formatter for the "features" message
 *
 * Creates a parent element carrying the version, build and feature set as
 * attributes, and beneath it a list with one text node per space-separated
 * word of CRM_FEATURES.
 *
 * \param[in,out] out   Output object to write to
 * \param[in]     args  Message arguments (unused)
 *
 * \return pcmk_rc_ok
 */
PCMK__OUTPUT_ARGS("features")
static int
pacemakerd_features_xml(pcmk__output_t *out, va_list args)
{
    gchar **features = g_strsplit(CRM_FEATURES, " ", 0);
    gchar **cursor = features;

    pcmk__output_xml_create_parent(out, PCMK_XE_PACEMAKERD,
                                   PCMK_XA_VERSION, PACEMAKER_VERSION,
                                   PCMK_XA_BUILD, BUILD_VERSION,
                                   PCMK_XA_FEATURE_SET, CRM_FEATURE_SET,
                                   NULL);
    out->begin_list(out, NULL, NULL, PCMK_XE_FEATURES);

    // The vector returned by g_strsplit() is NULL-terminated
    while (*cursor != NULL) {
        pcmk__output_create_xml_text_node(out, PCMK_XE_FEATURE, *cursor);
        cursor++;
    }

    out->end_list(out);
    pcmk__output_xml_pop_parent(out);

    g_strfreev(features);
    return pcmk_rc_ok;
}
/* Formatters for the "features" message, one per output format name;
 * registered in main() via pcmk__register_messages(). NULL-terminated.
 */
static pcmk__message_entry_t fmt_functions[] = {
{ "features", "default", pacemakerd_features },
{ "features", "xml", pacemakerd_features_xml },
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief Option callback for --pid-file, which is accepted but ignored
 *
 * \param[in]  option_name  Name of the option being parsed (unused)
 * \param[in]  optarg       Value given for the option (unused)
 * \param[in]  data         User data (unused)
 * \param[out] err          Where to store an error (never set)
 *
 * \return TRUE always
 */
static gboolean
pid_cb(const gchar *option_name, const gchar *optarg, gpointer data,
       GError **err)
{
    // Nothing to record: the option's value is deliberately discarded
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback for --standby
 *
 * Records that standby was requested and sets the node start state
 * environment option to the standby value.
 *
 * \param[in]  option_name  Name of the option being parsed (unused)
 * \param[in]  optarg       Value given for the option (unused)
 * \param[in]  data         User data (unused)
 * \param[out] err          Where to store an error (never set)
 *
 * \return TRUE always
 */
static gboolean
standby_cb(const gchar *option_name, const gchar *optarg, gpointer data,
           GError **err)
{
    pcmk__set_env_option(PCMK__ENV_NODE_START_STATE, PCMK_VALUE_STANDBY,
                         false);
    options.standby = TRUE;

    return TRUE;
}
/* Command-line options specific to pacemakerd, added to the argument context
 * by build_arg_context(). Plain flags store straight into the options struct;
 * --pid-file and --standby go through callbacks instead.
 */
static GOptionEntry entries[] = {
{ "features", 'F', 0, G_OPTION_ARG_NONE, &options.features,
"Display full version and list of features Pacemaker was built with",
NULL },
{ "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground,
"(Ignored) Pacemaker always runs in the foreground",
NULL },
{ "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb,
"(Ignored) Daemon pid file location",
"FILE" },
{ "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown,
"Instruct Pacemaker to shutdown on this machine",
NULL },
// G_OPTION_FLAG_NO_ARG: the callback is invoked without an option value
{ "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb,
"Start node in standby state",
NULL },
{ NULL }
};
/*!
 * \internal
 * \brief Signal callback that only logs the signal it received
 *
 * \param[in] nsig  Number of the signal received
 */
static void
pcmk_ignore(int nsig)
{
    const char *signal_desc = strsignal(nsig);

    crm_info("Ignoring signal %s (%d)", signal_desc, nsig);
}
/*!
 * \internal
 * \brief Signal callback for SIGQUIT: request a panic
 *
 * \param[in] nsig  Number of the signal received (unused)
 */
static void
pcmk_sigquit(int nsig)
{
    static const char *const reason = "Received SIGQUIT";

    pcmk__panic(reason);
}
/*!
 * \internal
 * \brief Change ownership of a path, logging a warning on failure
 *
 * Failure is not fatal: it is only reported at warning level.
 *
 * \param[in] path  Path whose ownership should be changed
 * \param[in] uid   User ID to assign
 * \param[in] gid   Group ID to assign
 */
static void
pacemakerd_chown(const char *path, uid_t uid, gid_t gid)
{
    if (chown(path, uid, gid) < 0) {
        // Save errno before calling anything else that might overwrite it
        const int chown_errno = errno;

        /* gid_t is not guaranteed to be int, so widen it explicitly rather
         * than passing it to a mismatched "%d" conversion
         */
        crm_warn("Cannot change the ownership of %s to user %s and gid %lld: %s",
                 path, CRM_DAEMON_USER, (long long) gid,
                 pcmk_rc_str(chown_errno));
    }
}
/*!
 * \internal
 * \brief Create the directories Pacemaker needs and hand them to the daemon user
 *
 * Exits with CRM_EX_NOUSER if the daemon user cannot be looked up. A directory
 * that cannot be created is only warned about, and its ownership is then left
 * unchanged.
 */
static void
create_pcmk_dirs(void)
{
    uid_t daemon_uid = 0;
    gid_t daemon_gid = 0;

    const char *dirs[] = {
        PCMK__PERSISTENT_DATA_DIR,  // core/blackbox/scheduler/CIB files
        CRM_CORE_DIR,               // core files
        CRM_BLACKBOX_DIR,           // blackbox dumps
        PCMK_SCHEDULER_INPUT_DIR,   // scheduler inputs
        CRM_CONFIG_DIR,             // the Cluster Information Base (CIB)
        // Don't build PCMK__OCF_TMP_DIR the executor will do it
        NULL
    };

    if (pcmk_daemon_user(&daemon_uid, &daemon_gid) < 0) {
        crm_err("Cluster user %s does not exist, aborting Pacemaker startup",
                CRM_DAEMON_USER);
        crm_exit(CRM_EX_NOUSER);
    }

    // Used by some resource agents; an already-existing directory is fine
    if ((mkdir(CRM_STATE_DIR, 0750) >= 0) || (errno == EEXIST)) {
        pacemakerd_chown(CRM_STATE_DIR, daemon_uid, daemon_gid);
    } else {
        crm_warn("Could not create directory " CRM_STATE_DIR ": %s",
                 pcmk_rc_str(errno));
    }

    for (const char **dir = dirs; *dir != NULL; dir++) {
        const int rc = pcmk__build_path(*dir, 0750);

        if (rc == pcmk_rc_ok) {
            pacemakerd_chown(*dir, daemon_uid, daemon_gid);
        } else {
            crm_warn("Could not create directory %s: %s",
                     *dir, pcmk_rc_str(rc));
        }
    }
}
/*!
 * \internal
 * \brief Raise the core file size limit as far as this process is allowed to
 *
 * The soft limit is raised to the hard limit. If the hard limit is zero (core
 * dumps disabled), it is first set to unlimited, but only when running with
 * an effective UID of root. Failures are logged and otherwise ignored.
 */
static void
remove_core_file_limit(void)
{
    struct rlimit limits;

    // Get current limits
    if (getrlimit(RLIMIT_CORE, &limits) < 0) {
        crm_notice("Unable to check system core file limits "
                   "(consider ensuring the size is unlimited): %s",
                   strerror(errno));
        return;
    }

    // A hard limit of zero means core dumps are disabled
    if (limits.rlim_max == 0) {
        if (geteuid() != 0) {
            // Not root, so there is nothing we can do about it
            crm_notice("Core dumps are disabled (consider enabling them)");
            return;
        }

        // We're root, so enable them
        limits.rlim_max = RLIM_INFINITY;
    }

    // Raise soft limit to hard limit (if not already done)
    if (limits.rlim_cur != limits.rlim_max) {
        limits.rlim_cur = limits.rlim_max;

        if (setrlimit(RLIMIT_CORE, &limits) < 0) {
            crm_notice("Unable to raise system limit on core file size "
                       "(consider doing so manually): %s",
                       strerror(errno));
            return;
        }
    }

    if (limits.rlim_cur != RLIM_INFINITY) {
        crm_trace("Core file size is limited to %llu bytes",
                  (unsigned long long) limits.rlim_cur);
        return;
    }

    crm_trace("Core file size is unlimited");
}
/*!
 * \internal
 * \brief IPC event callback used while talking to an existing pacemakerd
 *
 * Only reply events are examined. A reply with a non-OK status, or with a
 * type other than a shutdown reply, is reported as an error through the
 * global output object.
 *
 * \param[in,out] pacemakerd_api  IPC connection the event arrived on (unused)
 * \param[in]     event_type      Type of event that occurred
 * \param[in]     status          Exit status carried by the event
 * \param[in]     event_data      For reply events, the reply
 *                                (pcmk_pacemakerd_api_reply_t *)
 * \param[in]     user_data       User data given at registration (unused)
 */
static void
pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
                    enum pcmk_ipc_event event_type, crm_exit_t status,
                    void *event_data, void *user_data)
{
    pcmk_pacemakerd_api_reply_t *reply = event_data;

    if (event_type != pcmk_ipc_event_reply) {
        return;
    }

    if (status != CRM_EX_OK) {
        out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status));
        return;
    }

    if (reply->reply_type == pcmk_pacemakerd_reply_shutdown) {
        return;
    }

    out->err(out, "Unknown reply type %d from pacemakerd",
             reply->reply_type);
}
/*!
 * \internal
 * \brief Create the command-line parsing context for pacemakerd
 *
 * \param[in,out] args   Common argument object to attach to the context
 * \param[out]    group  Passed through to pcmk__build_arg_context()
 *
 * \return Argument context with the pacemakerd-specific entries[] added
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *context = pcmk__build_arg_context(args,
                                                      "text (default), xml",
                                                      group, NULL);

    pcmk__add_main_args(context, entries);
    return context;
}
/* Entry point of pacemakerd.
 *
 * In order: parse the command line, create the output object, handle the
 * informational options (--features, --version), probe for an already-running
 * pacemakerd over IPC, then either request that instance to shut down
 * (--shutdown) or refuse to start if one is active. Otherwise set up the
 * environment, IPC server, cluster configuration connection and signal
 * handlers, start (or schedule the start of) the subdaemons, and run the main
 * loop. Every path ends in crm_exit().
 */
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
crm_exit_t exit_code = CRM_EX_OK;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "p");
GOptionContext *context = build_arg_context(args, &output_group);
bool old_instance_connected = false;
pcmk_ipc_api_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
subdaemon_check_progress = time(NULL);
setenv("LC_ALL", "C", 1); // Ensure logs are in a common language
crm_log_preinit(NULL, argc, argv);
mainloop_add_signal(SIGHUP, pcmk_ignore);
mainloop_add_signal(SIGQUIT, pcmk_sigquit);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if ((rc != pcmk_rc_ok) || (out == NULL)) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
pcmk__register_messages(out, fmt_functions);
// --features and --version only print information and then exit
if (options.features) {
out->message(out, "features");
exit_code = CRM_EX_OK;
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
// A --shutdown request uses CLI-style logging; a daemon start uses daemon logging
if (options.shutdown) {
pcmk__cli_init_logging(PCMK__SERVER_PACEMAKERD, args->verbosity);
} else {
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
}
// Probe for an already-running pacemakerd over its IPC API
crm_debug("Checking for existing Pacemaker instance");
rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_pacemakerd);
if (old_instance == NULL) {
out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc));
exit_code = pcmk_rc2exitc(rc);
goto done;
}
pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL);
rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2);
if (rc != pcmk_rc_ok) {
crm_debug("No existing %s instance found: %s",
pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc));
}
old_instance_connected = pcmk_ipc_is_connected(old_instance);
// --shutdown: ask the running instance to stop and wait for it to disconnect
if (options.shutdown) {
if (old_instance_connected) {
rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name);
pcmk_dispatch_ipc(old_instance);
exit_code = pcmk_rc2exitc(rc);
if (exit_code != CRM_EX_OK) {
pcmk_free_ipc_api(old_instance);
goto done;
}
/* We get the ACK immediately, and the response right after that,
* but it might take a while for pacemakerd to get around to
* shutting down. Wait for that to happen (with 30-minute timeout).
*/
// 900 polls with a 2-second sleep each gives the 30 minutes
for (int i = 0; i < 900; i++) {
if (!pcmk_ipc_is_connected(old_instance)) {
exit_code = CRM_EX_OK;
pcmk_free_ipc_api(old_instance);
goto done;
}
sleep(2);
}
exit_code = CRM_EX_TIMEOUT;
pcmk_free_ipc_api(old_instance);
goto done;
} else {
out->err(out, "Could not request shutdown "
"of existing Pacemaker instance: %s", pcmk_rc_str(rc));
pcmk_free_ipc_api(old_instance);
exit_code = CRM_EX_DISCONNECT;
goto done;
}
} else if (old_instance_connected) {
// Starting up, but another instance already answers on the IPC endpoint
pcmk_free_ipc_api(old_instance);
crm_err("Aborting start-up because active Pacemaker instance found");
exit_code = CRM_EX_FATAL;
goto done;
}
pcmk_free_ipc_api(old_instance);
/* Don't allow any accidental output after this point. */
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
// The cleanup at "done" checks out for NULL, so later jumps there stay safe
out = NULL;
}
#if SUPPORT_COROSYNC
if (pacemakerd_read_config() == FALSE) {
crm_exit(CRM_EX_UNAVAILABLE);
}
#endif
// OCF shell functions and cluster-glue need facility under different name
{
const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY);
if (!pcmk__str_eq(facility, PCMK_VALUE_NONE,
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__set_env_option("LOGFACILITY", facility, true);
}
}
crm_notice("Starting Pacemaker %s " QB_XS " build=%s features:%s",
PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_loop_new(NULL, FALSE);
remove_core_file_limit();
create_pcmk_dirs();
pcmk__serve_pacemakerd_ipc(&ipcs, &pacemakerd_ipc_callbacks);
#if SUPPORT_COROSYNC
/* Allows us to block shutdown */
if (!cluster_connect_cfg()) {
exit_code = CRM_EX_PROTOCOL;
goto done;
}
#endif
if (pcmk__locate_sbd() > 0) {
- running_with_sbd = TRUE;
+ running_with_sbd = true;
}
switch (find_and_track_existing_processes()) {
case pcmk_rc_ok:
break;
case pcmk_rc_ipc_unauthorized:
exit_code = CRM_EX_CANTCREAT;
goto done;
default:
exit_code = CRM_EX_FATAL;
goto done;
};
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
/* With SBD and synchronized resource startup, defer starting the subdaemons
* to a trigger; otherwise start them right away.
*/
if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
crm_notice("Waiting for startup-trigger from SBD.");
pacemakerd_state = PCMK__VALUE_WAIT_FOR_PING;
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
} else {
if (running_with_sbd) {
crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
"by your SBD version) improve reliability of "
"interworking between SBD & pacemaker.");
}
pacemakerd_state = PCMK__VALUE_STARTING_DAEMONS;
init_children_processes(NULL);
}
crm_notice("Pacemaker daemon successfully started and accepting connections");
g_main_loop_run(mainloop);
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_loop_unref(mainloop);
#if SUPPORT_COROSYNC
cluster_disconnect_cfg();
#endif
// Common exit path: release parsing state, report any error, finish output
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h
index 51e32b19e8..4c2ea7e38c 100644
--- a/daemons/pacemakerd/pacemakerd.h
+++ b/daemons/pacemakerd/pacemakerd.h
@@ -1,27 +1,27 @@
/*
- * Copyright 2010-2023 the Pacemaker project contributors
+ * Copyright 2010-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#define MAX_RESPAWN 100
extern GMainLoop *mainloop;
extern struct qb_ipcs_service_handlers pacemakerd_ipc_callbacks;
extern const char *pacemakerd_state;
-extern gboolean running_with_sbd;
-extern gboolean shutdown_complete_state_reported_client_closed;
+extern bool running_with_sbd;
+extern bool shutdown_complete_state_reported_client_closed;
extern unsigned int shutdown_complete_state_reported_to;
extern crm_trigger_t *shutdown_trigger;
extern crm_trigger_t *startup_trigger;
extern time_t subdaemon_check_progress;
int find_and_track_existing_processes(void);
gboolean init_children_processes(void *user_data);
void pcmk_shutdown(int nsig);
void restart_cluster_subdaemons(void);
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
index 715cd754dc..15ef6a4977 100644
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
@@ -1,900 +1,944 @@
/*
- * Copyright 2010-2024 the Pacemaker project contributors
+ * Copyright 2010-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#if SUPPORT_COROSYNC
#include "pcmkd_corosync.h"
#endif
#include <errno.h>
#include <grp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <crm/cluster.h>
#include <crm/common/xml.h>
// Bit flags kept in the flags member of pcmk_child_t
enum child_daemon_flags {
child_none = 0,
/* Subdaemon may be started again after it exits. Cleared when the respawn
* limit is exceeded, after a fatal or panic exit status, and when shutdown
* begins for that subdaemon.
*/
child_respawn = 1 << 0,
// Respawning is put off while the cluster layer is not connected
child_needs_cluster = 1 << 1,
// Set when a respawn was put off because the cluster layer was not connected
child_needs_retry = 1 << 2,
/* Cleared whenever the subdaemon is started or its exit is processed.
* NOTE(review): presumably set for subdaemons found already running at
* startup; the code that sets it is not visible here, so confirm.
*/
child_active_before_startup = 1 << 3,
/* While set, a subdaemon that has a PID but no IPC is only logged, not
* killed. Cleared when the subdaemon is started.
* NOTE(review): presumably set by stop_child(), which is not visible here;
* confirm.
*/
+ child_shutting_down = 1 << 4,
};
// Tracking data for one subdaemon managed by pacemakerd
typedef struct pcmk_child_s {
// Which subdaemon this entry describes
enum pcmk_ipc_server server;
// PID being tracked; 0 when no process is tracked
pid_t pid;
// Incremented each time an exit is processed; compared against MAX_RESPAWN
int respawn_count;
// Name of the user to run as; NULL is treated as root
const char *uid;
// Number of consecutive checks that found the PID alive but IPC unresponsive
int check_count;
// Group of enum child_daemon_flags
uint32_t flags;
} pcmk_child_t;
#define PCMK_PROCESS_CHECK_INTERVAL 1000 /* 1s */
#define PCMK_PROCESS_CHECK_RETRIES 5
#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
/* Index into the array below */
#define PCMK_CHILD_CONTROLD 5
/* Table of subdaemons managed by pacemakerd.
*
* The order matters: PCMK_CHILD_CONTROLD must stay the index of the controller
* entry, and pcmk_shutdown_worker() walks this array from the last entry to
* the first. Initializer order follows the members of pcmk_child_t: server,
* pid, respawn_count, uid, check_count, flags.
*/
static pcmk_child_t pcmk_children[] = {
{
pcmk_ipc_based, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_fenced, 0, 0, NULL,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_execd, 0, 0, NULL,
0, child_respawn
},
{
pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER,
0, child_respawn
},
{
pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
};
-static char *opts_default[] = { NULL, NULL };
-static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
-
crm_trigger_t *shutdown_trigger = NULL;
crm_trigger_t *startup_trigger = NULL;
time_t subdaemon_check_progress = 0;
// Whether we need root group access to talk to cluster layer
static bool need_root_group = true;
/* When contacted via pacemakerd-api by a client having sbd in
* the name we assume it is sbd-daemon which wants to know
* if pacemakerd shutdown gracefully.
* Thus when everything is shutdown properly pacemakerd
* waits till it has reported the graceful completion of
* shutdown to sbd and just when sbd-client closes the
* connection we can assume that the report has arrived
* properly so that pacemakerd can finally exit.
* Following two variables are used to track that handshake.
*/
unsigned int shutdown_complete_state_reported_to = 0;
-gboolean shutdown_complete_state_reported_client_closed = FALSE;
+bool shutdown_complete_state_reported_client_closed = false;
/* state we report when asked via pacemakerd-api status-ping */
const char *pacemakerd_state = PCMK__VALUE_INIT;
-gboolean running_with_sbd = FALSE; /* local copy */
+bool running_with_sbd = false;
GMainLoop *mainloop = NULL;
-static gboolean fatal_error = FALSE;
+static bool fatal_error = false;
static int child_liveness(pcmk_child_t *child);
static gboolean escalate_shutdown(gpointer data);
static int start_child(pcmk_child_t * child);
static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
static void pcmk_process_exit(pcmk_child_t * child);
static gboolean pcmk_shutdown_worker(gpointer user_data);
-static gboolean stop_child(pcmk_child_t * child, int signal);
+static void stop_child(pcmk_child_t *child, int signal);
+
/*!
 * \internal
 * \brief Call a function on every entry of pcmk_children[], in array order
 *
 * \param[in] fn  Function to call with a pointer to each subdaemon entry
 */
+static void
+for_each_child(void (*fn)(pcmk_child_t *child))
+{
+ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+ fn(&pcmk_children[i]);
+ }
+}
/*!
 * \internal
 * \brief Build the path of a subdaemon's executable under CRM_DAEMON_DIR
 *
 * \param[in] subdaemon  Subdaemon whose executable path is wanted
 *
 * \return Newly allocated string holding the executable's path
 * \note The caller is responsible for freeing the result with free().
 */
static inline char *
subdaemon_path(pcmk_child_t *subdaemon)
{
    const char *daemon_name = pcmk__server_name(subdaemon->server);

    return crm_strdup_printf(CRM_DAEMON_DIR "/%s", daemon_name);
}
/*!
 * \internal
 * \brief Check whether pacemakerd is connected to the cluster layer
 *
 * \return Result of pcmkd_corosync_connected() when built with Corosync
 *         support, otherwise always true
 */
static bool
pcmkd_cluster_connected(void)
{
    bool connected = true;

#if SUPPORT_COROSYNC
    connected = pcmkd_corosync_connected();
#endif

    return connected;
}
/*!
 * \internal
 * \brief Check the liveness of one subdaemon per call, in round-robin order
 *
 * Each call checks the subdaemon at a static index into pcmk_children[] with
 * child_liveness(), reacts to the result, then advances the index, wrapping
 * to 0 after the last entry. subdaemon_check_progress is refreshed whenever
 * the result counts as progress.
 *
 * \param[in] user_data  Ignored
 *
 * \return G_SOURCE_CONTINUE always (an unexpected liveness result exits the
 *         process with CRM_EX_FATAL instead of returning)
 */
static gboolean
check_next_subdaemon(gpointer user_data)
{
static int next_child = 0;
pcmk_child_t *child = &(pcmk_children[next_child]);
const char *name = pcmk__server_name(child->server);
const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
int rc = child_liveness(child);
crm_trace("Checked subdaemon %s[%lld]: %s (%d)",
name, pid, pcmk_rc_str(rc), rc);
switch (rc) {
case pcmk_rc_ok:
child->check_count = 0;
subdaemon_check_progress = time(NULL);
break;
case pcmk_rc_ipc_pid_only: // Child was previously OK
- if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
+ if (pcmk_is_set(child->flags, child_shutting_down)) {
+ crm_notice("Subdaemon %s[%lld] has stopped accepting IPC "
+ "connections during shutdown", name, pid);
+
+ } else if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
// cts-lab looks for this message
crm_crit("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s and will now be killed",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
stop_child(child, SIGKILL);
if (pcmk_is_set(child->flags, child_respawn)) {
// Respawn limit hasn't been reached, so retry another round
child->check_count = 0;
}
+
} else {
crm_notice("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s (will recheck later)",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
if (pcmk_is_set(child->flags, child_respawn)) {
/* as long as the respawn-limit isn't reached
and we haven't run out of connect retries
we account this as progress we are willing
to tell to sbd
*/
subdaemon_check_progress = time(NULL);
}
}
/* go to the next child and see if
we can make progress there
*/
break;
case pcmk_rc_ipc_unresponsive:
if (!pcmk_is_set(child->flags, child_respawn)) {
/* if a subdaemon is down and we don't want it
to be restarted this is a success during
shutdown. if it isn't restarted anymore
due to MAX_RESPAWN it is
rather no success.
*/
if (child->respawn_count <= MAX_RESPAWN) {
subdaemon_check_progress = time(NULL);
}
}
if (!pcmk_is_set(child->flags, child_active_before_startup)) {
crm_trace("Subdaemon %s[%lld] terminated", name, pid);
break;
}
if (pcmk_is_set(child->flags, child_respawn)) {
// cts-lab looks for this message
crm_err("Subdaemon %s[%lld] terminated", name, pid);
} else {
/* orderly shutdown */
crm_notice("Subdaemon %s[%lld] terminated", name, pid);
}
pcmk_process_exit(child);
break;
default:
crm_exit(CRM_EX_FATAL);
break; /* static analysis/noreturn */
}
// Advance to the next subdaemon, wrapping around at the end of the table
if (++next_child >= PCMK__NELEM(pcmk_children)) {
next_child = 0;
}
return G_SOURCE_CONTINUE;
}
/*!
 * \internal
 * \brief Force a subdaemon that has not yet terminated to exit
 *
 * If the subdaemon is tracked only by the special PID, its exit is processed
 * directly. Otherwise, if it still has a PID, it is signalled with SIGSEGV
 * through stop_child(). Scheduled from pcmk_shutdown_worker() with
 * pcmk__create_timer().
 *
 * \param[in,out] data  Subdaemon (pcmk_child_t *) to escalate
 *
 * \return G_SOURCE_REMOVE (equal to FALSE in GLib), so the source that called
 *         this is not run again
 */
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
if (child->pid == PCMK__SPECIAL_PID) {
pcmk_process_exit(child);
} else if (child->pid != 0) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Subdaemon %s not terminating in a timely manner, forcing",
pcmk__server_name(child->server));
stop_child(child, SIGSEGV);
}
- return FALSE;
+
+ return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Handle the exit of a subdaemon process
 *
 * Registered for each forked subdaemon by start_child() through
 * mainloop_child_add(). Logs how the process ended, reacts to the fatal and
 * panic exit statuses by disabling respawn and starting a cluster shutdown or
 * panic, and finally hands the subdaemon to pcmk_process_exit().
 *
 * \param[in,out] p         Mainloop child object; its user data is the
 *                          subdaemon's pcmk_child_t
 * \param[in]     pid       PID of the process that exited
 * \param[in]     core      Nonzero if the process dumped core
 * \param[in]     signo     Signal that terminated the process, or 0 if it
 *                          exited normally
 * \param[in]     exitcode  Exit status (examined only when signo is 0)
 */
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
pcmk_child_t *child = mainloop_child_userdata(p);
const char *name = mainloop_child_name(p);
if (signo) {
// cts-lab looks for this message
do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
"%s[%d] terminated with signal %d (%s)%s",
name, pid, signo, strsignal(signo),
(core? " and dumped core" : ""));
+ pcmk_process_exit(child);
+ return;
+ }
- } else {
- switch(exitcode) {
- case CRM_EX_OK:
- crm_info("%s[%d] exited with status %d (%s)",
- name, pid, exitcode, crm_exit_str(exitcode));
- break;
+ switch(exitcode) {
+ case CRM_EX_OK:
+ crm_info("%s[%d] exited with status %d (%s)",
+ name, pid, exitcode, crm_exit_str(exitcode));
+ break;
- case CRM_EX_FATAL:
- crm_warn("Shutting cluster down because %s[%d] had fatal failure",
- name, pid);
- child->flags &= ~child_respawn;
- fatal_error = TRUE;
- pcmk_shutdown(SIGTERM);
- break;
+ case CRM_EX_FATAL:
+ crm_warn("Shutting cluster down because %s[%d] had fatal failure",
+ name, pid);
+ child->flags &= ~child_respawn;
+ fatal_error = true;
+ pcmk_shutdown(SIGTERM);
+ break;
- case CRM_EX_PANIC:
- {
- char *msg = NULL;
+ case CRM_EX_PANIC:
+ {
+ char *msg = NULL;
- child->flags &= ~child_respawn;
- fatal_error = TRUE;
- msg = crm_strdup_printf("Subdaemon %s[%d] requested panic",
- name, pid);
- pcmk__panic(msg);
+ child->flags &= ~child_respawn;
+ fatal_error = true;
+ msg = crm_strdup_printf("Subdaemon %s[%d] requested panic",
+ name, pid);
+ pcmk__panic(msg);
- // Should never get here
- free(msg);
- pcmk_shutdown(SIGTERM);
- }
- break;
+ // Should never get here
+ free(msg);
+ pcmk_shutdown(SIGTERM);
+ }
+ break;
- default:
- // cts-lab looks for this message
- crm_err("%s[%d] exited with status %d (%s)",
- name, pid, exitcode, crm_exit_str(exitcode));
- break;
- }
+ default:
+ // cts-lab looks for this message
+ crm_err("%s[%d] exited with status %d (%s)",
+ name, pid, exitcode, crm_exit_str(exitcode));
+ break;
}
pcmk_process_exit(child);
}
/*!
 * \internal
 * \brief Update tracking data after a subdaemon exit and respawn if appropriate
 *
 * Resets the subdaemon's PID, check counter and "active before startup" flag,
 * counts the exit against MAX_RESPAWN, and then takes the first applicable
 * action: resume an in-progress shutdown, do nothing (respawn disabled),
 * panic (fail-fast enabled), skip the respawn because the IPC endpoint still
 * answers, postpone the respawn until the cluster returns, or start the
 * subdaemon again.
 *
 * \param[in,out] child  Subdaemon whose process has gone away
 */
static void
pcmk_process_exit(pcmk_child_t * child)
{
    const char *name = pcmk__server_name(child->server);

    child->pid = 0;
    child->check_count = 0;
    child->flags &= ~child_active_before_startup;

    child->respawn_count++;
    if (child->respawn_count > MAX_RESPAWN) {
        crm_err("Subdaemon %s exceeded maximum respawn count", name);
        child->flags &= ~child_respawn;
    }

    if (shutdown_trigger) {
        /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
        mainloop_set_trigger(shutdown_trigger);
        return;
    }

    if (!pcmk_is_set(child->flags, child_respawn)) {
        /* nothing to do */
        return;
    }

    if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
        pcmk__panic("Subdaemon failed");
        return;
    }

    if (child_liveness(child) == pcmk_rc_ok) {
        crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK",
                 name, pcmk__server_ipc_name(child->server));
        return;
    }

    if (pcmk_is_set(child->flags, child_needs_cluster)
        && !pcmkd_cluster_connected()) {

        crm_notice("Not respawning subdaemon %s until cluster returns", name);
        child->flags |= child_needs_retry;
        return;
    }

    // cts-lab looks for this message
    crm_notice("Respawning subdaemon %s after unexpected exit", name);
    start_child(child);
}
/*!
 * \internal
 * \brief Shut the subdaemons down one at a time, then stop the main loop
 *
 * Walks pcmk_children[] from the last entry to the first, keeping its
 * position in a static variable between calls. For the first subdaemon that
 * still has a PID it either begins termination (SIGTERM, respawn disabled,
 * and an escalation timer for entries before the controller) or logs
 * periodically that it is still waiting, then returns. Once every subdaemon
 * is confirmed stopped it reports completion, optionally waits for SBD to
 * pick up that state, and quits the main loop.
 *
 * NOTE(review): SHUTDOWN_ESCALATION_PERIOD is defined as 180000 with the
 * comment "3m" (milliseconds) and is passed unchanged to
 * pcmk__create_timer(), but the warning below prints it as a count of
 * "seconds"; confirm which unit is intended for the message.
 *
 * \param[in] user_data  Ignored
 *
 * \return G_SOURCE_CONTINUE always (after a fatal error the process exits
 *         with CRM_EX_FATAL instead of returning)
 */
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = PCMK__NELEM(pcmk_children) - 1;
static time_t next_log = 0;
if (phase == PCMK__NELEM(pcmk_children) - 1) {
crm_notice("Shutting down Pacemaker");
pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
}
for (; phase >= 0; phase--) {
pcmk_child_t *child = &(pcmk_children[phase]);
const char *name = pcmk__server_name(child->server);
+ time_t now = 0;
- if (child->pid != 0) {
- time_t now = time(NULL);
+ if (child->pid == 0) {
+ /* cleanup */
+ crm_debug("Subdaemon %s confirmed stopped", name);
+ child->pid = 0;
+ continue;
+ }
- if (pcmk_is_set(child->flags, child_respawn)) {
- if (child->pid == PCMK__SPECIAL_PID) {
- crm_warn("Subdaemon %s cannot be terminated (shutdown "
- "will be escalated after %ld seconds if it does "
- "not terminate on its own; set PCMK_"
- PCMK__ENV_FAIL_FAST "=1 to exit immediately "
- "instead)",
- name, (long) SHUTDOWN_ESCALATION_PERIOD);
- }
- next_log = now + 30;
- child->flags &= ~child_respawn;
- stop_child(child, SIGTERM);
- if (phase < PCMK_CHILD_CONTROLD) {
- pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD,
- escalate_shutdown, child);
- }
+ now = time(NULL);
- } else if (now >= next_log) {
- next_log = now + 30;
- crm_notice("Still waiting for subdaemon %s to terminate "
- QB_XS " pid=%lld", name, (long long) child->pid);
+ if (pcmk_is_set(child->flags, child_respawn)) {
+ if (child->pid == PCMK__SPECIAL_PID) {
+ crm_warn("Subdaemon %s cannot be terminated (shutdown "
+ "will be escalated after %ld seconds if it does "
+ "not terminate on its own; set PCMK_"
+ PCMK__ENV_FAIL_FAST "=1 to exit immediately "
+ "instead)",
+ name, (long) SHUTDOWN_ESCALATION_PERIOD);
}
- return TRUE;
+ next_log = now + 30;
+ child->flags &= ~child_respawn;
+ stop_child(child, SIGTERM);
+ if (phase < PCMK_CHILD_CONTROLD) {
+ pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD,
+ escalate_shutdown, child);
+ }
+
+ } else if (now >= next_log) {
+ next_log = now + 30;
+ crm_notice("Still waiting for subdaemon %s to terminate "
+ QB_XS " pid=%lld", name, (long long) child->pid);
}
- /* cleanup */
- crm_debug("Subdaemon %s confirmed stopped", name);
- child->pid = 0;
+ return G_SOURCE_CONTINUE;
}
crm_notice("Shutdown complete");
pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
if (!fatal_error && running_with_sbd &&
pcmk__get_sbd_sync_resource_startup() &&
!shutdown_complete_state_reported_client_closed) {
crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
- return TRUE;
+ return G_SOURCE_CONTINUE;
}
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Shutting down and staying down after fatal error");
#if SUPPORT_COROSYNC
pcmkd_shutdown_corosync();
#endif
crm_exit(CRM_EX_FATAL);
}
- return TRUE;
+ return G_SOURCE_CONTINUE;
}
/* TODO once libqb is taught to juggle with IPC end-points carried over as
bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
it shall hand over these descriptors here if/once they are successfully
pre-opened in (presumably) child_liveness(), to avoid any remaining
room for races */
/*!
 * \internal
 * \brief Fork and execute a subdaemon, optionally under valgrind or callgrind
 *
 * The parent registers the new process with the main loop (exit callback
 * pcmk_child_exit()) and returns. The forked process starts a new session,
 * drops to the configured user and groups, closes inherited file descriptors,
 * points stdin/stdout/stderr at the null device, and executes the subdaemon
 * binary; it never returns from this function.
 *
 * Valgrind or callgrind is used when the corresponding environment option is
 * true or contains the subdaemon's name, and a valgrind executable is known.
 *
 * \param[in,out] child  Subdaemon to start
 *
 * \return Standard Pacemaker return code (EACCES if the configured user
 *         cannot be looked up, otherwise pcmk_rc_ok in the parent)
 */
static int
start_child(pcmk_child_t * child)
{
uid_t uid = 0;
gid_t gid = 0;
- gboolean use_valgrind = FALSE;
- gboolean use_callgrind = FALSE;
+ bool use_valgrind = false;
+ bool use_callgrind = false;
const char *name = pcmk__server_name(child->server);
const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
- child->flags &= ~child_active_before_startup;
+ child->flags &= ~(child_active_before_startup | child_shutting_down);
child->check_count = 0;
// callgrind implies valgrind; each may be enabled globally or by subdaemon name
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
- use_callgrind = TRUE;
- use_valgrind = TRUE;
+ use_callgrind = true;
+ use_valgrind = true;
} else if ((env_callgrind != NULL)
&& (strstr(env_callgrind, name) != NULL)) {
- use_callgrind = TRUE;
- use_valgrind = TRUE;
+ use_callgrind = true;
+ use_valgrind = true;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
- use_valgrind = TRUE;
+ use_valgrind = true;
} else if ((env_valgrind != NULL)
&& (strstr(env_valgrind, name) != NULL)) {
- use_valgrind = TRUE;
+ use_valgrind = true;
}
if (use_valgrind && strlen(PCMK__VALGRIND_EXEC) == 0) {
crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found",
name);
- use_valgrind = FALSE;
+ use_valgrind = false;
}
if ((child->uid != NULL) && (crm_user_lookup(child->uid, &uid, &gid) < 0)) {
crm_err("Invalid user (%s) for subdaemon %s: not found",
child->uid, name);
return EACCES;
}
child->pid = fork();
pcmk__assert(child->pid != -1);
if (child->pid > 0) {
/* parent */
mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit);
- if (use_valgrind) {
- crm_info("Forked process %lld using user %lu (%s) and group %lu "
- "for subdaemon %s (valgrind enabled: %s)",
- (long long) child->pid, (unsigned long) uid,
- pcmk__s(child->uid, "root"), (unsigned long) gid, name,
- PCMK__VALGRIND_EXEC);
- } else {
- crm_info("Forked process %lld using user %lu (%s) and group %lu "
- "for subdaemon %s",
- (long long) child->pid, (unsigned long) uid,
- pcmk__s(child->uid, "root"), (unsigned long) gid, name);
- }
+ if (use_valgrind) {
+ crm_info("Forked process %lld using user %lu (%s) and group %lu "
+ "for subdaemon %s (valgrind enabled: %s)",
+ (long long) child->pid, (unsigned long) uid,
+ pcmk__s(child->uid, "root"), (unsigned long) gid, name,
+ PCMK__VALGRIND_EXEC);
+ } else {
+ crm_info("Forked process %lld using user %lu (%s) and group %lu "
+ "for subdaemon %s",
+ (long long) child->pid, (unsigned long) uid,
+ pcmk__s(child->uid, "root"), (unsigned long) gid, name);
+ }
return pcmk_rc_ok;
} else {
- /* Start a new session */
- (void)setsid();
+ char *path = subdaemon_path(child);
- /* Setup the two alternate arg arrays */
- opts_vgrind[0] = pcmk__str_copy(PCMK__VALGRIND_EXEC);
- if (use_callgrind) {
- opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
- opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
- CRM_STATE_DIR "/callgrind.out.%p");
- opts_vgrind[3] = subdaemon_path(child);
- opts_vgrind[4] = NULL;
- } else {
- opts_vgrind[1] = subdaemon_path(child);
- opts_vgrind[2] = NULL;
- opts_vgrind[3] = NULL;
- opts_vgrind[4] = NULL;
- }
- opts_default[0] = subdaemon_path(child);
+ /* Start a new session */
+ setsid();
if(gid) {
// Drop root group access if not needed
if (!need_root_group && (setgid(gid) < 0)) {
crm_warn("Could not set subdaemon %s group to %lu: %s",
name, (unsigned long) gid, strerror(errno));
}
/* Initialize supplementary groups to only those always granted to
* the user, plus haclient (so we can access IPC).
*/
if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initialize system groups for subdaemon %s: %s "
QB_XS " errno=%d",
name, pcmk_rc_str(errno), errno);
}
}
if (uid && setuid(uid) < 0) {
// Argument order follows the format: subdaemon, user name, error text
crm_warn("Could not set subdaemon %s user to %s: %s "
QB_XS " uid=%lu errno=%d",
name, child->uid, strerror(errno), (unsigned long) uid,
errno);
}
pcmk__close_fds_in_child(true);
pcmk__open_devnull(O_RDONLY); // stdin (fd 0)
pcmk__open_devnull(O_WRONLY); // stdout (fd 1)
pcmk__open_devnull(O_WRONLY); // stderr (fd 2)
- if (use_valgrind) {
- (void)execvp(PCMK__VALGRIND_EXEC, opts_vgrind);
// Keep the "callgrind.out.%p" file name that the replaced code used
+ if (use_callgrind) {
+ char *out_file = pcmk__str_copy("--callgrind-out-file="
+ CRM_STATE_DIR "/callgrind.out.%p");
+ execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, "--tool=callgrind",
+ out_file, path, (char *) NULL);
+ free(out_file);
+ } else if (use_valgrind) {
+ execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, path, (char *) NULL);
} else {
- char *path = subdaemon_path(child);
-
- (void) execvp(path, opts_default);
- free(path);
+ execlp(path, path, (char *) NULL);
}
+
// Reached only if the exec call failed
+ free(path);
crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno));
crm_exit(CRM_EX_FATAL);
}
return pcmk_rc_ok; /* never reached */
}
/*!
* \internal
* \brief Check the liveness of the child based on IPC name and PID if tracked
*
* \param[in,out] child Child tracked data
*
* \return Standard Pacemaker return code
*
* \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
* indicating that no trace of IPC liveness was detected,
* pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
* an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
* the child is up by PID but not IPC end-point (possibly starting).
* \note This function doesn't modify any of \p child members but \c pid,
* and is not actively toying with processes as such but invoking
* \c stop_child in one particular case (there's for some reason
* a different authentic holder of the IPC end-point).
*/
static int
child_liveness(pcmk_child_t *child)
{
uid_t cl_uid = 0;
gid_t cl_gid = 0;
const uid_t root_uid = 0;
const gid_t root_gid = 0;
const uid_t *ref_uid;
const gid_t *ref_gid;
const char *name = pcmk__server_name(child->server);
int rc = pcmk_rc_ipc_unresponsive;
+ int pid_active = pcmk_rc_ok;
int legacy_rc = pcmk_ok;
pid_t ipc_pid = 0;
if (child->uid == NULL) {
ref_uid = &root_uid;
ref_gid = &root_gid;
} else {
ref_uid = &cl_uid;
ref_gid = &cl_gid;
legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
}
if (legacy_rc < 0) {
rc = pcmk_legacy2rc(legacy_rc);
crm_err("Could not find user and group IDs for user %s: %s "
QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
} else {
const char *ipc_name = pcmk__server_ipc_name(child->server);
rc = pcmk__ipc_is_authentic_process_active(ipc_name,
*ref_uid, *ref_gid,
&ipc_pid);
if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
if (child->pid <= 0) {
/* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
* initializes a new child. If rc is
* pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
* investigate further.
*/
child->pid = ipc_pid;
} else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
/* An unexpected (but authorized) process is responding to
* IPC. Investigate further.
*/
rc = pcmk_rc_ipc_unresponsive;
}
}
}
- if (rc == pcmk_rc_ipc_unresponsive) {
- /* If we get here, a child without IPC is being tracked, no IPC liveness
- * has been detected, or IPC liveness has been detected with an
- * unexpected (but authorized) process. This is safe on FreeBSD since
- * the only change possible from a proper child's PID into "special" PID
- * of 1 behind more loosely related process.
+ if (rc != pcmk_rc_ipc_unresponsive) {
+ return rc;
+ }
+
+ /* If we get here, a child without IPC is being tracked, no IPC liveness
+ * has been detected, or IPC liveness has been detected with an
+ * unexpected (but authorized) process. This is safe on FreeBSD since
+ * the only change possible from a proper child's PID into "special" PID
+ * of 1 behind more loosely related process.
+ */
+ pid_active = pcmk__pid_active(child->pid, name);
+
+ if ((ipc_pid != 0)
+ && ((pid_active != pcmk_rc_ok)
+ || (ipc_pid == PCMK__SPECIAL_PID)
+ || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) {
+ /* An unexpected (but authorized) process was detected at the IPC
+ * endpoint, and either it is active, or the child we're tracking is
+ * not.
*/
- int ret = pcmk__pid_active(child->pid, name);
-
- if (ipc_pid && ((ret != pcmk_rc_ok)
- || ipc_pid == PCMK__SPECIAL_PID
- || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) {
- /* An unexpected (but authorized) process was detected at the IPC
- * endpoint, and either it is active, or the child we're tracking is
- * not.
+
+ if (pid_active == pcmk_rc_ok) {
+ /* The child we're tracking is active. Kill it, and adopt the
+ * detected process. This assumes that our children don't fork
+ * (thus getting a different PID owning the IPC), but rather the
+ * tracking got out of sync because of some means external to
+ * Pacemaker, and adopting the detected process is better than
+ * killing it and possibly having to spawn a new child.
*/
+ /* not possessing IPC, after all (what about corosync CPG?) */
+ stop_child(child, SIGKILL);
+ }
+ rc = pcmk_rc_ok;
+ child->pid = ipc_pid;
+ } else if (pid_active == pcmk_rc_ok) {
+ // Our tracked child's PID was found active, but not its IPC
+ rc = pcmk_rc_ipc_pid_only;
+ } else if ((child->pid == 0) && (pid_active == EINVAL)) {
+ // FreeBSD can return EINVAL
+ rc = pcmk_rc_ipc_unresponsive;
+ } else if (pid_active == EACCES) {
+ rc = pcmk_rc_ipc_unauthorized;
+ } else if (pid_active == ESRCH) {
+ rc = pcmk_rc_ipc_unresponsive;
+ } else {
+ rc = pid_active;
+ }
+
+ return rc;
+}
+
+// Per-child callback: zero the counter that startup probing borrowed for rounds
+static void
+reset_respawn_count(pcmk_child_t *child)
+{
+    child->respawn_count = 0;
+}
+
+#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
+
+static int
+child_up_but_no_ipc(pcmk_child_t *child)
+{
+    const char *ipc_name = pcmk__server_ipc_name(child->server);
+    // Final round (respawn_count holds the round number): stop waiting
+    if (child->respawn_count == WAIT_TRIES) {
+        crm_crit("%s IPC endpoint for existing process %lld did not (re)appear",
+                 ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
+        return pcmk_rc_ipc_pid_only;
+    }
+    // Otherwise report the rounds left; %lld must match the long long cast
+    crm_warn("Cannot find %s IPC endpoint for existing process %lld, could still "
+             "reappear in %d attempts",
+             ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid),
+             WAIT_TRIES - child->respawn_count);
+    return EAGAIN;
+}
+
+static int
+child_alive(pcmk_child_t *child)
+{
+ const char *name = pcmk__server_name(child->server);
+
+ if (child->pid == PCMK__SPECIAL_PID) {
+ if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
+ crm_crit("Cannot track pre-existing process for %s IPC on this "
+ "platform and PCMK_" PCMK__ENV_FAIL_FAST " requested",
+ name);
+ return EOPNOTSUPP;
+
+ } else if (child->respawn_count == WAIT_TRIES) {
+ /* Because PCMK__ENV_FAIL_FAST wasn't requested, we can't bail
+ * out. Instead, switch to IPC liveness monitoring which is not
+ * very suitable for heavy system load.
+ */
+ crm_notice("Cannot track pre-existing process for %s IPC on this "
+ "platform but assuming it is stable and using liveness "
+ "monitoring", name);
+ crm_warn("The process for %s IPC cannot be terminated, so "
+ "shutdown will be delayed by %d s to allow time for it "
+ "to terminate on its own", name, SHUTDOWN_ESCALATION_PERIOD);
- if (ret == pcmk_rc_ok) {
- /* The child we're tracking is active. Kill it, and adopt the
- * detected process. This assumes that our children don't fork
- * (thus getting a different PID owning the IPC), but rather the
- * tracking got out of sync because of some means external to
- * Pacemaker, and adopting the detected process is better than
- * killing it and possibly having to spawn a new child.
- */
- /* not possessing IPC, afterall (what about corosync CPG?) */
- stop_child(child, SIGKILL);
- }
- rc = pcmk_rc_ok;
- child->pid = ipc_pid;
- } else if (ret == pcmk_rc_ok) {
- // Our tracked child's PID was found active, but not its IPC
- rc = pcmk_rc_ipc_pid_only;
- } else if ((child->pid == 0) && (ret == EINVAL)) {
- // FreeBSD can return EINVAL
- rc = pcmk_rc_ipc_unresponsive;
} else {
- switch (ret) {
- case EACCES:
- rc = pcmk_rc_ipc_unauthorized;
- break;
- case ESRCH:
- rc = pcmk_rc_ipc_unresponsive;
- break;
- default:
- rc = ret;
- break;
- }
+ crm_warn("Cannot track pre-existing process for %s IPC on this "
+ "platform; checking %d more times",
+ name, WAIT_TRIES - child->respawn_count);
+ return EAGAIN;
+ }
+ }
+
+ crm_notice("Tracking existing %s process (pid=%lld)",
+ name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
+ child->respawn_count = -1; /* 0~keep watching */
+ child->flags |= child_active_before_startup;
+ return pcmk_rc_ok;
+}
+
+static int
+find_and_track_child(pcmk_child_t *child, int rounds, bool *wait_in_progress)
+{
+ int rc = pcmk_rc_ok;
+ const char *name = pcmk__server_name(child->server);
+
+ if (child->respawn_count < 0) {
+ return EAGAIN;
+ }
+
+ rc = child_liveness(child);
+ if (rc == pcmk_rc_ipc_unresponsive) {
+ /* As a speculation, don't give up if there are more rounds to
+ * come for other reasons, but don't artificially wait just
+ * because of this, since we would preferably start ASAP.
+ */
+ return EAGAIN;
+ }
+
+ child->respawn_count = rounds;
+
+ if (rc == pcmk_rc_ok) {
+ rc = child_alive(child);
+
+ if (rc == EAGAIN) {
+ *wait_in_progress = true;
}
+
+ } else if (rc == pcmk_rc_ipc_pid_only) {
+ rc = child_up_but_no_ipc(child);
+
+ if (rc == EAGAIN) {
+ *wait_in_progress = true;
+ }
+
+ } else {
+ crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", name,
+ pcmk_rc_str(rc), rc);
}
+
return rc;
}
/*!
* \internal
* \brief Initial one-off check of the pre-existing "child" processes
*
* With "child" process, we mean the subdaemon that defines an API end-point
* (all of them do as of the comment) -- the possible complement is skipped
* as it is deemed it has no such shared resources to cause conflicts about,
* hence it can presumably be started anew without hesitation.
* If that won't hold true in the future, the concept of a shared resource
* will have to be generalized beyond the API end-point.
*
* For boundary cases that the "child" is still starting (IPC end-point is yet
* to be witnessed), or more rarely (practically FreeBSD only), when there's
* a pre-existing "untrackable" authentic process, we give the situation some
* time to possibly unfold in the right direction, meaning that said socket
* will appear or the unattainable process will disappear per the observable
* IPC, respectively.
*
* \return Standard Pacemaker return code
*
* \note Since this gets run at the very start, \c respawn_count fields
* for particular children get temporarily overloaded with "rounds
* of waiting" tracking, restored once we are about to finish with
* success (i.e. returning value >=0) and will remain unrestored
* otherwise. One way to suppress liveness detection logic for
* particular child is to set the said value to a negative number.
*/
-#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
int
find_and_track_existing_processes(void)
{
bool wait_in_progress;
- int rc;
size_t i, rounds;
for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
wait_in_progress = false;
- for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- const char *name = pcmk__server_name(pcmk_children[i].server);
- const char *ipc_name = NULL;
- if (pcmk_children[i].respawn_count < 0) {
- continue;
- }
-
- rc = child_liveness(&pcmk_children[i]);
- if (rc == pcmk_rc_ipc_unresponsive) {
- /* As a speculation, don't give up if there are more rounds to
- * come for other reasons, but don't artificially wait just
- * because of this, since we would preferably start ASAP.
- */
- continue;
- }
+ for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+ int rc = find_and_track_child(&pcmk_children[i], rounds,
+ &wait_in_progress);
- // @TODO Functionize more of this to reduce nesting
- ipc_name = pcmk__server_ipc_name(pcmk_children[i].server);
- pcmk_children[i].respawn_count = rounds;
- switch (rc) {
- case pcmk_rc_ok:
- if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
- if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
- crm_crit("Cannot reliably track pre-existing"
- " authentic process behind %s IPC on this"
- " platform and PCMK_" PCMK__ENV_FAIL_FAST
- " requested", ipc_name);
- return EOPNOTSUPP;
- } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
- crm_notice("Assuming pre-existing authentic, though"
- " on this platform untrackable, process"
- " behind %s IPC is stable (was in %d"
- " previous samples) so rather than"
- " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
- " not requested), we just switch to a"
- " less optimal IPC liveness monitoring"
- " (not very suitable for heavy load)",
- name, WAIT_TRIES - 1);
- crm_warn("The process behind %s IPC cannot be"
- " terminated, so the overall shutdown"
- " will get delayed implicitly (%ld s),"
- " which serves as a graceful period for"
- " its native termination if it vitally"
- " depends on some other daemons going"
- " down in a controlled way already",
- name, (long) SHUTDOWN_ESCALATION_PERIOD);
- } else {
- wait_in_progress = true;
- crm_warn("Cannot reliably track pre-existing"
- " authentic process behind %s IPC on this"
- " platform, can still disappear in %d"
- " attempt(s)", ipc_name,
- WAIT_TRIES - pcmk_children[i].respawn_count);
- continue;
- }
- }
- crm_notice("Tracking existing %s process (pid=%lld)",
- name,
- (long long) PCMK__SPECIAL_PID_AS_0(
- pcmk_children[i].pid));
- pcmk_children[i].respawn_count = -1; /* 0~keep watching */
- pcmk_children[i].flags |= child_active_before_startup;
- break;
- case pcmk_rc_ipc_pid_only:
- if (pcmk_children[i].respawn_count == WAIT_TRIES) {
- crm_crit("%s IPC endpoint for existing authentic"
- " process %lld did not (re)appear",
- ipc_name,
- (long long) PCMK__SPECIAL_PID_AS_0(
- pcmk_children[i].pid));
- return rc;
- }
- wait_in_progress = true;
- crm_warn("Cannot find %s IPC endpoint for existing"
- " authentic process %lld, can still (re)appear"
- " in %d attempts (?)",
- ipc_name,
- (long long) PCMK__SPECIAL_PID_AS_0(
- pcmk_children[i].pid),
- WAIT_TRIES - pcmk_children[i].respawn_count);
- continue;
- default:
- crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d",
- name, pcmk_rc_str(rc), rc);
- return rc;
+            if (rc == pcmk_rc_ok) {
+                continue;   // child adopted; still probe the remaining children
+            } else if (rc != EAGAIN) {
+                return rc;
+            }
}
+
if (!wait_in_progress) {
break;
}
+
pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
}
- for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- pcmk_children[i].respawn_count = 0; /* restore pristine state */
- }
+ for_each_child(reset_respawn_count);
pcmk__create_timer(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
NULL);
return pcmk_rc_ok;
}
+/* Callback for for_each_child(): start a subdaemon only if no process is
+ * being tracked for it yet (pid still 0) */
+static void
+start_subdaemon(pcmk_child_t *child)
+{
+    if (child->pid == 0) {
+        // Nothing is tracked for this subdaemon, so launch it now
+        start_child(child);
+    }
+}
+
gboolean
init_children_processes(void *user_data)
{
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
/* Corosync clusters can drop root group access, because we set
* uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
* to corosync.
*/
need_root_group = false;
}
/* start any children that have not been detected */
- for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- if (pcmk_children[i].pid != 0) {
- /* we are already tracking it */
- continue;
- }
-
- start_child(&(pcmk_children[i]));
- }
+ for_each_child(start_subdaemon);
/* From this point on, any daemons being started will be due to
* respawning rather than node start.
*
* This may be useful for the daemons to know
*/
pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
pacemakerd_state = PCMK__VALUE_RUNNING;
- return TRUE;
+ return G_SOURCE_CONTINUE;
}
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
-void
-restart_cluster_subdaemons(void)
+static void
+restart_subdaemon(pcmk_child_t *child)
{
- for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
- continue;
- }
+ if (!pcmk_is_set(child->flags, child_needs_retry) || child->pid != 0) {
+ return;
+ }
- crm_notice("Respawning cluster-based subdaemon %s",
- pcmk__server_name(pcmk_children[i].server));
- if (start_child(&pcmk_children[i])) {
- pcmk_children[i].flags &= ~child_needs_retry;
- }
+ crm_notice("Respawning cluster-based subdaemon %s",
+ pcmk__server_name(child->server));
+
+ if (start_child(child)) {
+ child->flags &= ~child_needs_retry;
}
}
-static gboolean
-stop_child(pcmk_child_t * child, int signal)
+void
+restart_cluster_subdaemons(void)
+{
+ for_each_child(restart_subdaemon); // callback skips children needing no retry
+}
+
+static void
+stop_child(pcmk_child_t *child, int signal)
{
const char *name = pcmk__server_name(child->server);
if (signal == 0) {
signal = SIGTERM;
}
/* why to skip PID of 1?
- FreeBSD ~ how untrackable process behind IPC is masqueraded as
- elsewhere: how "init" task is designated; in particular, in systemd
arrangement of socket-based activation, this is pretty real */
if (child->pid == PCMK__SPECIAL_PID) {
crm_debug("Nothing to do to stop subdaemon %s[%lld]",
name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
- return TRUE;
+ return;
}
if (child->pid <= 0) {
crm_trace("Nothing to do to stop subdaemon %s: Not running", name);
- return TRUE;
+ return;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping subdaemon %s "
QB_XS " via signal %d to process %lld",
name, signal, (long long) child->pid);
+ child->flags |= child_shutting_down;
} else {
crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s",
name, (long long) child->pid, signal, strerror(errno));
}
-
- return TRUE;
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 4:38 PM (1 d, 1 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1988718
Default Alt Text
(61 KB)

Event Timeline