Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4623634
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
61 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index b85e7eaf6c..217ec35741 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -1,486 +1,486 @@
/*
- * Copyright 2010-2024 the Pacemaker project contributors
+ * Copyright 2010-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#if SUPPORT_COROSYNC
#include "pcmkd_corosync.h"
#endif
#include <pwd.h>
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/crm.h> /* indirectly: CRM_EX_* */
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc_pacemakerd.h>
#include <crm/common/output_internal.h>
#include <crm/cluster/internal.h>
#include <crm/cluster.h>
#define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons"
/* Command-line option state, filled in by GLib option parsing via the
 * "entries" table (standby is set by standby_cb() rather than directly).
 */
struct {
gboolean features;      // --features/-F: print version and feature list
gboolean foreground;    // --foreground/-f: accepted but ignored
gboolean shutdown;      // --shutdown/-S: ask a running instance to shut down
gboolean standby;       // --standby/-s: start this node in standby state
} options;
static pcmk__output_t *out = NULL;
/* Output formats this tool offers; handed to pcmk__register_formats() in
 * main(). The final all-NULL entry presumably terminates the list.
 */
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief "features" message formatter registered for the "default" format
 *
 * Emits one informational message with the Pacemaker version, build
 * identifier, feature set and the list of compiled-in features.
 *
 * \param[in,out] out   Output object to write to
 * \param[in]     args  Message arguments (none are consumed)
 *
 * \return \c pcmk_rc_ok
 */
PCMK__OUTPUT_ARGS("features")
static int
pacemakerd_features(pcmk__output_t *out, va_list args)
{
    out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s",
              PACEMAKER_VERSION, BUILD_VERSION,
              CRM_FEATURE_SET, CRM_FEATURES);

    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief "features" message formatter registered for the "xml" format
 *
 * Creates a parent element carrying version, build and feature-set
 * attributes, then a list with one text node per space-separated entry of
 * \c CRM_FEATURES.
 *
 * \param[in,out] out   Output object to write to
 * \param[in]     args  Message arguments (none are consumed)
 *
 * \return \c pcmk_rc_ok
 */
PCMK__OUTPUT_ARGS("features")
static int
pacemakerd_features_xml(pcmk__output_t *out, va_list args)
{
    gchar **tokens = NULL;

    pcmk__output_xml_create_parent(out, PCMK_XE_PACEMAKERD,
                                   PCMK_XA_VERSION, PACEMAKER_VERSION,
                                   PCMK_XA_BUILD, BUILD_VERSION,
                                   PCMK_XA_FEATURE_SET, CRM_FEATURE_SET,
                                   NULL);
    out->begin_list(out, NULL, NULL, PCMK_XE_FEATURES);

    // One child node per feature name
    tokens = g_strsplit(CRM_FEATURES, " ", 0);
    for (int idx = 0; tokens[idx] != NULL; idx++) {
        pcmk__output_create_xml_text_node(out, PCMK_XE_FEATURE, tokens[idx]);
    }
    g_strfreev(tokens);

    out->end_list(out);
    pcmk__output_xml_pop_parent(out);
    return pcmk_rc_ok;
}
/* Maps the "features" message to its per-format implementation; registered
 * on the output object with pcmk__register_messages() in main().
 */
static pcmk__message_entry_t fmt_functions[] = {
{ "features", "default", pacemakerd_features },
{ "features", "xml", pacemakerd_features_xml },
{ NULL, NULL, NULL }
};
/* Option callback for --pid-file/-p. The option is documented as ignored in
 * the entries table, so the argument is accepted and discarded.
 */
static gboolean
pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return TRUE;
}
/* Option callback for --standby/-s: exports the requested node start state
 * through the environment and records that standby was requested.
 */
static gboolean
standby_cb(const gchar *option_name, const gchar *optarg, gpointer data,
           GError **err)
{
    pcmk__set_env_option(PCMK__ENV_NODE_START_STATE, PCMK_VALUE_STANDBY,
                         false);
    options.standby = TRUE;

    return TRUE;
}
/* Command-line options specific to pacemakerd; added to the argument context
 * by build_arg_context(). Flag options store straight into "options", while
 * --pid-file and --standby go through callbacks.
 */
static GOptionEntry entries[] = {
{ "features", 'F', 0, G_OPTION_ARG_NONE, &options.features,
"Display full version and list of features Pacemaker was built with",
NULL },
{ "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground,
"(Ignored) Pacemaker always runs in the foreground",
NULL },
{ "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb,
"(Ignored) Daemon pid file location",
"FILE" },
{ "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown,
"Instruct Pacemaker to shutdown on this machine",
NULL },
// G_OPTION_FLAG_NO_ARG: the callback is invoked without an option argument
{ "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb,
"Start node in standby state",
NULL },
{ NULL }
};
/* Signal handler that only logs the received signal; main() installs it for
 * SIGHUP.
 */
static void
pcmk_ignore(int nsig)
{
crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig);
}
/* Signal handler that panics the node; main() installs it for SIGQUIT. */
static void
pcmk_sigquit(int nsig)
{
pcmk__panic("Received SIGQUIT");
}
/*!
 * \internal
 * \brief Change the ownership of a path, logging a warning on failure
 *
 * Failure is not fatal: the caller continues regardless.
 *
 * \param[in] path  Path whose ownership should change
 * \param[in] uid   User ID to assign
 * \param[in] gid   Group ID to assign
 */
static void
pacemakerd_chown(const char *path, uid_t uid, gid_t gid)
{
    if (chown(path, uid, gid) < 0) {
        // Capture errno at the point of failure, before any other call
        const int chown_errno = errno;

        /* gid_t has no portable printf conversion, so widen it explicitly
         * (as start_child() does) instead of passing it to %d.
         */
        crm_warn("Cannot change the ownership of %s to user %s and gid %lu: %s",
                 path, CRM_DAEMON_USER, (unsigned long) gid,
                 pcmk_rc_str(chown_errno));
    }
}
/*!
 * \internal
 * \brief Create the directories Pacemaker needs and give them to the
 *        cluster daemon user
 *
 * Exits with \c CRM_EX_NOUSER if the daemon user cannot be looked up.
 * A directory that cannot be created is logged as a warning and skipped.
 */
static void
create_pcmk_dirs(void)
{
    const char *managed_dirs[] = {
        PCMK__PERSISTENT_DATA_DIR,  // core/blackbox/scheduler/CIB files
        CRM_CORE_DIR,               // core files
        CRM_BLACKBOX_DIR,           // blackbox dumps
        PCMK_SCHEDULER_INPUT_DIR,   // scheduler inputs
        CRM_CONFIG_DIR,             // the Cluster Information Base (CIB)
        // Don't build PCMK__OCF_TMP_DIR the executor will do it
        NULL
    };
    uid_t daemon_uid = 0;
    gid_t daemon_gid = 0;

    if (pcmk_daemon_user(&daemon_uid, &daemon_gid) < 0) {
        crm_err("Cluster user %s does not exist, aborting Pacemaker startup",
                CRM_DAEMON_USER);
        crm_exit(CRM_EX_NOUSER);
    }

    // Used by some resource agents
    if ((mkdir(CRM_STATE_DIR, 0750) >= 0) || (errno == EEXIST)) {
        // Newly created or already present: (re)assert ownership either way
        pacemakerd_chown(CRM_STATE_DIR, daemon_uid, daemon_gid);
    } else {
        crm_warn("Could not create directory " CRM_STATE_DIR ": %s",
                 pcmk_rc_str(errno));
    }

    for (const char **dir = managed_dirs; *dir != NULL; dir++) {
        int rc = pcmk__build_path(*dir, 0750);

        if (rc == pcmk_rc_ok) {
            pacemakerd_chown(*dir, daemon_uid, daemon_gid);
        } else {
            crm_warn("Could not create directory %s: %s",
                     *dir, pcmk_rc_str(rc));
        }
    }
}
/*!
 * \internal
 * \brief Raise the core file size limit as far as permitted
 *
 * If the hard limit is zero, it is lifted to unlimited when running with an
 * effective UID of 0; otherwise a notice is logged and nothing changes. The
 * soft limit is then raised to the hard limit. Failures are only logged.
 */
static void
remove_core_file_limit(void)
{
    struct rlimit limit;

    // Get current limits
    if (getrlimit(RLIMIT_CORE, &limit) < 0) {
        crm_notice("Unable to check system core file limits "
                   "(consider ensuring the size is unlimited): %s",
                   strerror(errno));
        return;
    }

    // Check whether core dumps are disabled
    if (limit.rlim_max == 0) {
        const bool is_root = (geteuid() == 0);

        if (!is_root) {
            // Disabled, and there's nothing we can do about it
            crm_notice("Core dumps are disabled (consider enabling them)");
            return;
        }

        // Disabled, but we're root, so enable them
        limit.rlim_max = RLIM_INFINITY;
    }

    // Raise soft limit to hard limit (if not already done)
    if (limit.rlim_cur != limit.rlim_max) {
        limit.rlim_cur = limit.rlim_max;

        if (setrlimit(RLIMIT_CORE, &limit) < 0) {
            crm_notice("Unable to raise system limit on core file size "
                       "(consider doing so manually): %s",
                       strerror(errno));
            return;
        }
    }

    if (limit.rlim_cur != RLIM_INFINITY) {
        crm_trace("Core file size is limited to %llu bytes",
                  (unsigned long long) limit.rlim_cur);
    } else {
        crm_trace("Core file size is unlimited");
    }
}
/*!
 * \internal
 * \brief IPC event callback used while talking to an existing pacemakerd
 *
 * Only reply events are examined. A non-OK status or a reply that is not a
 * shutdown reply is reported as an error on the global output object.
 *
 * \param[in] pacemakerd_api  IPC API connection (unused)
 * \param[in] event_type      Kind of event being delivered
 * \param[in] status          Exit-style status accompanying the event
 * \param[in] event_data      For reply events, the pacemakerd reply
 * \param[in] user_data       Unused
 */
static void
pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
                    enum pcmk_ipc_event event_type, crm_exit_t status,
                    void *event_data, void *user_data)
{
    pcmk_pacemakerd_api_reply_t *reply = event_data;

    if (event_type != pcmk_ipc_event_reply) {
        return;
    }

    if (status != CRM_EX_OK) {
        out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status));
        return;
    }

    if (reply->reply_type == pcmk_pacemakerd_reply_shutdown) {
        return;     // The expected reply; nothing to report
    }

    out->err(out, "Unknown reply type %d from pacemakerd",
             reply->reply_type);
}
/*!
 * \internal
 * \brief Build the command-line parsing context for pacemakerd
 *
 * \param[in,out] args   Common argument storage
 * \param[out]    group  Passed through to pcmk__build_arg_context()
 *
 * \return Context with the pacemakerd-specific entries added
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *ctx = pcmk__build_arg_context(args, "text (default), xml",
                                                  group, NULL);

    pcmk__add_main_args(ctx, entries);
    return ctx;
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
crm_exit_t exit_code = CRM_EX_OK;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "p");
GOptionContext *context = build_arg_context(args, &output_group);
bool old_instance_connected = false;
pcmk_ipc_api_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
subdaemon_check_progress = time(NULL);
setenv("LC_ALL", "C", 1); // Ensure logs are in a common language
crm_log_preinit(NULL, argc, argv);
mainloop_add_signal(SIGHUP, pcmk_ignore);
mainloop_add_signal(SIGQUIT, pcmk_sigquit);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if ((rc != pcmk_rc_ok) || (out == NULL)) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
pcmk__register_messages(out, fmt_functions);
if (options.features) {
out->message(out, "features");
exit_code = CRM_EX_OK;
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
if (options.shutdown) {
pcmk__cli_init_logging(PCMK__SERVER_PACEMAKERD, args->verbosity);
} else {
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
}
crm_debug("Checking for existing Pacemaker instance");
rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_pacemakerd);
if (old_instance == NULL) {
out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc));
exit_code = pcmk_rc2exitc(rc);
goto done;
}
pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL);
rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2);
if (rc != pcmk_rc_ok) {
crm_debug("No existing %s instance found: %s",
pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc));
}
old_instance_connected = pcmk_ipc_is_connected(old_instance);
if (options.shutdown) {
if (old_instance_connected) {
rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name);
pcmk_dispatch_ipc(old_instance);
exit_code = pcmk_rc2exitc(rc);
if (exit_code != CRM_EX_OK) {
pcmk_free_ipc_api(old_instance);
goto done;
}
/* We get the ACK immediately, and the response right after that,
* but it might take a while for pacemakerd to get around to
* shutting down. Wait for that to happen (with 30-minute timeout).
*/
for (int i = 0; i < 900; i++) {
if (!pcmk_ipc_is_connected(old_instance)) {
exit_code = CRM_EX_OK;
pcmk_free_ipc_api(old_instance);
goto done;
}
sleep(2);
}
exit_code = CRM_EX_TIMEOUT;
pcmk_free_ipc_api(old_instance);
goto done;
} else {
out->err(out, "Could not request shutdown "
"of existing Pacemaker instance: %s", pcmk_rc_str(rc));
pcmk_free_ipc_api(old_instance);
exit_code = CRM_EX_DISCONNECT;
goto done;
}
} else if (old_instance_connected) {
pcmk_free_ipc_api(old_instance);
crm_err("Aborting start-up because active Pacemaker instance found");
exit_code = CRM_EX_FATAL;
goto done;
}
pcmk_free_ipc_api(old_instance);
/* Don't allow any accidental output after this point. */
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
out = NULL;
}
#if SUPPORT_COROSYNC
if (pacemakerd_read_config() == FALSE) {
crm_exit(CRM_EX_UNAVAILABLE);
}
#endif
// OCF shell functions and cluster-glue need facility under different name
{
const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY);
if (!pcmk__str_eq(facility, PCMK_VALUE_NONE,
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__set_env_option("LOGFACILITY", facility, true);
}
}
crm_notice("Starting Pacemaker %s " QB_XS " build=%s features:%s",
PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_loop_new(NULL, FALSE);
remove_core_file_limit();
create_pcmk_dirs();
pcmk__serve_pacemakerd_ipc(&ipcs, &pacemakerd_ipc_callbacks);
#if SUPPORT_COROSYNC
/* Allows us to block shutdown */
if (!cluster_connect_cfg()) {
exit_code = CRM_EX_PROTOCOL;
goto done;
}
#endif
if (pcmk__locate_sbd() > 0) {
- running_with_sbd = TRUE;
+ running_with_sbd = true;
}
switch (find_and_track_existing_processes()) {
case pcmk_rc_ok:
break;
case pcmk_rc_ipc_unauthorized:
exit_code = CRM_EX_CANTCREAT;
goto done;
default:
exit_code = CRM_EX_FATAL;
goto done;
};
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
crm_notice("Waiting for startup-trigger from SBD.");
pacemakerd_state = PCMK__VALUE_WAIT_FOR_PING;
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
} else {
if (running_with_sbd) {
crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
"by your SBD version) improve reliability of "
"interworking between SBD & pacemaker.");
}
pacemakerd_state = PCMK__VALUE_STARTING_DAEMONS;
init_children_processes(NULL);
}
crm_notice("Pacemaker daemon successfully started and accepting connections");
g_main_loop_run(mainloop);
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_loop_unref(mainloop);
#if SUPPORT_COROSYNC
cluster_disconnect_cfg();
#endif
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h
index 51e32b19e8..4c2ea7e38c 100644
--- a/daemons/pacemakerd/pacemakerd.h
+++ b/daemons/pacemakerd/pacemakerd.h
@@ -1,27 +1,27 @@
/*
- * Copyright 2010-2023 the Pacemaker project contributors
+ * Copyright 2010-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#define MAX_RESPAWN 100
extern GMainLoop *mainloop;
extern struct qb_ipcs_service_handlers pacemakerd_ipc_callbacks;
extern const char *pacemakerd_state;
-extern gboolean running_with_sbd;
-extern gboolean shutdown_complete_state_reported_client_closed;
+extern bool running_with_sbd;
+extern bool shutdown_complete_state_reported_client_closed;
extern unsigned int shutdown_complete_state_reported_to;
extern crm_trigger_t *shutdown_trigger;
extern crm_trigger_t *startup_trigger;
extern time_t subdaemon_check_progress;
int find_and_track_existing_processes(void);
gboolean init_children_processes(void *user_data);
void pcmk_shutdown(int nsig);
void restart_cluster_subdaemons(void);
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
index 715cd754dc..15ef6a4977 100644
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
@@ -1,900 +1,944 @@
/*
- * Copyright 2010-2024 the Pacemaker project contributors
+ * Copyright 2010-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#if SUPPORT_COROSYNC
#include "pcmkd_corosync.h"
#endif
#include <errno.h>
#include <grp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <crm/cluster.h>
#include <crm/common/xml.h>
enum child_daemon_flags {
child_none = 0,
child_respawn = 1 << 0,
child_needs_cluster = 1 << 1,
child_needs_retry = 1 << 2,
child_active_before_startup = 1 << 3,
+ child_shutting_down = 1 << 4,
};
/* Tracking data for one subdaemon managed by pacemakerd */
typedef struct pcmk_child_s {
enum pcmk_ipc_server server;    // Which subdaemon this entry describes
pid_t pid;                      // Tracked PID; 0 when no process is tracked
int respawn_count;              // Incremented on each exit; compared to MAX_RESPAWN
const char *uid;                // User name to run as (NULL: no user lookup or switch)
int check_count;                // Consecutive liveness checks that found PID but no IPC
uint32_t flags;                 // Group of enum child_daemon_flags
} pcmk_child_t;
#define PCMK_PROCESS_CHECK_INTERVAL 1000 /* 1s */
#define PCMK_PROCESS_CHECK_RETRIES 5
#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
/* Index into the array below */
#define PCMK_CHILD_CONTROLD 5
static pcmk_child_t pcmk_children[] = {
{
pcmk_ipc_based, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_fenced, 0, 0, NULL,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_execd, 0, 0, NULL,
0, child_respawn
},
{
pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
{
pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER,
0, child_respawn
},
{
pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER,
0, child_respawn | child_needs_cluster
},
};
-static char *opts_default[] = { NULL, NULL };
-static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
-
crm_trigger_t *shutdown_trigger = NULL;
crm_trigger_t *startup_trigger = NULL;
time_t subdaemon_check_progress = 0;
// Whether we need root group access to talk to cluster layer
static bool need_root_group = true;
/* When a client whose name contains "sbd" contacts us via the pacemakerd
* API, we assume it is the sbd daemon wanting to know whether pacemakerd
* shut down gracefully.
* So once everything has shut down properly, pacemakerd waits until it
* has reported the graceful completion of shutdown to sbd. Only when the
* sbd client then closes the connection can we assume that the report
* arrived, and pacemakerd can finally exit.
* The following two variables track that handshake.
*/
unsigned int shutdown_complete_state_reported_to = 0;
-gboolean shutdown_complete_state_reported_client_closed = FALSE;
+bool shutdown_complete_state_reported_client_closed = false;
/* state we report when asked via pacemakerd-api status-ping */
const char *pacemakerd_state = PCMK__VALUE_INIT;
-gboolean running_with_sbd = FALSE; /* local copy */
+bool running_with_sbd = false;
GMainLoop *mainloop = NULL;
-static gboolean fatal_error = FALSE;
+static bool fatal_error = false;
static int child_liveness(pcmk_child_t *child);
static gboolean escalate_shutdown(gpointer data);
static int start_child(pcmk_child_t * child);
static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
static void pcmk_process_exit(pcmk_child_t * child);
static gboolean pcmk_shutdown_worker(gpointer user_data);
-static gboolean stop_child(pcmk_child_t * child, int signal);
+static void stop_child(pcmk_child_t *child, int signal);
+
+static void
+for_each_child(void (*fn)(pcmk_child_t *child))
+{
+ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+ fn(&pcmk_children[i]);
+ }
+}
/*!
 * \internal
 * \brief Build the full path of a subdaemon's executable
 *
 * \param[in] subdaemon  Subdaemon whose executable path is wanted
 *
 * \return Newly allocated string (CRM_DAEMON_DIR, a slash, then the server
 *         name)
 * \note The caller is responsible for freeing the result with free().
 */
static inline char *
subdaemon_path(pcmk_child_t *subdaemon)
{
    const char *exe_name = pcmk__server_name(subdaemon->server);

    return crm_strdup_printf(CRM_DAEMON_DIR "/%s", exe_name);
}
/* Report whether the cluster layer is connected. Builds without Corosync
 * support always report true.
 */
static bool
pcmkd_cluster_connected(void)
{
    bool connected = true;

#if SUPPORT_COROSYNC
    connected = pcmkd_corosync_connected();
#endif

    return connected;
}
static gboolean
check_next_subdaemon(gpointer user_data)
{
static int next_child = 0;
pcmk_child_t *child = &(pcmk_children[next_child]);
const char *name = pcmk__server_name(child->server);
const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
int rc = child_liveness(child);
crm_trace("Checked subdaemon %s[%lld]: %s (%d)",
name, pid, pcmk_rc_str(rc), rc);
switch (rc) {
case pcmk_rc_ok:
child->check_count = 0;
subdaemon_check_progress = time(NULL);
break;
case pcmk_rc_ipc_pid_only: // Child was previously OK
- if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
+ if (pcmk_is_set(child->flags, child_shutting_down)) {
+ crm_notice("Subdaemon %s[%lld] has stopped accepting IPC "
+ "connections during shutdown", name, pid);
+
+ } else if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
// cts-lab looks for this message
crm_crit("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s and will now be killed",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
stop_child(child, SIGKILL);
if (pcmk_is_set(child->flags, child_respawn)) {
// Respawn limit hasn't been reached, so retry another round
child->check_count = 0;
}
+
} else {
crm_notice("Subdaemon %s[%lld] is unresponsive to IPC "
"after %d attempt%s (will recheck later)",
name, pid, child->check_count,
pcmk__plural_s(child->check_count));
if (pcmk_is_set(child->flags, child_respawn)) {
/* as long as the respawn-limit isn't reached
and we haven't run out of connect retries
we account this as progress we are willing
to tell to sbd
*/
subdaemon_check_progress = time(NULL);
}
}
/* go to the next child and see if
we can make progress there
*/
break;
case pcmk_rc_ipc_unresponsive:
if (!pcmk_is_set(child->flags, child_respawn)) {
/* if a subdaemon is down and we don't want it
to be restarted this is a success during
shutdown. if it isn't restarted anymore
due to MAX_RESPAWN it is
rather no success.
*/
if (child->respawn_count <= MAX_RESPAWN) {
subdaemon_check_progress = time(NULL);
}
}
if (!pcmk_is_set(child->flags, child_active_before_startup)) {
crm_trace("Subdaemon %s[%lld] terminated", name, pid);
break;
}
if (pcmk_is_set(child->flags, child_respawn)) {
// cts-lab looks for this message
crm_err("Subdaemon %s[%lld] terminated", name, pid);
} else {
/* orderly shutdown */
crm_notice("Subdaemon %s[%lld] terminated", name, pid);
}
pcmk_process_exit(child);
break;
default:
crm_exit(CRM_EX_FATAL);
break; /* static analysis/noreturn */
}
if (++next_child >= PCMK__NELEM(pcmk_children)) {
next_child = 0;
}
return G_SOURCE_CONTINUE;
}
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
if (child->pid == PCMK__SPECIAL_PID) {
pcmk_process_exit(child);
} else if (child->pid != 0) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Subdaemon %s not terminating in a timely manner, forcing",
pcmk__server_name(child->server));
stop_child(child, SIGSEGV);
}
- return FALSE;
+
+ return G_SOURCE_REMOVE;
}
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
pcmk_child_t *child = mainloop_child_userdata(p);
const char *name = mainloop_child_name(p);
if (signo) {
// cts-lab looks for this message
do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
"%s[%d] terminated with signal %d (%s)%s",
name, pid, signo, strsignal(signo),
(core? " and dumped core" : ""));
+ pcmk_process_exit(child);
+ return;
+ }
- } else {
- switch(exitcode) {
- case CRM_EX_OK:
- crm_info("%s[%d] exited with status %d (%s)",
- name, pid, exitcode, crm_exit_str(exitcode));
- break;
+ switch(exitcode) {
+ case CRM_EX_OK:
+ crm_info("%s[%d] exited with status %d (%s)",
+ name, pid, exitcode, crm_exit_str(exitcode));
+ break;
- case CRM_EX_FATAL:
- crm_warn("Shutting cluster down because %s[%d] had fatal failure",
- name, pid);
- child->flags &= ~child_respawn;
- fatal_error = TRUE;
- pcmk_shutdown(SIGTERM);
- break;
+ case CRM_EX_FATAL:
+ crm_warn("Shutting cluster down because %s[%d] had fatal failure",
+ name, pid);
+ child->flags &= ~child_respawn;
+ fatal_error = true;
+ pcmk_shutdown(SIGTERM);
+ break;
- case CRM_EX_PANIC:
- {
- char *msg = NULL;
+ case CRM_EX_PANIC:
+ {
+ char *msg = NULL;
- child->flags &= ~child_respawn;
- fatal_error = TRUE;
- msg = crm_strdup_printf("Subdaemon %s[%d] requested panic",
- name, pid);
- pcmk__panic(msg);
+ child->flags &= ~child_respawn;
+ fatal_error = true;
+ msg = crm_strdup_printf("Subdaemon %s[%d] requested panic",
+ name, pid);
+ pcmk__panic(msg);
- // Should never get here
- free(msg);
- pcmk_shutdown(SIGTERM);
- }
- break;
+ // Should never get here
+ free(msg);
+ pcmk_shutdown(SIGTERM);
+ }
+ break;
- default:
- // cts-lab looks for this message
- crm_err("%s[%d] exited with status %d (%s)",
- name, pid, exitcode, crm_exit_str(exitcode));
- break;
- }
+ default:
+ // cts-lab looks for this message
+ crm_err("%s[%d] exited with status %d (%s)",
+ name, pid, exitcode, crm_exit_str(exitcode));
+ break;
}
pcmk_process_exit(child);
}
/*!
 * \internal
 * \brief Update tracking data after a subdaemon has gone away, and decide
 *        whether to respawn it
 *
 * Clears the tracked PID and check count, bumps the respawn count (dropping
 * the respawn flag once MAX_RESPAWN is exceeded), then takes the first
 * applicable action: resume an in-progress shutdown, do nothing if respawn
 * is off, panic if fail-fast is enabled, skip respawn if the IPC endpoint
 * is still OK, defer until the cluster returns, or start the child again.
 *
 * \param[in,out] child  Subdaemon tracking data
 */
static void
pcmk_process_exit(pcmk_child_t * child)
{
    const char *daemon = pcmk__server_name(child->server);

    child->pid = 0;
    child->check_count = 0;
    child->flags &= ~child_active_before_startup;

    child->respawn_count += 1;
    if (child->respawn_count > MAX_RESPAWN) {
        crm_err("Subdaemon %s exceeded maximum respawn count", daemon);
        child->flags &= ~child_respawn;
    }

    if (shutdown_trigger) {
        /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
        mainloop_set_trigger(shutdown_trigger);
        return;
    }

    if (!pcmk_is_set(child->flags, child_respawn)) {
        return;     /* nothing to do */
    }

    if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
        pcmk__panic("Subdaemon failed");
        return;
    }

    if (child_liveness(child) == pcmk_rc_ok) {
        crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK",
                 daemon, pcmk__server_ipc_name(child->server));
        return;
    }

    if (pcmk_is_set(child->flags, child_needs_cluster)
        && !pcmkd_cluster_connected()) {
        crm_notice("Not respawning subdaemon %s until cluster returns",
                   daemon);
        child->flags |= child_needs_retry;
        return;
    }

    // cts-lab looks for this message
    crm_notice("Respawning subdaemon %s after unexpected exit", daemon);
    start_child(child);
}
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = PCMK__NELEM(pcmk_children) - 1;
static time_t next_log = 0;
if (phase == PCMK__NELEM(pcmk_children) - 1) {
crm_notice("Shutting down Pacemaker");
pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
}
for (; phase >= 0; phase--) {
pcmk_child_t *child = &(pcmk_children[phase]);
const char *name = pcmk__server_name(child->server);
+ time_t now = 0;
- if (child->pid != 0) {
- time_t now = time(NULL);
+ if (child->pid == 0) {
+ /* cleanup */
+ crm_debug("Subdaemon %s confirmed stopped", name);
+ child->pid = 0;
+ continue;
+ }
- if (pcmk_is_set(child->flags, child_respawn)) {
- if (child->pid == PCMK__SPECIAL_PID) {
- crm_warn("Subdaemon %s cannot be terminated (shutdown "
- "will be escalated after %ld seconds if it does "
- "not terminate on its own; set PCMK_"
- PCMK__ENV_FAIL_FAST "=1 to exit immediately "
- "instead)",
- name, (long) SHUTDOWN_ESCALATION_PERIOD);
- }
- next_log = now + 30;
- child->flags &= ~child_respawn;
- stop_child(child, SIGTERM);
- if (phase < PCMK_CHILD_CONTROLD) {
- pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD,
- escalate_shutdown, child);
- }
+ now = time(NULL);
- } else if (now >= next_log) {
- next_log = now + 30;
- crm_notice("Still waiting for subdaemon %s to terminate "
- QB_XS " pid=%lld", name, (long long) child->pid);
+ if (pcmk_is_set(child->flags, child_respawn)) {
+ if (child->pid == PCMK__SPECIAL_PID) {
+ // SHUTDOWN_ESCALATION_PERIOD is in milliseconds; the message reports seconds
+ crm_warn("Subdaemon %s cannot be terminated (shutdown "
+ "will be escalated after %ld seconds if it does "
+ "not terminate on its own; set PCMK_"
+ PCMK__ENV_FAIL_FAST "=1 to exit immediately "
+ "instead)",
+ name, (long) (SHUTDOWN_ESCALATION_PERIOD / 1000));
}
- return TRUE;
+ next_log = now + 30;
+ child->flags &= ~child_respawn;
+ stop_child(child, SIGTERM);
+ if (phase < PCMK_CHILD_CONTROLD) {
+ pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD,
+ escalate_shutdown, child);
+ }
+
+ } else if (now >= next_log) {
+ next_log = now + 30;
+ crm_notice("Still waiting for subdaemon %s to terminate "
+ QB_XS " pid=%lld", name, (long long) child->pid);
}
- /* cleanup */
- crm_debug("Subdaemon %s confirmed stopped", name);
- child->pid = 0;
+ return G_SOURCE_CONTINUE;
}
crm_notice("Shutdown complete");
pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
if (!fatal_error && running_with_sbd &&
pcmk__get_sbd_sync_resource_startup() &&
!shutdown_complete_state_reported_client_closed) {
crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
- return TRUE;
+ return G_SOURCE_CONTINUE;
}
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Shutting down and staying down after fatal error");
#if SUPPORT_COROSYNC
pcmkd_shutdown_corosync();
#endif
crm_exit(CRM_EX_FATAL);
}
- return TRUE;
+ return G_SOURCE_CONTINUE;
}
/* TODO once libqb is taught to juggle with IPC end-points carried over as
bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
it shall hand over these descriptors here if/once they are successfully
pre-opened in (presumably) child_liveness(), to avoid any remaining
room for races */
// \return Standard Pacemaker return code
+ // \note Only the parent returns; the forked child either replaces its
+ //       process image or exits with CRM_EX_FATAL.
static int
start_child(pcmk_child_t * child)
{
uid_t uid = 0;
gid_t gid = 0;
- gboolean use_valgrind = FALSE;
- gboolean use_callgrind = FALSE;
+ bool use_valgrind = false;
+ bool use_callgrind = false;
const char *name = pcmk__server_name(child->server);
const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
- child->flags &= ~child_active_before_startup;
+ child->flags &= ~(child_active_before_startup | child_shutting_down);
child->check_count = 0;
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
- use_callgrind = TRUE;
- use_valgrind = TRUE;
+ use_callgrind = true;
+ use_valgrind = true;
} else if ((env_callgrind != NULL)
&& (strstr(env_callgrind, name) != NULL)) {
- use_callgrind = TRUE;
- use_valgrind = TRUE;
+ use_callgrind = true;
+ use_valgrind = true;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
- use_valgrind = TRUE;
+ use_valgrind = true;
} else if ((env_valgrind != NULL)
&& (strstr(env_valgrind, name) != NULL)) {
- use_valgrind = TRUE;
+ use_valgrind = true;
}
if (use_valgrind && strlen(PCMK__VALGRIND_EXEC) == 0) {
crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found",
name);
- use_valgrind = FALSE;
+ use_valgrind = false;
}
if ((child->uid != NULL) && (crm_user_lookup(child->uid, &uid, &gid) < 0)) {
crm_err("Invalid user (%s) for subdaemon %s: not found",
child->uid, name);
return EACCES;
}
child->pid = fork();
pcmk__assert(child->pid != -1);
if (child->pid > 0) {
/* parent */
mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit);
- if (use_valgrind) {
- crm_info("Forked process %lld using user %lu (%s) and group %lu "
- "for subdaemon %s (valgrind enabled: %s)",
- (long long) child->pid, (unsigned long) uid,
- pcmk__s(child->uid, "root"), (unsigned long) gid, name,
- PCMK__VALGRIND_EXEC);
- } else {
- crm_info("Forked process %lld using user %lu (%s) and group %lu "
- "for subdaemon %s",
- (long long) child->pid, (unsigned long) uid,
- pcmk__s(child->uid, "root"), (unsigned long) gid, name);
- }
+ if (use_valgrind) {
+ crm_info("Forked process %lld using user %lu (%s) and group %lu "
+ "for subdaemon %s (valgrind enabled: %s)",
+ (long long) child->pid, (unsigned long) uid,
+ pcmk__s(child->uid, "root"), (unsigned long) gid, name,
+ PCMK__VALGRIND_EXEC);
+ } else {
+ crm_info("Forked process %lld using user %lu (%s) and group %lu "
+ "for subdaemon %s",
+ (long long) child->pid, (unsigned long) uid,
+ pcmk__s(child->uid, "root"), (unsigned long) gid, name);
+ }
return pcmk_rc_ok;
} else {
- /* Start a new session */
- (void)setsid();
+ char *path = subdaemon_path(child);
- /* Setup the two alternate arg arrays */
- opts_vgrind[0] = pcmk__str_copy(PCMK__VALGRIND_EXEC);
- if (use_callgrind) {
- opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
- opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
- CRM_STATE_DIR "/callgrind.out.%p");
- opts_vgrind[3] = subdaemon_path(child);
- opts_vgrind[4] = NULL;
- } else {
- opts_vgrind[1] = subdaemon_path(child);
- opts_vgrind[2] = NULL;
- opts_vgrind[3] = NULL;
- opts_vgrind[4] = NULL;
- }
- opts_default[0] = subdaemon_path(child);
+ /* Start a new session */
+ setsid();
if(gid) {
// Drop root group access if not needed
if (!need_root_group && (setgid(gid) < 0)) {
crm_warn("Could not set subdaemon %s group to %lu: %s",
name, (unsigned long) gid, strerror(errno));
}
/* Initialize supplementary groups to only those always granted to
* the user, plus haclient (so we can access IPC).
*/
if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initialize system groups for subdaemon %s: %s "
QB_XS " errno=%d",
name, pcmk_rc_str(errno), errno);
}
}
if (uid && setuid(uid) < 0) {
crm_warn("Could not set subdaemon %s user to %s: %s "
QB_XS " uid=%lu errno=%d",
- name, strerror(errno), child->uid, (unsigned long) uid,
+ name, child->uid, strerror(errno), (unsigned long) uid,
errno);
}
pcmk__close_fds_in_child(true);
pcmk__open_devnull(O_RDONLY); // stdin (fd 0)
pcmk__open_devnull(O_WRONLY); // stdout (fd 1)
pcmk__open_devnull(O_WRONLY); // stderr (fd 2)
- if (use_valgrind) {
- (void)execvp(PCMK__VALGRIND_EXEC, opts_vgrind);
+ if (use_callgrind) {
+ // Keep the output file name used before this refactor (callgrind.out.<pid>)
+ char *out_file = pcmk__str_copy("--callgrind-out-file="
+ CRM_STATE_DIR "/callgrind.out.%p");
+ execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, "--tool=callgrind",
+ out_file, path, (char *) NULL);
+ free(out_file);
+ } else if (use_valgrind) {
+ execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, path, (char *) NULL);
} else {
- char *path = subdaemon_path(child);
-
- (void) execvp(path, opts_default);
- free(path);
+ execlp(path, path, (char *) NULL);
}
+
+ free(path);
crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno));
crm_exit(CRM_EX_FATAL);
}
return pcmk_rc_ok; /* never reached */
}
/*!
* \internal
* \brief Check the liveness of the child based on IPC name and PID if tracked
*
* \param[in,out] child Child tracked data
*
* \return Standard Pacemaker return code
*
* \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
* indicating that no trace of IPC liveness was detected,
* pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
* an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
* the child is up by PID but not IPC end-point (possibly starting).
* \note This function doesn't modify any of \p child members but \c pid,
* and is not actively toying with processes as such but invoking
* \c stop_child in one particular case (there's for some reason
* a different authentic holder of the IPC end-point).
*/
static int
child_liveness(pcmk_child_t *child)
{
uid_t cl_uid = 0;
gid_t cl_gid = 0;
const uid_t root_uid = 0;
const gid_t root_gid = 0;
const uid_t *ref_uid;
const gid_t *ref_gid;
const char *name = pcmk__server_name(child->server);
int rc = pcmk_rc_ipc_unresponsive;
+ int pid_active = pcmk_rc_ok; // pcmk__pid_active() result for child->pid
int legacy_rc = pcmk_ok;
pid_t ipc_pid = 0;
if (child->uid == NULL) {
ref_uid = &root_uid;
ref_gid = &root_gid;
} else {
ref_uid = &cl_uid;
ref_gid = &cl_gid;
legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
}
if (legacy_rc < 0) {
rc = pcmk_legacy2rc(legacy_rc);
crm_err("Could not find user and group IDs for user %s: %s "
QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
} else {
const char *ipc_name = pcmk__server_ipc_name(child->server);
rc = pcmk__ipc_is_authentic_process_active(ipc_name,
*ref_uid, *ref_gid,
&ipc_pid);
if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
if (child->pid <= 0) {
/* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
* initializes a new child. If rc is
* pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
* investigate further.
*/
child->pid = ipc_pid;
} else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
/* An unexpected (but authorized) process is responding to
* IPC. Investigate further.
*/
rc = pcmk_rc_ipc_unresponsive;
}
}
}
- if (rc == pcmk_rc_ipc_unresponsive) {
- /* If we get here, a child without IPC is being tracked, no IPC liveness
- * has been detected, or IPC liveness has been detected with an
- * unexpected (but authorized) process. This is safe on FreeBSD since
- * the only change possible from a proper child's PID into "special" PID
- * of 1 behind more loosely related process.
+ if (rc != pcmk_rc_ipc_unresponsive) {
+ return rc; // Conclusive IPC result or user lookup error: no PID probing needed
+ }
+
+ /* If we get here, a child without IPC is being tracked, no IPC liveness
+ * has been detected, or IPC liveness has been detected with an
+ * unexpected (but authorized) process. This is safe on FreeBSD since
+ * the only change possible from a proper child's PID into "special" PID
+ * of 1 behind more loosely related process.
+ */
+ pid_active = pcmk__pid_active(child->pid, name);
+
+ if ((ipc_pid != 0)
+ && ((pid_active != pcmk_rc_ok)
+ || (ipc_pid == PCMK__SPECIAL_PID)
+ || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) {
+ /* An unexpected (but authorized) process was detected at the IPC
+ * endpoint, and either it is active, or the child we're tracking is
+ * not.
*/
- int ret = pcmk__pid_active(child->pid, name);
-
- if (ipc_pid && ((ret != pcmk_rc_ok)
- || ipc_pid == PCMK__SPECIAL_PID
- || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) {
- /* An unexpected (but authorized) process was detected at the IPC
- * endpoint, and either it is active, or the child we're tracking is
- * not.
+
+ if (pid_active == pcmk_rc_ok) {
+ /* The child we're tracking is active. Kill it, and adopt the
+ * detected process. This assumes that our children don't fork
+ * (thus getting a different PID owning the IPC), but rather the
+ * tracking got out of sync because of some means external to
+ * Pacemaker, and adopting the detected process is better than
+ * killing it and possibly having to spawn a new child.
*/
+ /* not possessing IPC, after all (what about corosync CPG?) */
+ stop_child(child, SIGKILL);
+ }
+ rc = pcmk_rc_ok;
+ child->pid = ipc_pid; // Track the process that answered on the IPC endpoint
+ } else if (pid_active == pcmk_rc_ok) {
+ // Our tracked child's PID was found active, but not its IPC
+ rc = pcmk_rc_ipc_pid_only;
+ } else if ((child->pid == 0) && (pid_active == EINVAL)) {
+ // FreeBSD can return EINVAL
+ rc = pcmk_rc_ipc_unresponsive;
+ } else if (pid_active == EACCES) {
+ rc = pcmk_rc_ipc_unauthorized;
+ } else if (pid_active == ESRCH) {
+ rc = pcmk_rc_ipc_unresponsive;
+ } else {
+ rc = pid_active; // Any other pcmk__pid_active() result is passed through unchanged
+ }
+
+ return rc;
+}
+/*!
+ * \internal
+ * \brief Reset a child's respawn counter to zero
+ *
+ * \param[in,out] child  Child whose \c respawn_count is cleared
+ */
+static void
+reset_respawn_count(pcmk_child_t *child)
+{
+ /* Restore pristine state */
+ child->respawn_count = 0;
+}
+
+#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
+
+/*!
+ * \internal
+ * \brief Handle a tracked child whose PID is active but whose IPC is not
+ *
+ * \param[in] child  Child to report on; its \c respawn_count is read as the
+ *                   number of the current waiting round
+ *
+ * \return \c pcmk_rc_ipc_pid_only if \c respawn_count has reached
+ *         \c WAIT_TRIES, otherwise \c EAGAIN to request another round
+ */
+static int
+child_up_but_no_ipc(pcmk_child_t *child)
+{
+ const char *ipc_name = pcmk__server_ipc_name(child->server);
+
+ if (child->respawn_count == WAIT_TRIES) {
+ crm_crit("%s IPC endpoint for existing process %lld did not (re)appear",
+ ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
+ return pcmk_rc_ipc_pid_only;
+ }
+
+ // The PID argument is cast to long long, so the conversion must be %lld
+ crm_warn("Cannot find %s IPC endpoint for existing process %lld, could still "
+ "reappear in %d attempts",
+ ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid),
+ WAIT_TRIES - child->respawn_count);
+ return EAGAIN;
+}
+/*!
+ * \internal
+ * \brief Start tracking a child that was found alive during startup
+ *
+ * \param[in,out] child  Child detected as alive; on success its
+ *                       \c respawn_count is set to -1 and
+ *                       \c child_active_before_startup is set in its flags
+ *
+ * \return \c pcmk_rc_ok if the child is now tracked, \c EAGAIN if the child
+ *         has the special PID and \c respawn_count has not reached
+ *         \c WAIT_TRIES, or \c EOPNOTSUPP if it has the special PID and
+ *         \c PCMK__ENV_FAIL_FAST is true
+ */
+static int
+child_alive(pcmk_child_t *child)
+{
+ const char *name = pcmk__server_name(child->server);
+
+ if (child->pid == PCMK__SPECIAL_PID) {
+ if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
+ crm_crit("Cannot track pre-existing process for %s IPC on this "
+ "platform and PCMK_" PCMK__ENV_FAIL_FAST " requested",
+ name);
+ return EOPNOTSUPP;
+
+ } else if (child->respawn_count == WAIT_TRIES) {
+ /* Because PCMK__ENV_FAIL_FAST wasn't requested, we can't bail
+ * out. Instead, switch to IPC liveness monitoring which is not
+ * very suitable for heavy system load.
+ */
+ crm_notice("Cannot track pre-existing process for %s IPC on this "
+ "platform but assuming it is stable and using liveness "
+ "monitoring", name);
+ crm_warn("The process for %s IPC cannot be terminated, so "
+ "shutdown will be delayed by %d s to allow time for it "
+ "to terminate on its own", name, SHUTDOWN_ESCALATION_PERIOD); // No return: falls through to the tracking code below
- if (ret == pcmk_rc_ok) {
- /* The child we're tracking is active. Kill it, and adopt the
- * detected process. This assumes that our children don't fork
- * (thus getting a different PID owning the IPC), but rather the
- * tracking got out of sync because of some means external to
- * Pacemaker, and adopting the detected process is better than
- * killing it and possibly having to spawn a new child.
- */
- /* not possessing IPC, afterall (what about corosync CPG?) */
- stop_child(child, SIGKILL);
- }
- rc = pcmk_rc_ok;
- child->pid = ipc_pid;
- } else if (ret == pcmk_rc_ok) {
- // Our tracked child's PID was found active, but not its IPC
- rc = pcmk_rc_ipc_pid_only;
- } else if ((child->pid == 0) && (ret == EINVAL)) {
- // FreeBSD can return EINVAL
- rc = pcmk_rc_ipc_unresponsive;
} else {
- switch (ret) {
- case EACCES:
- rc = pcmk_rc_ipc_unauthorized;
- break;
- case ESRCH:
- rc = pcmk_rc_ipc_unresponsive;
- break;
- default:
- rc = ret;
- break;
- }
+ crm_warn("Cannot track pre-existing process for %s IPC on this "
+ "platform; checking %d more times",
+ name, WAIT_TRIES - child->respawn_count);
+ return EAGAIN;
+ }
+ }
+
+ crm_notice("Tracking existing %s process (pid=%lld)",
+ name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
+ child->respawn_count = -1; /* 0~keep watching */
+ child->flags |= child_active_before_startup;
+ return pcmk_rc_ok;
+}
+/*!
+ * \internal
+ * \brief Check one child's liveness and start tracking it if it is alive
+ *
+ * \param[in,out] child             Child to check
+ * \param[in]     rounds            Current waiting round, stored in the
+ *                                  child's \c respawn_count
+ * \param[out]    wait_in_progress  Set to \c true if another round is needed
+ *                                  for this child (never set to \c false here)
+ *
+ * \return \c pcmk_rc_ok if the child is now tracked, \c EAGAIN if it was
+ *         skipped or needs another round, or another return code on failure
+ */
+static int
+find_and_track_child(pcmk_child_t *child, int rounds, bool *wait_in_progress)
+{
+ int rc = pcmk_rc_ok;
+ const char *name = pcmk__server_name(child->server);
+
+ if (child->respawn_count < 0) {
+ return EAGAIN; // Negative count: liveness detection is suppressed for this child
+ }
+
+ rc = child_liveness(child);
+ if (rc == pcmk_rc_ipc_unresponsive) {
+ /* As a speculation, don't give up if there are more rounds to
+ * come for other reasons, but don't artificially wait just
+ * because of this, since we would preferably start ASAP.
+ */
+ return EAGAIN;
+ }
+
+ child->respawn_count = rounds; // Read back by child_alive() and child_up_but_no_ipc()
+
+ if (rc == pcmk_rc_ok) {
+ rc = child_alive(child);
+
+ if (rc == EAGAIN) {
+ *wait_in_progress = true;
}
+
+ } else if (rc == pcmk_rc_ipc_pid_only) {
+ rc = child_up_but_no_ipc(child);
+
+ if (rc == EAGAIN) {
+ *wait_in_progress = true;
+ }
+
+ } else {
+ crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", name,
+ pcmk_rc_str(rc), rc);
}
+
return rc;
}
/*!
* \internal
* \brief Initial one-off check of the pre-existing "child" processes
*
* With "child" process, we mean the subdaemon that defines an API end-point
* (all of them do as of the comment) -- the possible complement is skipped
* as it is deemed it has no such shared resources to cause conflicts about,
* hence it can presumably be started anew without hesitation.
* If that won't hold true in the future, the concept of a shared resource
* will have to be generalized beyond the API end-point.
*
* For boundary cases that the "child" is still starting (IPC end-point is yet
* to be witnessed), or more rarely (practically FreeBSD only), when there's
* a pre-existing "untrackable" authentic process, we give the situation some
* time to possibly unfold in the right direction, meaning that said socket
* will appear or the unattainable process will disappear per the observable
* IPC, respectively.
*
* \return Standard Pacemaker return code
*
* \note Since this gets run at the very start, \c respawn_count fields
* for particular children get temporarily overloaded with "rounds
* of waiting" tracking, restored once we are about to finish with
* success (i.e. returning value >=0) and will remain unrestored
* otherwise. One way to suppress liveness detection logic for
* particular child is to set the said value to a negative number.
*/
-#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
int
find_and_track_existing_processes(void)
{
bool wait_in_progress;
- int rc;
size_t i, rounds;
for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
wait_in_progress = false;
- for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- const char *name = pcmk__server_name(pcmk_children[i].server);
- const char *ipc_name = NULL;
- if (pcmk_children[i].respawn_count < 0) {
- continue;
- }
-
- rc = child_liveness(&pcmk_children[i]);
- if (rc == pcmk_rc_ipc_unresponsive) {
- /* As a speculation, don't give up if there are more rounds to
- * come for other reasons, but don't artificially wait just
- * because of this, since we would preferably start ASAP.
- */
- continue;
- }
+ for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
+ int rc = find_and_track_child(&pcmk_children[i], rounds,
+ &wait_in_progress);
- // @TODO Functionize more of this to reduce nesting
- ipc_name = pcmk__server_ipc_name(pcmk_children[i].server);
- pcmk_children[i].respawn_count = rounds;
- switch (rc) {
- case pcmk_rc_ok:
- if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
- if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
- crm_crit("Cannot reliably track pre-existing"
- " authentic process behind %s IPC on this"
- " platform and PCMK_" PCMK__ENV_FAIL_FAST
- " requested", ipc_name);
- return EOPNOTSUPP;
- } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
- crm_notice("Assuming pre-existing authentic, though"
- " on this platform untrackable, process"
- " behind %s IPC is stable (was in %d"
- " previous samples) so rather than"
- " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
- " not requested), we just switch to a"
- " less optimal IPC liveness monitoring"
- " (not very suitable for heavy load)",
- name, WAIT_TRIES - 1);
- crm_warn("The process behind %s IPC cannot be"
- " terminated, so the overall shutdown"
- " will get delayed implicitly (%ld s),"
- " which serves as a graceful period for"
- " its native termination if it vitally"
- " depends on some other daemons going"
- " down in a controlled way already",
- name, (long) SHUTDOWN_ESCALATION_PERIOD);
- } else {
- wait_in_progress = true;
- crm_warn("Cannot reliably track pre-existing"
- " authentic process behind %s IPC on this"
- " platform, can still disappear in %d"
- " attempt(s)", ipc_name,
- WAIT_TRIES - pcmk_children[i].respawn_count);
- continue;
- }
- }
- crm_notice("Tracking existing %s process (pid=%lld)",
- name,
- (long long) PCMK__SPECIAL_PID_AS_0(
- pcmk_children[i].pid));
- pcmk_children[i].respawn_count = -1; /* 0~keep watching */
- pcmk_children[i].flags |= child_active_before_startup;
- break;
- case pcmk_rc_ipc_pid_only:
- if (pcmk_children[i].respawn_count == WAIT_TRIES) {
- crm_crit("%s IPC endpoint for existing authentic"
- " process %lld did not (re)appear",
- ipc_name,
- (long long) PCMK__SPECIAL_PID_AS_0(
- pcmk_children[i].pid));
- return rc;
- }
- wait_in_progress = true;
- crm_warn("Cannot find %s IPC endpoint for existing"
- " authentic process %lld, can still (re)appear"
- " in %d attempts (?)",
- ipc_name,
- (long long) PCMK__SPECIAL_PID_AS_0(
- pcmk_children[i].pid),
- WAIT_TRIES - pcmk_children[i].respawn_count);
- continue;
- default:
- crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d",
- name, pcmk_rc_str(rc), rc);
- return rc;
+ /* A child that is now tracked (pcmk_rc_ok) or that needs another
+ * round (EAGAIN) must not end the scan: the remaining children
+ * still have to be checked in this round. Only a real error
+ * aborts the whole detection.
+ */
+ if ((rc != pcmk_rc_ok) && (rc != EAGAIN)) {
+ return rc;
}
}
+
if (!wait_in_progress) {
break;
}
+
pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
}
- for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- pcmk_children[i].respawn_count = 0; /* restore pristine state */
- }
+ for_each_child(reset_respawn_count);
pcmk__create_timer(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
NULL);
return pcmk_rc_ok;
}
+static void
+start_subdaemon(pcmk_child_t *child)
+{
+ if (child->pid != 0) {
+ /* We are already tracking this process (its PID is nonzero), so there
+ * is nothing to start
+ */
+ return;
+ }
+
+ start_child(child); // Return value is not checked here
+}
+
gboolean
init_children_processes(void *user_data)
{
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
/* Corosync clusters can drop root group access, because we set
* uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
* to corosync.
*/
need_root_group = false;
}
/* start any children that have not been detected */
- for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- if (pcmk_children[i].pid != 0) {
- /* we are already tracking it */
- continue;
- }
-
- start_child(&(pcmk_children[i]));
- }
+ for_each_child(start_subdaemon); // start_subdaemon() skips children whose PID is nonzero
/* From this point on, any daemons being started will be due to
* respawning rather than node start.
*
* This may be useful for the daemons to know
*/
pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
pacemakerd_state = PCMK__VALUE_RUNNING;
- return TRUE;
+ return G_SOURCE_CONTINUE; // NOTE(review): GLib defines this as TRUE, the value returned before -- confirm the caller expects it
}
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
-void
-restart_cluster_subdaemons(void)
+static void
+restart_subdaemon(pcmk_child_t *child)
{
- for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
- if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
- continue;
- }
+ if (!pcmk_is_set(child->flags, child_needs_retry) || child->pid != 0) {
+ return; // Not flagged for retry, or a PID is already recorded for it
+ }
- crm_notice("Respawning cluster-based subdaemon %s",
- pcmk__server_name(pcmk_children[i].server));
- if (start_child(&pcmk_children[i])) {
- pcmk_children[i].flags &= ~child_needs_retry;
- }
+ crm_notice("Respawning cluster-based subdaemon %s",
+ pcmk__server_name(child->server));
+
+ if (start_child(child)) {
+ child->flags &= ~child_needs_retry; // Clear the retry flag once start_child() returns nonzero
}
}
-static gboolean
-stop_child(pcmk_child_t * child, int signal)
+void
+restart_cluster_subdaemons(void)
+{
+ for_each_child(restart_subdaemon); // presumably runs restart_subdaemon() once per entry of pcmk_children -- verify against for_each_child()
+}
+/*!
+ * \internal
+ * \brief Send a signal to a tracked subdaemon process
+ *
+ * \param[in,out] child   Subdaemon to signal; \c child_shutting_down is set
+ *                        in its flags if the signal is sent
+ * \param[in]     signal  Signal to send (0 means \c SIGTERM)
+ *
+ * \note Nothing is sent if the child's PID is \c PCMK__SPECIAL_PID or is not
+ *       positive.
+ */
+static void
+stop_child(pcmk_child_t *child, int signal)
{
const char *name = pcmk__server_name(child->server);
if (signal == 0) {
signal = SIGTERM;
}
/* why to skip PID of 1?
- FreeBSD ~ how untrackable process behind IPC is masqueraded as
- elsewhere: how "init" task is designated; in particular, in systemd
arrangement of socket-based activation, this is pretty real */
if (child->pid == PCMK__SPECIAL_PID) {
crm_debug("Nothing to do to stop subdaemon %s[%lld]",
name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
- return TRUE;
+ return;
}
if (child->pid <= 0) {
crm_trace("Nothing to do to stop subdaemon %s: Not running", name);
- return TRUE;
+ return;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping subdaemon %s "
QB_XS " via signal %d to process %lld",
name, signal, (long long) child->pid);
+ child->flags |= child_shutting_down; // Flag the child only when kill() succeeded
} else {
crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s",
name, (long long) child->pid, signal, strerror(errno));
}
-
- return TRUE;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 4:38 PM (1 d, 1 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1988718
Default Alt Text
(61 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment