diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index b85e7eaf6c..217ec35741 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -1,486 +1,486 @@ /* - * Copyright 2010-2024 the Pacemaker project contributors + * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #if SUPPORT_COROSYNC #include "pcmkd_corosync.h" #endif #include #include #include #include #include #include #include #include #include #include /* indirectly: CRM_EX_* */ #include #include #include #include #include #include #include #define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons" struct { gboolean features; gboolean foreground; gboolean shutdown; gboolean standby; } options; static pcmk__output_t *out = NULL; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; PCMK__OUTPUT_ARGS("features") static int pacemakerd_features(pcmk__output_t *out, va_list args) { out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("features") static int pacemakerd_features_xml(pcmk__output_t *out, va_list args) { gchar **feature_list = g_strsplit(CRM_FEATURES, " ", 0); pcmk__output_xml_create_parent(out, PCMK_XE_PACEMAKERD, PCMK_XA_VERSION, PACEMAKER_VERSION, PCMK_XA_BUILD, BUILD_VERSION, PCMK_XA_FEATURE_SET, CRM_FEATURE_SET, NULL); out->begin_list(out, NULL, NULL, PCMK_XE_FEATURES); for (char **s = feature_list; *s != NULL; s++) { pcmk__output_create_xml_text_node(out, PCMK_XE_FEATURE, *s); } out->end_list(out); pcmk__output_xml_pop_parent(out); 
g_strfreev(feature_list); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "features", "default", pacemakerd_features }, { "features", "xml", pacemakerd_features_xml }, { NULL, NULL, NULL } }; static gboolean pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return TRUE; } static gboolean standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.standby = TRUE; pcmk__set_env_option(PCMK__ENV_NODE_START_STATE, PCMK_VALUE_STANDBY, false); return TRUE; } static GOptionEntry entries[] = { { "features", 'F', 0, G_OPTION_ARG_NONE, &options.features, "Display full version and list of features Pacemaker was built with", NULL }, { "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground, "(Ignored) Pacemaker always runs in the foreground", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb, "(Ignored) Daemon pid file location", "FILE" }, { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown, "Instruct Pacemaker to shutdown on this machine", NULL }, { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb, "Start node in standby state", NULL }, { NULL } }; static void pcmk_ignore(int nsig) { crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); } static void pcmk_sigquit(int nsig) { pcmk__panic("Received SIGQUIT"); } static void pacemakerd_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", path, CRM_DAEMON_USER, gid, pcmk_rc_str(errno)); } } static void create_pcmk_dirs(void) { uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; const char *dirs[] = { PCMK__PERSISTENT_DATA_DIR, // core/blackbox/scheduler/CIB files CRM_CORE_DIR, // core files CRM_BLACKBOX_DIR, // blackbox dumps PCMK_SCHEDULER_INPUT_DIR, // scheduler inputs CRM_CONFIG_DIR, // the Cluster Information Base (CIB) // Don't build PCMK__OCF_TMP_DIR the executor will do it NULL }; 
if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); crm_exit(CRM_EX_NOUSER); } // Used by some resource agents if ((mkdir(CRM_STATE_DIR, 0750) < 0) && (errno != EEXIST)) { crm_warn("Could not create directory " CRM_STATE_DIR ": %s", pcmk_rc_str(errno)); } else { pacemakerd_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); } for (int i = 0; dirs[i] != NULL; ++i) { int rc = pcmk__build_path(dirs[i], 0750); if (rc != pcmk_rc_ok) { crm_warn("Could not create directory %s: %s", dirs[i], pcmk_rc_str(rc)); } else { pacemakerd_chown(dirs[i], pcmk_uid, pcmk_gid); } } } static void remove_core_file_limit(void) { struct rlimit cores; // Get current limits if (getrlimit(RLIMIT_CORE, &cores) < 0) { crm_notice("Unable to check system core file limits " "(consider ensuring the size is unlimited): %s", strerror(errno)); return; } // Check whether core dumps are disabled if (cores.rlim_max == 0) { if (geteuid() != 0) { // Yes, and there's nothing we can do about it crm_notice("Core dumps are disabled (consider enabling them)"); return; } cores.rlim_max = RLIM_INFINITY; // Yes, but we're root, so enable them } // Raise soft limit to hard limit (if not already done) if (cores.rlim_cur != cores.rlim_max) { cores.rlim_cur = cores.rlim_max; if (setrlimit(RLIMIT_CORE, &cores) < 0) { crm_notice("Unable to raise system limit on core file size " "(consider doing so manually): %s", strerror(errno)); return; } } if (cores.rlim_cur == RLIM_INFINITY) { crm_trace("Core file size is unlimited"); } else { crm_trace("Core file size is limited to %llu bytes", (unsigned long long) cores.rlim_cur); } } static void pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk_pacemakerd_api_reply_t *reply = event_data; switch (event_type) { case pcmk_ipc_event_reply: break; default: return; } if (status != CRM_EX_OK) { out->err(out, "Bad reply 
from pacemakerd: %s", crm_exit_str(status)); return; } if (reply->reply_type != pcmk_pacemakerd_reply_shutdown) { out->err(out, "Unknown reply type %d from pacemakerd", reply->reply_type); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "p"); GOptionContext *context = build_arg_context(args, &output_group); bool old_instance_connected = false; pcmk_ipc_api_t *old_instance = NULL; qb_ipcs_service_t *ipcs = NULL; subdaemon_check_progress = time(NULL); setenv("LC_ALL", "C", 1); // Ensure logs are in a common language crm_log_preinit(NULL, argc, argv); mainloop_add_signal(SIGHUP, pcmk_ignore); mainloop_add_signal(SIGQUIT, pcmk_sigquit); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } pcmk__register_messages(out, fmt_functions); if (options.features) { out->message(out, "features"); exit_code = CRM_EX_OK; goto done; } if (args->version) { out->version(out, false); goto done; } if (options.shutdown) { pcmk__cli_init_logging(PCMK__SERVER_PACEMAKERD, args->verbosity); } else { crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); } crm_debug("Checking for existing Pacemaker instance"); rc = pcmk_new_ipc_api(&old_instance, 
pcmk_ipc_pacemakerd); if (old_instance == NULL) { out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc)); exit_code = pcmk_rc2exitc(rc); goto done; } pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL); rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2); if (rc != pcmk_rc_ok) { crm_debug("No existing %s instance found: %s", pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc)); } old_instance_connected = pcmk_ipc_is_connected(old_instance); if (options.shutdown) { if (old_instance_connected) { rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name); pcmk_dispatch_ipc(old_instance); exit_code = pcmk_rc2exitc(rc); if (exit_code != CRM_EX_OK) { pcmk_free_ipc_api(old_instance); goto done; } /* We get the ACK immediately, and the response right after that, * but it might take a while for pacemakerd to get around to * shutting down. Wait for that to happen (with 30-minute timeout). */ for (int i = 0; i < 900; i++) { if (!pcmk_ipc_is_connected(old_instance)) { exit_code = CRM_EX_OK; pcmk_free_ipc_api(old_instance); goto done; } sleep(2); } exit_code = CRM_EX_TIMEOUT; pcmk_free_ipc_api(old_instance); goto done; } else { out->err(out, "Could not request shutdown " "of existing Pacemaker instance: %s", pcmk_rc_str(rc)); pcmk_free_ipc_api(old_instance); exit_code = CRM_EX_DISCONNECT; goto done; } } else if (old_instance_connected) { pcmk_free_ipc_api(old_instance); crm_err("Aborting start-up because active Pacemaker instance found"); exit_code = CRM_EX_FATAL; goto done; } pcmk_free_ipc_api(old_instance); /* Don't allow any accidental output after this point. 
*/ if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); out = NULL; } #if SUPPORT_COROSYNC if (pacemakerd_read_config() == FALSE) { crm_exit(CRM_EX_UNAVAILABLE); } #endif // OCF shell functions and cluster-glue need facility under different name { const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(facility, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__set_env_option("LOGFACILITY", facility, true); } } crm_notice("Starting Pacemaker %s " QB_XS " build=%s features:%s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); mainloop = g_main_loop_new(NULL, FALSE); remove_core_file_limit(); create_pcmk_dirs(); pcmk__serve_pacemakerd_ipc(&ipcs, &pacemakerd_ipc_callbacks); #if SUPPORT_COROSYNC /* Allows us to block shutdown */ if (!cluster_connect_cfg()) { exit_code = CRM_EX_PROTOCOL; goto done; } #endif if (pcmk__locate_sbd() > 0) { - running_with_sbd = TRUE; + running_with_sbd = true; } switch (find_and_track_existing_processes()) { case pcmk_rc_ok: break; case pcmk_rc_ipc_unauthorized: exit_code = CRM_EX_CANTCREAT; goto done; default: exit_code = CRM_EX_FATAL; goto done; }; mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { crm_notice("Waiting for startup-trigger from SBD."); pacemakerd_state = PCMK__VALUE_WAIT_FOR_PING; startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); } else { if (running_with_sbd) { crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " "by your SBD version) improve reliability of " "interworking between SBD & pacemaker."); } pacemakerd_state = PCMK__VALUE_STARTING_DAEMONS; init_children_processes(NULL); } crm_notice("Pacemaker daemon successfully started and accepting connections"); g_main_loop_run(mainloop); if (ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } 
g_main_loop_unref(mainloop); #if SUPPORT_COROSYNC cluster_disconnect_cfg(); #endif done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h index 51e32b19e8..4c2ea7e38c 100644 --- a/daemons/pacemakerd/pacemakerd.h +++ b/daemons/pacemakerd/pacemakerd.h @@ -1,27 +1,27 @@ /* - * Copyright 2010-2023 the Pacemaker project contributors + * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #define MAX_RESPAWN 100 extern GMainLoop *mainloop; extern struct qb_ipcs_service_handlers pacemakerd_ipc_callbacks; extern const char *pacemakerd_state; -extern gboolean running_with_sbd; -extern gboolean shutdown_complete_state_reported_client_closed; +extern bool running_with_sbd; +extern bool shutdown_complete_state_reported_client_closed; extern unsigned int shutdown_complete_state_reported_to; extern crm_trigger_t *shutdown_trigger; extern crm_trigger_t *startup_trigger; extern time_t subdaemon_check_progress; int find_and_track_existing_processes(void); gboolean init_children_processes(void *user_data); void pcmk_shutdown(int nsig); void restart_cluster_subdaemons(void); diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c index 715cd754dc..15ef6a4977 100644 --- a/daemons/pacemakerd/pcmkd_subdaemons.c +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -1,900 +1,944 @@ /* - * Copyright 2010-2024 the Pacemaker project contributors + * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. 
* * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #if SUPPORT_COROSYNC #include "pcmkd_corosync.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include enum child_daemon_flags { child_none = 0, child_respawn = 1 << 0, child_needs_cluster = 1 << 1, child_needs_retry = 1 << 2, child_active_before_startup = 1 << 3, + child_shutting_down = 1 << 4, }; typedef struct pcmk_child_s { enum pcmk_ipc_server server; pid_t pid; int respawn_count; const char *uid; int check_count; uint32_t flags; } pcmk_child_t; #define PCMK_PROCESS_CHECK_INTERVAL 1000 /* 1s */ #define PCMK_PROCESS_CHECK_RETRIES 5 #define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ /* Index into the array below */ #define PCMK_CHILD_CONTROLD 5 static pcmk_child_t pcmk_children[] = { { pcmk_ipc_based, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_fenced, 0, 0, NULL, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_execd, 0, 0, NULL, 0, child_respawn }, { pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER, 0, child_respawn }, { pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, }; -static char *opts_default[] = { NULL, NULL }; -static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; - crm_trigger_t *shutdown_trigger = NULL; crm_trigger_t *startup_trigger = NULL; time_t subdaemon_check_progress = 0; // Whether we need root group access to talk to cluster layer static bool need_root_group = true; /* When contacted via pacemakerd-api by a client having sbd in * the name we assume it is sbd-daemon which wants to know * if pacemakerd shutdown gracefully. 
* Thus when everything is shutdown properly pacemakerd * waits till it has reported the graceful completion of * shutdown to sbd and just when sbd-client closes the * connection we can assume that the report has arrived * properly so that pacemakerd can finally exit. * Following two variables are used to track that handshake. */ unsigned int shutdown_complete_state_reported_to = 0; -gboolean shutdown_complete_state_reported_client_closed = FALSE; +bool shutdown_complete_state_reported_client_closed = false; /* state we report when asked via pacemakerd-api status-ping */ const char *pacemakerd_state = PCMK__VALUE_INIT; -gboolean running_with_sbd = FALSE; /* local copy */ +bool running_with_sbd = false; GMainLoop *mainloop = NULL; -static gboolean fatal_error = FALSE; +static bool fatal_error = false; static int child_liveness(pcmk_child_t *child); static gboolean escalate_shutdown(gpointer data); static int start_child(pcmk_child_t * child); static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); static void pcmk_process_exit(pcmk_child_t * child); static gboolean pcmk_shutdown_worker(gpointer user_data); -static gboolean stop_child(pcmk_child_t * child, int signal); +static void stop_child(pcmk_child_t *child, int signal); + +static void +for_each_child(void (*fn)(pcmk_child_t *child)) +{ + for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { + fn(&pcmk_children[i]); + } +} /*! 
* \internal * \brief Get path to subdaemon executable * * \param[in] subdaemon Subdaemon to get path for * * \return Newly allocated string with path to subdaemon executable * \note It is the caller's responsibility to free() the return value */ static inline char * subdaemon_path(pcmk_child_t *subdaemon) { return crm_strdup_printf(CRM_DAEMON_DIR "/%s", pcmk__server_name(subdaemon->server)); } static bool pcmkd_cluster_connected(void) { #if SUPPORT_COROSYNC return pcmkd_corosync_connected(); #else return true; #endif } static gboolean check_next_subdaemon(gpointer user_data) { static int next_child = 0; pcmk_child_t *child = &(pcmk_children[next_child]); const char *name = pcmk__server_name(child->server); const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid); int rc = child_liveness(child); crm_trace("Checked subdaemon %s[%lld]: %s (%d)", name, pid, pcmk_rc_str(rc), rc); switch (rc) { case pcmk_rc_ok: child->check_count = 0; subdaemon_check_progress = time(NULL); break; case pcmk_rc_ipc_pid_only: // Child was previously OK - if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) { + if (pcmk_is_set(child->flags, child_shutting_down)) { + crm_notice("Subdaemon %s[%lld] has stopped accepting IPC " + "connections during shutdown", name, pid); + + } else if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) { // cts-lab looks for this message crm_crit("Subdaemon %s[%lld] is unresponsive to IPC " "after %d attempt%s and will now be killed", name, pid, child->check_count, pcmk__plural_s(child->check_count)); stop_child(child, SIGKILL); if (pcmk_is_set(child->flags, child_respawn)) { // Respawn limit hasn't been reached, so retry another round child->check_count = 0; } + } else { crm_notice("Subdaemon %s[%lld] is unresponsive to IPC " "after %d attempt%s (will recheck later)", name, pid, child->check_count, pcmk__plural_s(child->check_count)); if (pcmk_is_set(child->flags, child_respawn)) { /* as long as the respawn-limit isn't reached and we haven't run out of 
connect retries we account this as progress we are willing to tell to sbd */ subdaemon_check_progress = time(NULL); } } /* go to the next child and see if we can make progress there */ break; case pcmk_rc_ipc_unresponsive: if (!pcmk_is_set(child->flags, child_respawn)) { /* if a subdaemon is down and we don't want it to be restarted this is a success during shutdown. if it isn't restarted anymore due to MAX_RESPAWN it is rather no success. */ if (child->respawn_count <= MAX_RESPAWN) { subdaemon_check_progress = time(NULL); } } if (!pcmk_is_set(child->flags, child_active_before_startup)) { crm_trace("Subdaemon %s[%lld] terminated", name, pid); break; } if (pcmk_is_set(child->flags, child_respawn)) { // cts-lab looks for this message crm_err("Subdaemon %s[%lld] terminated", name, pid); } else { /* orderly shutdown */ crm_notice("Subdaemon %s[%lld] terminated", name, pid); } pcmk_process_exit(child); break; default: crm_exit(CRM_EX_FATAL); break; /* static analysis/noreturn */ } if (++next_child >= PCMK__NELEM(pcmk_children)) { next_child = 0; } return G_SOURCE_CONTINUE; } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid == PCMK__SPECIAL_PID) { pcmk_process_exit(child); } else if (child->pid != 0) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Subdaemon %s not terminating in a timely manner, forcing", pcmk__server_name(child->server)); stop_child(child, SIGSEGV); } - return FALSE; + + return G_SOURCE_REMOVE; } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo) { // cts-lab looks for this message do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), "%s[%d] terminated with signal %d (%s)%s", name, pid, signo, strsignal(signo), (core? 
" and dumped core" : "")); + pcmk_process_exit(child); + return; + } - } else { - switch(exitcode) { - case CRM_EX_OK: - crm_info("%s[%d] exited with status %d (%s)", - name, pid, exitcode, crm_exit_str(exitcode)); - break; + switch(exitcode) { + case CRM_EX_OK: + crm_info("%s[%d] exited with status %d (%s)", + name, pid, exitcode, crm_exit_str(exitcode)); + break; - case CRM_EX_FATAL: - crm_warn("Shutting cluster down because %s[%d] had fatal failure", - name, pid); - child->flags &= ~child_respawn; - fatal_error = TRUE; - pcmk_shutdown(SIGTERM); - break; + case CRM_EX_FATAL: + crm_warn("Shutting cluster down because %s[%d] had fatal failure", + name, pid); + child->flags &= ~child_respawn; + fatal_error = true; + pcmk_shutdown(SIGTERM); + break; - case CRM_EX_PANIC: - { - char *msg = NULL; + case CRM_EX_PANIC: + { + char *msg = NULL; - child->flags &= ~child_respawn; - fatal_error = TRUE; - msg = crm_strdup_printf("Subdaemon %s[%d] requested panic", - name, pid); - pcmk__panic(msg); + child->flags &= ~child_respawn; + fatal_error = true; + msg = crm_strdup_printf("Subdaemon %s[%d] requested panic", + name, pid); + pcmk__panic(msg); - // Should never get here - free(msg); - pcmk_shutdown(SIGTERM); - } - break; + // Should never get here + free(msg); + pcmk_shutdown(SIGTERM); + } + break; - default: - // cts-lab looks for this message - crm_err("%s[%d] exited with status %d (%s)", - name, pid, exitcode, crm_exit_str(exitcode)); - break; - } + default: + // cts-lab looks for this message + crm_err("%s[%d] exited with status %d (%s)", + name, pid, exitcode, crm_exit_str(exitcode)); + break; } pcmk_process_exit(child); } static void pcmk_process_exit(pcmk_child_t * child) { const char *name = pcmk__server_name(child->server); child->pid = 0; child->flags &= ~child_active_before_startup; child->check_count = 0; child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Subdaemon %s exceeded maximum respawn count", name); child->flags &= 
~child_respawn; } if (shutdown_trigger) { /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ mainloop_set_trigger(shutdown_trigger); } else if (!pcmk_is_set(child->flags, child_respawn)) { /* nothing to do */ } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { pcmk__panic("Subdaemon failed"); } else if (child_liveness(child) == pcmk_rc_ok) { crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK", name, pcmk__server_ipc_name(child->server)); } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) { crm_notice("Not respawning subdaemon %s until cluster returns", name); child->flags |= child_needs_retry; } else { // cts-lab looks for this message crm_notice("Respawning subdaemon %s after unexpected exit", name); start_child(child); } } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = PCMK__NELEM(pcmk_children) - 1; static time_t next_log = 0; if (phase == PCMK__NELEM(pcmk_children) - 1) { crm_notice("Shutting down Pacemaker"); pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN; } for (; phase >= 0; phase--) { pcmk_child_t *child = &(pcmk_children[phase]); const char *name = pcmk__server_name(child->server); + time_t now = 0; - if (child->pid != 0) { - time_t now = time(NULL); + if (child->pid == 0) { + /* cleanup */ + crm_debug("Subdaemon %s confirmed stopped", name); + child->pid = 0; + continue; + } - if (pcmk_is_set(child->flags, child_respawn)) { - if (child->pid == PCMK__SPECIAL_PID) { - crm_warn("Subdaemon %s cannot be terminated (shutdown " - "will be escalated after %ld seconds if it does " - "not terminate on its own; set PCMK_" - PCMK__ENV_FAIL_FAST "=1 to exit immediately " - "instead)", - name, (long) SHUTDOWN_ESCALATION_PERIOD); - } - next_log = now + 30; - child->flags &= ~child_respawn; - stop_child(child, SIGTERM); - if (phase < PCMK_CHILD_CONTROLD) { - pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD, - escalate_shutdown, child); - } + now = 
time(NULL); - } else if (now >= next_log) { - next_log = now + 30; - crm_notice("Still waiting for subdaemon %s to terminate " - QB_XS " pid=%lld", name, (long long) child->pid); + if (pcmk_is_set(child->flags, child_respawn)) { + if (child->pid == PCMK__SPECIAL_PID) { + crm_warn("Subdaemon %s cannot be terminated (shutdown " + "will be escalated after %ld seconds if it does " + "not terminate on its own; set PCMK_" + PCMK__ENV_FAIL_FAST "=1 to exit immediately " + "instead)", + name, (long) SHUTDOWN_ESCALATION_PERIOD); } - return TRUE; + next_log = now + 30; + child->flags &= ~child_respawn; + stop_child(child, SIGTERM); + if (phase < PCMK_CHILD_CONTROLD) { + pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD, + escalate_shutdown, child); + } + + } else if (now >= next_log) { + next_log = now + 30; + crm_notice("Still waiting for subdaemon %s to terminate " + QB_XS " pid=%lld", name, (long long) child->pid); } - /* cleanup */ - crm_debug("Subdaemon %s confirmed stopped", name); - child->pid = 0; + return G_SOURCE_CONTINUE; } crm_notice("Shutdown complete"); pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE; if (!fatal_error && running_with_sbd && pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { crm_notice("Waiting for SBD to pick up shutdown-complete-state."); - return TRUE; + return G_SOURCE_CONTINUE; } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Shutting down and staying down after fatal error"); #if SUPPORT_COROSYNC pcmkd_shutdown_corosync(); #endif crm_exit(CRM_EX_FATAL); } - return TRUE; + return G_SOURCE_CONTINUE; } /* TODO once libqb is taught to juggle with IPC end-points carried over as bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) it shall hand over these descriptors here if/once they are successfully pre-opened in (presumably) child_liveness(), to avoid any remaining room for races */ // \return Standard Pacemaker return code static int start_child(pcmk_child_t * child) { uid_t 
uid = 0; gid_t gid = 0; - gboolean use_valgrind = FALSE; - gboolean use_callgrind = FALSE; + bool use_valgrind = false; + bool use_callgrind = false; const char *name = pcmk__server_name(child->server); const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED); - child->flags &= ~child_active_before_startup; + child->flags &= ~(child_active_before_startup | child_shutting_down); child->check_count = 0; if (env_callgrind != NULL && crm_is_true(env_callgrind)) { - use_callgrind = TRUE; - use_valgrind = TRUE; + use_callgrind = true; + use_valgrind = true; } else if ((env_callgrind != NULL) && (strstr(env_callgrind, name) != NULL)) { - use_callgrind = TRUE; - use_valgrind = TRUE; + use_callgrind = true; + use_valgrind = true; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { - use_valgrind = TRUE; + use_valgrind = true; } else if ((env_valgrind != NULL) && (strstr(env_valgrind, name) != NULL)) { - use_valgrind = TRUE; + use_valgrind = true; } if (use_valgrind && strlen(PCMK__VALGRIND_EXEC) == 0) { crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found", name); - use_valgrind = FALSE; + use_valgrind = false; } if ((child->uid != NULL) && (crm_user_lookup(child->uid, &uid, &gid) < 0)) { crm_err("Invalid user (%s) for subdaemon %s: not found", child->uid, name); return EACCES; } child->pid = fork(); pcmk__assert(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit); - if (use_valgrind) { - crm_info("Forked process %lld using user %lu (%s) and group %lu " - "for subdaemon %s (valgrind enabled: %s)", - (long long) child->pid, (unsigned long) uid, - pcmk__s(child->uid, "root"), (unsigned long) gid, name, - PCMK__VALGRIND_EXEC); - } else { - crm_info("Forked process %lld using user %lu (%s) and group %lu " - "for subdaemon %s", - (long long) child->pid, (unsigned long) uid, - pcmk__s(child->uid, 
"root"), (unsigned long) gid, name); - } + if (use_valgrind) { + crm_info("Forked process %lld using user %lu (%s) and group %lu " + "for subdaemon %s (valgrind enabled: %s)", + (long long) child->pid, (unsigned long) uid, + pcmk__s(child->uid, "root"), (unsigned long) gid, name, + PCMK__VALGRIND_EXEC); + } else { + crm_info("Forked process %lld using user %lu (%s) and group %lu " + "for subdaemon %s", + (long long) child->pid, (unsigned long) uid, + pcmk__s(child->uid, "root"), (unsigned long) gid, name); + } return pcmk_rc_ok; } else { - /* Start a new session */ - (void)setsid(); + char *path = subdaemon_path(child); - /* Setup the two alternate arg arrays */ - opts_vgrind[0] = pcmk__str_copy(PCMK__VALGRIND_EXEC); - if (use_callgrind) { - opts_vgrind[1] = pcmk__str_copy("--tool=callgrind"); - opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file=" - CRM_STATE_DIR "/callgrind.out.%p"); - opts_vgrind[3] = subdaemon_path(child); - opts_vgrind[4] = NULL; - } else { - opts_vgrind[1] = subdaemon_path(child); - opts_vgrind[2] = NULL; - opts_vgrind[3] = NULL; - opts_vgrind[4] = NULL; - } - opts_default[0] = subdaemon_path(child); + /* Start a new session */ + setsid(); if(gid) { // Drop root group access if not needed if (!need_root_group && (setgid(gid) < 0)) { crm_warn("Could not set subdaemon %s group to %lu: %s", name, (unsigned long) gid, strerror(errno)); } /* Initialize supplementary groups to only those always granted to * the user, plus haclient (so we can access IPC). 
*/ if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize system groups for subdaemon %s: %s " QB_XS " errno=%d", name, pcmk_rc_str(errno), errno); } } if (uid && setuid(uid) < 0) { crm_warn("Could not set subdaemon %s user to %s: %s " QB_XS " uid=%lu errno=%d", name, strerror(errno), child->uid, (unsigned long) uid, errno); } pcmk__close_fds_in_child(true); pcmk__open_devnull(O_RDONLY); // stdin (fd 0) pcmk__open_devnull(O_WRONLY); // stdout (fd 1) pcmk__open_devnull(O_WRONLY); // stderr (fd 2) - if (use_valgrind) { - (void)execvp(PCMK__VALGRIND_EXEC, opts_vgrind); + if (use_callgrind) { + char *out_file = pcmk__str_copy("--callgrind-out-file=" + CRM_STATE_DIR "/callgrind.out.%p"); + execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, "--tool=callgrind", + out_file, path, (char *) NULL); + free(out_file); + } else if (use_valgrind) { + execlp(PCMK__VALGRIND_EXEC, PCMK__VALGRIND_EXEC, path, (char *) NULL); } else { - char *path = subdaemon_path(child); - - (void) execvp(path, opts_default); - free(path); + execlp(path, path, (char *) NULL); } + + free(path); crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno)); crm_exit(CRM_EX_FATAL); } return pcmk_rc_ok; /* never reached */ } /*! * \internal * \brief Check the liveness of the child based on IPC name and PID if tracked * * \param[in,out] child Child tracked data * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that * the child is up by PID but not IPC end-point (possibly starting). 
* \note This function doesn't modify any of \p child members but \c pid, * and is not actively toying with processes as such but invoking * \c stop_child in one particular case (there's for some reason * a different authentic holder of the IPC end-point). */ static int child_liveness(pcmk_child_t *child) { uid_t cl_uid = 0; gid_t cl_gid = 0; const uid_t root_uid = 0; const gid_t root_gid = 0; const uid_t *ref_uid; const gid_t *ref_gid; const char *name = pcmk__server_name(child->server); int rc = pcmk_rc_ipc_unresponsive; + int pid_active = pcmk_rc_ok; int legacy_rc = pcmk_ok; pid_t ipc_pid = 0; if (child->uid == NULL) { ref_uid = &root_uid; ref_gid = &root_gid; } else { ref_uid = &cl_uid; ref_gid = &cl_gid; legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); } if (legacy_rc < 0) { rc = pcmk_legacy2rc(legacy_rc); crm_err("Could not find user and group IDs for user %s: %s " QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); } else { const char *ipc_name = pcmk__server_ipc_name(child->server); rc = pcmk__ipc_is_authentic_process_active(ipc_name, *ref_uid, *ref_gid, &ipc_pid); if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { if (child->pid <= 0) { /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this * initializes a new child. If rc is * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will * investigate further. */ child->pid = ipc_pid; } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { /* An unexpected (but authorized) process is responding to * IPC. Investigate further. */ rc = pcmk_rc_ipc_unresponsive; } } } - if (rc == pcmk_rc_ipc_unresponsive) { - /* If we get here, a child without IPC is being tracked, no IPC liveness - * has been detected, or IPC liveness has been detected with an - * unexpected (but authorized) process. This is safe on FreeBSD since - * the only change possible from a proper child's PID into "special" PID - * of 1 behind more loosely related process. 
+ if (rc != pcmk_rc_ipc_unresponsive) { + return rc; + } + + /* If we get here, a child without IPC is being tracked, no IPC liveness + * has been detected, or IPC liveness has been detected with an + * unexpected (but authorized) process. This is safe on FreeBSD since + * the only change possible from a proper child's PID into "special" PID + * of 1 behind more loosely related process. + */ + pid_active = pcmk__pid_active(child->pid, name); + + if ((ipc_pid != 0) + && ((pid_active != pcmk_rc_ok) + || (ipc_pid == PCMK__SPECIAL_PID) + || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) { + /* An unexpected (but authorized) process was detected at the IPC + * endpoint, and either it is active, or the child we're tracking is + * not. */ - int ret = pcmk__pid_active(child->pid, name); - - if (ipc_pid && ((ret != pcmk_rc_ok) - || ipc_pid == PCMK__SPECIAL_PID - || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) { - /* An unexpected (but authorized) process was detected at the IPC - * endpoint, and either it is active, or the child we're tracking is - * not. + + if (pid_active == pcmk_rc_ok) { + /* The child we're tracking is active. Kill it, and adopt the + * detected process. This assumes that our children don't fork + * (thus getting a different PID owning the IPC), but rather the + * tracking got out of sync because of some means external to + * Pacemaker, and adopting the detected process is better than + * killing it and possibly having to spawn a new child. */ + /* not possessing IPC, afterall (what about corosync CPG?) 
*/ + stop_child(child, SIGKILL); + } + rc = pcmk_rc_ok; + child->pid = ipc_pid; + } else if (pid_active == pcmk_rc_ok) { + // Our tracked child's PID was found active, but not its IPC + rc = pcmk_rc_ipc_pid_only; + } else if ((child->pid == 0) && (pid_active == EINVAL)) { + // FreeBSD can return EINVAL + rc = pcmk_rc_ipc_unresponsive; + } else if (pid_active == EACCES) { + rc = pcmk_rc_ipc_unauthorized; + } else if (pid_active == ESRCH) { + rc = pcmk_rc_ipc_unresponsive; + } else { + rc = pid_active; + } + + return rc; +} + +static void +reset_respawn_count(pcmk_child_t *child) +{ + /* Restore pristine state */ + child->respawn_count = 0; +} + +#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ + +static int +child_up_but_no_ipc(pcmk_child_t *child) +{ + const char *ipc_name = pcmk__server_ipc_name(child->server); + + if (child->respawn_count == WAIT_TRIES) { + crm_crit("%s IPC endpoint for existing process %lld did not (re)appear", + ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); + return pcmk_rc_ipc_pid_only; + } + + crm_warn("Cannot find %s IPC endpoint for existing process %lld, could still " + "reappear in %d attempts", + ipc_name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid), + WAIT_TRIES - child->respawn_count); + return EAGAIN; +} + +static int +child_alive(pcmk_child_t *child) +{ + const char *name = pcmk__server_name(child->server); + + if (child->pid == PCMK__SPECIAL_PID) { + if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { + crm_crit("Cannot track pre-existing process for %s IPC on this " + "platform and PCMK_" PCMK__ENV_FAIL_FAST " requested", + name); + return EOPNOTSUPP; + + } else if (child->respawn_count == WAIT_TRIES) { + /* Because PCMK__ENV_FAIL_FAST wasn't requested, we can't bail + * out. Instead, switch to IPC liveness monitoring which is not + * very suitable for heavy system load. 
+ */ + crm_notice("Cannot track pre-existing process for %s IPC on this " + "platform but assuming it is stable and using liveness " + "monitoring", name); + crm_warn("The process for %s IPC cannot be terminated, so " + "shutdown will be delayed by %d s to allow time for it " + "to terminate on its own", name, SHUTDOWN_ESCALATION_PERIOD); - if (ret == pcmk_rc_ok) { - /* The child we're tracking is active. Kill it, and adopt the - * detected process. This assumes that our children don't fork - * (thus getting a different PID owning the IPC), but rather the - * tracking got out of sync because of some means external to - * Pacemaker, and adopting the detected process is better than - * killing it and possibly having to spawn a new child. - */ - /* not possessing IPC, afterall (what about corosync CPG?) */ - stop_child(child, SIGKILL); - } - rc = pcmk_rc_ok; - child->pid = ipc_pid; - } else if (ret == pcmk_rc_ok) { - // Our tracked child's PID was found active, but not its IPC - rc = pcmk_rc_ipc_pid_only; - } else if ((child->pid == 0) && (ret == EINVAL)) { - // FreeBSD can return EINVAL - rc = pcmk_rc_ipc_unresponsive; } else { - switch (ret) { - case EACCES: - rc = pcmk_rc_ipc_unauthorized; - break; - case ESRCH: - rc = pcmk_rc_ipc_unresponsive; - break; - default: - rc = ret; - break; - } + crm_warn("Cannot track pre-existing process for %s IPC on this " + "platform; checking %d more times", + name, WAIT_TRIES - child->respawn_count); + return EAGAIN; + } + } + + crm_notice("Tracking existing %s process (pid=%lld)", + name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); + child->respawn_count = -1; /* 0~keep watching */ + child->flags |= child_active_before_startup; + return pcmk_rc_ok; +} + +static int +find_and_track_child(pcmk_child_t *child, int rounds, bool *wait_in_progress) +{ + int rc = pcmk_rc_ok; + const char *name = pcmk__server_name(child->server); + + if (child->respawn_count < 0) { + return EAGAIN; + } + + rc = child_liveness(child); + if (rc == 
pcmk_rc_ipc_unresponsive) { + /* As a speculation, don't give up if there are more rounds to + * come for other reasons, but don't artificially wait just + * because of this, since we would preferably start ASAP. + */ + return EAGAIN; + } + + child->respawn_count = rounds; + + if (rc == pcmk_rc_ok) { + rc = child_alive(child); + + if (rc == EAGAIN) { + *wait_in_progress = true; } + + } else if (rc == pcmk_rc_ipc_pid_only) { + rc = child_up_but_no_ipc(child); + + if (rc == EAGAIN) { + *wait_in_progress = true; + } + + } else { + crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", name, + pcmk_rc_str(rc), rc); } + return rc; } /*! * \internal * \brief Initial one-off check of the pre-existing "child" processes * * With "child" process, we mean the subdaemon that defines an API end-point * (all of them do as of the comment) -- the possible complement is skipped * as it is deemed it has no such shared resources to cause conflicts about, * hence it can presumably be started anew without hesitation. * If that won't hold true in the future, the concept of a shared resource * will have to be generalized beyond the API end-point. * * For boundary cases that the "child" is still starting (IPC end-point is yet * to be witnessed), or more rarely (practically FreeBSD only), when there's * a pre-existing "untrackable" authentic process, we give the situation some * time to possibly unfold in the right direction, meaning that said socket * will appear or the unattainable process will disappear per the observable * IPC, respectively. * * \return Standard Pacemaker return code * * \note Since this gets run at the very start, \c respawn_count fields * for particular children get temporarily overloaded with "rounds * of waiting" tracking, restored once we are about to finish with * success (i.e. returning value >=0) and will remain unrestored * otherwise. One way to suppress liveness detection logic for * particular child is to set the said value to a negative number. 
*/ -#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ int find_and_track_existing_processes(void) { bool wait_in_progress; - int rc; size_t i, rounds; for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { wait_in_progress = false; - for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { - const char *name = pcmk__server_name(pcmk_children[i].server); - const char *ipc_name = NULL; - if (pcmk_children[i].respawn_count < 0) { - continue; - } - - rc = child_liveness(&pcmk_children[i]); - if (rc == pcmk_rc_ipc_unresponsive) { - /* As a speculation, don't give up if there are more rounds to - * come for other reasons, but don't artificially wait just - * because of this, since we would preferably start ASAP. - */ - continue; - } + for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { + int rc = find_and_track_child(&pcmk_children[i], rounds, + &wait_in_progress); - // @TODO Functionize more of this to reduce nesting - ipc_name = pcmk__server_ipc_name(pcmk_children[i].server); - pcmk_children[i].respawn_count = rounds; - switch (rc) { - case pcmk_rc_ok: - if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { - if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { - crm_crit("Cannot reliably track pre-existing" - " authentic process behind %s IPC on this" - " platform and PCMK_" PCMK__ENV_FAIL_FAST - " requested", ipc_name); - return EOPNOTSUPP; - } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { - crm_notice("Assuming pre-existing authentic, though" - " on this platform untrackable, process" - " behind %s IPC is stable (was in %d" - " previous samples) so rather than" - " bailing out (PCMK_" PCMK__ENV_FAIL_FAST - " not requested), we just switch to a" - " less optimal IPC liveness monitoring" - " (not very suitable for heavy load)", - name, WAIT_TRIES - 1); - crm_warn("The process behind %s IPC cannot be" - " terminated, so the overall shutdown" - " will get delayed implicitly (%ld s)," - " which serves as a graceful period for" - " its native 
termination if it vitally" - " depends on some other daemons going" - " down in a controlled way already", - name, (long) SHUTDOWN_ESCALATION_PERIOD); - } else { - wait_in_progress = true; - crm_warn("Cannot reliably track pre-existing" - " authentic process behind %s IPC on this" - " platform, can still disappear in %d" - " attempt(s)", ipc_name, - WAIT_TRIES - pcmk_children[i].respawn_count); - continue; - } - } - crm_notice("Tracking existing %s process (pid=%lld)", - name, - (long long) PCMK__SPECIAL_PID_AS_0( - pcmk_children[i].pid)); - pcmk_children[i].respawn_count = -1; /* 0~keep watching */ - pcmk_children[i].flags |= child_active_before_startup; - break; - case pcmk_rc_ipc_pid_only: - if (pcmk_children[i].respawn_count == WAIT_TRIES) { - crm_crit("%s IPC endpoint for existing authentic" - " process %lld did not (re)appear", - ipc_name, - (long long) PCMK__SPECIAL_PID_AS_0( - pcmk_children[i].pid)); - return rc; - } - wait_in_progress = true; - crm_warn("Cannot find %s IPC endpoint for existing" - " authentic process %lld, can still (re)appear" - " in %d attempts (?)", - ipc_name, - (long long) PCMK__SPECIAL_PID_AS_0( - pcmk_children[i].pid), - WAIT_TRIES - pcmk_children[i].respawn_count); - continue; - default: - crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", - name, pcmk_rc_str(rc), rc); - return rc; + if (rc == pcmk_rc_ok) { + continue; /* tracked; go on to check the remaining subdaemons */ + } else if (rc != EAGAIN) { + return rc; } } + if (!wait_in_progress) { break; } + pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen } - for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { - pcmk_children[i].respawn_count = 0; /* restore pristine state */ - } + for_each_child(reset_respawn_count); pcmk__create_timer(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, NULL); return pcmk_rc_ok; } +static void +start_subdaemon(pcmk_child_t *child) +{ + if (child->pid != 0) { + /* We are already tracking this process */ + return; + } + + start_child(child); +} + gboolean init_children_processes(void 
*user_data) { if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { /* Corosync clusters can drop root group access, because we set * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect * to corosync. */ need_root_group = false; } /* start any children that have not been detected */ - for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { - if (pcmk_children[i].pid != 0) { - /* we are already tracking it */ - continue; - } - - start_child(&(pcmk_children[i])); - } + for_each_child(start_subdaemon); /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false); pacemakerd_state = PCMK__VALUE_RUNNING; - return TRUE; + return G_SOURCE_CONTINUE; } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } -void -restart_cluster_subdaemons(void) +static void +restart_subdaemon(pcmk_child_t *child) { - for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { - if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) { - continue; - } + if (!pcmk_is_set(child->flags, child_needs_retry) || child->pid != 0) { + return; + } - crm_notice("Respawning cluster-based subdaemon %s", - pcmk__server_name(pcmk_children[i].server)); - if (start_child(&pcmk_children[i])) { - pcmk_children[i].flags &= ~child_needs_retry; - } + crm_notice("Respawning cluster-based subdaemon %s", + pcmk__server_name(child->server)); + + if (start_child(child) == pcmk_rc_ok) { /* clear the retry flag only when the respawn succeeded */ + child->flags &= ~child_needs_retry; } } -static gboolean -stop_child(pcmk_child_t * child, int signal) +void +restart_cluster_subdaemons(void) +{ + for_each_child(restart_subdaemon); +} + +static void +stop_child(pcmk_child_t *child, int signal) { const char *name = pcmk__server_name(child->server); if 
(signal == 0) { signal = SIGTERM; } /* why to skip PID of 1? - FreeBSD ~ how untrackable process behind IPC is masqueraded as - elsewhere: how "init" task is designated; in particular, in systemd arrangement of socket-based activation, this is pretty real */ if (child->pid == PCMK__SPECIAL_PID) { crm_debug("Nothing to do to stop subdaemon %s[%lld]", name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); - return TRUE; + return; } if (child->pid <= 0) { crm_trace("Nothing to do to stop subdaemon %s: Not running", name); - return TRUE; + return; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping subdaemon %s " QB_XS " via signal %d to process %lld", name, signal, (long long) child->pid); + child->flags |= child_shutting_down; } else { crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s", name, (long long) child->pid, signal, strerror(errno)); } - - return TRUE; }