diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index d63d86f583..fbb5d8f2f4 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,853 +1,854 @@
 /*
  * Copyright 2004-2023 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/pengine/rules.h>
 #include <crm/cluster/internal.h>
 #include <crm/cluster/election_internal.h>
 #include <crm/common/ipc_internal.h>
 
 #include <pacemaker-controld.h>
 
 static qb_ipcs_service_t *ipcs = NULL;
 
 static crm_trigger_t *config_read_trigger = NULL;
 
 #if SUPPORT_COROSYNC
 extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
 #endif
 
 void crm_shutdown(int nsig);
 static gboolean crm_read_options(gpointer user_data);
 
 /*	 A_HA_CONNECT	*/
 void
 do_ha_control(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     gboolean registered = FALSE;
     static crm_cluster_t *cluster = NULL;
 
     if (cluster == NULL) {
         cluster = pcmk_cluster_new();
     }
 
     if (action & A_HA_DISCONNECT) {
         crm_cluster_disconnect(cluster);
         crm_info("Disconnected from the cluster");
 
         controld_set_fsa_input_flags(R_HA_DISCONNECTED);
     }
 
     if (action & A_HA_CONNECT) {
         crm_set_status_callback(&peer_update_callback);
         crm_set_autoreap(FALSE);
 
 #if SUPPORT_COROSYNC
         if (is_corosync_cluster()) {
             registered = crm_connect_corosync(cluster);
         }
 #endif // SUPPORT_COROSYNC
 
         if (registered) {
             controld_election_init(cluster->uname);
             controld_globals.our_nodename = cluster->uname;
             controld_globals.our_uuid = cluster->uuid;
             if(cluster->uuid == NULL) {
                 crm_err("Could not obtain local uuid");
                 registered = FALSE;
             }
         }
 
         if (!registered) {
             controld_set_fsa_input_flags(R_HA_DISCONNECTED);
             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
             return;
         }
 
         populate_cib_nodes(node_update_none, __func__);
         controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
         crm_info("Connected to the cluster");
     }
 
     if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
         crm_err("Unexpected action %s in %s", fsa_action2string(action),
                 __func__);
     }
 }
 
 /*	 A_SHUTDOWN	*/
 void
 do_shutdown(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     /* just in case */
     controld_set_fsa_input_flags(R_SHUTDOWN);
     controld_disconnect_fencer(FALSE);
 }
 
 /*	 A_SHUTDOWN_REQ	*/
 void
 do_shutdown_req(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     xmlNode *msg = NULL;
 
     controld_set_fsa_input_flags(R_SHUTDOWN);
     //controld_set_fsa_input_flags(R_STAYDOWN);
     crm_info("Sending shutdown request to all peers (DC is %s)",
              pcmk__s(controld_globals.dc_name, "not set"));
     msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 
     if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     }
     free_xml(msg);
 }
 
 void
 crmd_fast_exit(crm_exit_t exit_code)
 {
     if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
         crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
                  exit_code, CRM_EX_FATAL);
         exit_code = CRM_EX_FATAL;
 
     } else if ((exit_code == CRM_EX_OK)
                && pcmk_is_set(controld_globals.fsa_input_register,
                               R_IN_RECOVERY)) {
         crm_err("Could not recover from internal error");
         exit_code = CRM_EX_ERROR;
     }
 
     if (controld_globals.logger_out != NULL) {
         controld_globals.logger_out->finish(controld_globals.logger_out,
                                             exit_code, true, NULL);
         pcmk__output_free(controld_globals.logger_out);
         controld_globals.logger_out = NULL;
     }
 
     crm_exit(exit_code);
 }
 
 crm_exit_t
 crmd_exit(crm_exit_t exit_code)
 {
     GMainLoop *mloop = controld_globals.mainloop;
 
     static bool in_progress = FALSE;
 
     if (in_progress && (exit_code == CRM_EX_OK)) {
         crm_debug("Exit is already in progress");
         return exit_code;
 
     } else if(in_progress) {
         crm_notice("Error during shutdown process, exiting now with status %d (%s)",
                    exit_code, crm_exit_str(exit_code));
         crm_write_blackbox(SIGTRAP, NULL);
         crmd_fast_exit(exit_code);
     }
 
     in_progress = TRUE;
     crm_trace("Preparing to exit with status %d (%s)",
               exit_code, crm_exit_str(exit_code));
 
     /* Suppress secondary errors resulting from us disconnecting everything */
     controld_set_fsa_input_flags(R_HA_DISCONNECTED);
 
 /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
 
     if(ipcs) {
         crm_trace("Closing IPC server");
         mainloop_del_ipc_server(ipcs);
         ipcs = NULL;
     }
 
     controld_close_attrd_ipc();
     controld_shutdown_schedulerd_ipc();
     controld_disconnect_fencer(TRUE);
 
     if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
         crm_debug("No mainloop detected");
         exit_code = CRM_EX_ERROR;
     }
 
     /* On an error, just get out.
      *
      * Otherwise, make the effort to have mainloop exit gracefully so
      * that it (mostly) cleans up after itself and valgrind has less
      * to report on - allowing real errors stand out
      */
     if (exit_code != CRM_EX_OK) {
         crm_notice("Forcing immediate exit with status %d (%s)",
                    exit_code, crm_exit_str(exit_code));
         crm_write_blackbox(SIGTRAP, NULL);
         crmd_fast_exit(exit_code);
     }
 
 /* Clean up as much memory as possible for valgrind */
 
     for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
          iter = iter->next) {
         fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
 
         crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
                  fsa_input2string(fsa_data->fsa_input),
                  fsa_state2string(controld_globals.fsa_state),
                  fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
         delete_fsa_input(fsa_data);
     }
 
     controld_clear_fsa_input_flags(R_MEMBERSHIP);
 
     g_list_free(controld_globals.fsa_message_queue);
     controld_globals.fsa_message_queue = NULL;
 
     controld_free_node_pending_timers();
     controld_election_fini();
 
     /* Tear down the CIB manager connection, but don't free it yet -- it could
      * be used when we drain the mainloop later.
      */
 
     controld_disconnect_cib_manager();
 
     verify_stopped(controld_globals.fsa_state, LOG_WARNING);
     controld_clear_fsa_input_flags(R_LRM_CONNECTED);
     lrm_state_destroy_all();
 
     mainloop_destroy_trigger(config_read_trigger);
     config_read_trigger = NULL;
 
     controld_destroy_fsa_trigger();
     controld_destroy_transition_trigger();
 
     pcmk__client_cleanup();
     crm_peer_destroy();
 
     controld_free_fsa_timers();
     te_cleanup_stonith_history_sync(NULL, TRUE);
     controld_free_sched_timer();
 
     free(controld_globals.our_nodename);
     controld_globals.our_nodename = NULL;
 
     free(controld_globals.our_uuid);
     controld_globals.our_uuid = NULL;
 
     free(controld_globals.dc_name);
     controld_globals.dc_name = NULL;
 
     free(controld_globals.dc_version);
     controld_globals.dc_version = NULL;
 
     free(controld_globals.cluster_name);
     controld_globals.cluster_name = NULL;
 
     free(controld_globals.te_uuid);
     controld_globals.te_uuid = NULL;
 
     free_max_generation();
     controld_destroy_cib_replacements_table();
     controld_destroy_failed_sync_table();
     controld_destroy_outside_events_table();
 
     mainloop_destroy_signal(SIGPIPE);
     mainloop_destroy_signal(SIGUSR1);
     mainloop_destroy_signal(SIGTERM);
     mainloop_destroy_signal(SIGTRAP);
     /* leave SIGCHLD engaged as we might still want to drain some service-actions */
 
     if (mloop) {
         GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
 
         /* Don't re-enter this block */
         controld_globals.mainloop = NULL;
 
         /* no signals on final draining anymore */
         mainloop_destroy_signal(SIGCHLD);
 
         crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
 
         {
             int lpc = 0;
 
             while((g_main_context_pending(ctx) && lpc < 10)) {
                 lpc++;
                 crm_trace("Iteration %d", lpc);
                 g_main_context_dispatch(ctx);
             }
         }
 
         crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
         g_main_loop_quit(mloop);
 
         /* Won't do anything yet, since we're inside it now */
         g_main_loop_unref(mloop);
     } else {
         mainloop_destroy_signal(SIGCHLD);
     }
 
     cib_delete(controld_globals.cib_conn);
     controld_globals.cib_conn = NULL;
 
     throttle_fini();
 
     /* Graceful */
     crm_trace("Done preparing for exit with status %d (%s)",
               exit_code, crm_exit_str(exit_code));
     return exit_code;
 }
 
 /*	 A_EXIT_0, A_EXIT_1	*/
 void
 do_exit(long long action,
         enum crmd_fsa_cause cause,
         enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     crm_exit_t exit_code = CRM_EX_OK;
 
     if (pcmk_is_set(action, A_EXIT_1)) {
         exit_code = CRM_EX_ERROR;
         crm_err("Exiting now due to errors");
     }
     verify_stopped(cur_state, LOG_ERR);
     crmd_exit(exit_code);
 }
 
 static void sigpipe_ignore(int nsig) { return; }
 
 /*	 A_STARTUP	*/
 void
 do_startup(long long action,
            enum crmd_fsa_cause cause,
            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     crm_debug("Registering Signal Handlers");
     mainloop_add_signal(SIGTERM, crm_shutdown);
     mainloop_add_signal(SIGPIPE, sigpipe_ignore);
 
     config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
                                                crm_read_options, NULL);
 
     controld_init_fsa_trigger();
     controld_init_transition_trigger();
 
     crm_debug("Creating CIB manager and executor objects");
     controld_globals.cib_conn = cib_new();
 
     lrm_state_init_local();
     if (controld_init_fsa_timers() == FALSE) {
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     }
 }
 
 // \return libqb error code (0 on success, -errno on error)
 static int32_t
 accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
 {
     crm_trace("Accepting new IPC client connection");
     if (pcmk__new_client(c, uid, gid) == NULL) {
         return -EIO;
     }
     return 0;
 }
 
 // \return libqb error code (0 on success, -errno on error)
 static int32_t
 dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
 {
     uint32_t id = 0;
     uint32_t flags = 0;
     pcmk__client_t *client = pcmk__find_client(c);
 
     xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
 
     if (msg == NULL) {
         pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
         return 0;
     }
     pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
 
     CRM_ASSERT(client->user != NULL);
     pcmk__update_acl_user(msg, F_CRM_USER, client->user);
 
     crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
     if (controld_authorize_ipc_message(msg, client, NULL)) {
         crm_trace("Processing IPC message from client %s",
                   pcmk__client_name(client));
         route_message(C_IPC_MESSAGE, msg);
     }
 
     controld_trigger_fsa();
     free_xml(msg);
     return 0;
 }
 
 static int32_t
 ipc_client_disconnected(qb_ipcs_connection_t *c)
 {
     pcmk__client_t *client = pcmk__find_client(c);
 
     if (client) {
         crm_trace("Disconnecting %sregistered client %s (%p/%p)",
                   (client->userdata? "" : "un"), pcmk__client_name(client),
                   c, client);
         free(client->userdata);
         pcmk__free_client(client);
         controld_trigger_fsa();
     }
     return 0;
 }
 
 static void
 ipc_connection_destroyed(qb_ipcs_connection_t *c)
 {
     crm_trace("Connection %p", c);
     ipc_client_disconnected(c);
 }
 
 /*	 A_STOP	*/
 void
 do_stop(long long action,
         enum crmd_fsa_cause cause,
         enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     crm_trace("Closing IPC server");
     mainloop_del_ipc_server(ipcs); ipcs = NULL;
     register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
 }
 
 /*	 A_STARTED	*/
 void
 do_started(long long action,
            enum crmd_fsa_cause cause,
            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     static struct qb_ipcs_service_handlers crmd_callbacks = {
         .connection_accept = accept_controller_client,
         .connection_created = NULL,
         .msg_process = dispatch_controller_ipc,
         .connection_closed = ipc_client_disconnected,
         .connection_destroyed = ipc_connection_destroyed
     };
 
     if (cur_state != S_STARTING) {
         crm_err("Start cancelled... %s", fsa_state2string(cur_state));
         return;
 
     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                             R_MEMBERSHIP)) {
         crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
 
         crmd_fsa_stall(TRUE);
         return;
 
     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                             R_LRM_CONNECTED)) {
         crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
 
         crmd_fsa_stall(TRUE);
         return;
 
     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                             R_CIB_CONNECTED)) {
         crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
 
         crmd_fsa_stall(TRUE);
         return;
 
     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                             R_READ_CONFIG)) {
         crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
 
         crmd_fsa_stall(TRUE);
         return;
 
     } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
 
         crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
         crmd_fsa_stall(TRUE);
         return;
     }
 
     crm_debug("Init server comms");
     ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
     if (ipcs == NULL) {
         crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     } else {
         crm_notice("Pacemaker controller successfully started and accepting connections");
     }
-    controld_trigger_fencer_connect();
+    controld_set_fsa_input_flags(R_ST_REQUIRED);
+    controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
 
     controld_clear_fsa_input_flags(R_STARTING);
     register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
 }
 
 /*	 A_RECOVER	*/
 void
 do_recover(long long action,
            enum crmd_fsa_cause cause,
            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     controld_set_fsa_input_flags(R_IN_RECOVERY);
     crm_warn("Fast-tracking shutdown in response to errors");
 
     register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
 }
 
 static pcmk__cluster_option_t controller_options[] = {
     /* name, old name, type, allowed values,
      * default value, validator,
      * short description,
      * long description
      */
     {
         "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
         N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
         N_("Includes a hash which identifies the exact changeset the code was "
             "built from. Used for diagnostic purposes.")
     },
     {
         "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
         N_("The messaging stack on which Pacemaker is currently running"),
         N_("Used for informational and diagnostic purposes.")
     },
     {
         "cluster-name", NULL, "string", NULL, NULL, NULL,
         N_("An arbitrary name for the cluster"),
         N_("This optional value is mostly for users' convenience as desired "
             "in administration, but may also be used in Pacemaker "
             "configuration rules via the #cluster-name node attribute, and "
             "by higher-level tools and resource agents.")
     },
     {
         XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
         NULL, "20s", pcmk__valid_interval_spec,
         N_("How long to wait for a response from other nodes during start-up"),
         N_("The optimal value will depend on the speed and load of your network "
             "and the type of switches used.")
     },
     {
         XML_CONFIG_ATTR_RECHECK, NULL, "time",
         N_("Zero disables polling, while positive values are an interval in seconds"
             "(unless other units are specified, for example \"5min\")"),
         "15min", pcmk__valid_interval_spec,
         N_("Polling interval to recheck cluster state and evaluate rules "
             "with date specifications"),
         N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
             "recheck cluster state for failure timeouts and most time-based "
             "rules. However, it will also recheck the cluster after this "
             "amount of inactivity, to evaluate rules with date specifications "
             "and serve as a fail-safe for certain types of scheduler bugs.")
     },
     {
         "load-threshold", NULL, "percentage", NULL,
         "80%", pcmk__valid_percentage,
         N_("Maximum amount of system load that should be used by cluster nodes"),
         N_("The cluster will slow down its recovery process when the amount of "
             "system resources used (currently CPU) approaches this limit"),
     },
     {
         "node-action-limit", NULL, "integer", NULL,
         "0", pcmk__valid_number,
         N_("Maximum number of jobs that can be scheduled per node "
             "(defaults to 2x cores)")
     },
     { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
         N_("How a cluster node should react if notified of its own fencing"),
         N_("A cluster node may receive notification of its own fencing if fencing "
         "is misconfigured, or if fabric fencing is in use that doesn't cut "
         "cluster communication. Allowed values are \"stop\" to attempt to "
         "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
         "to immediately reboot the local node, falling back to stop on failure.")
     },
     {
         XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
         "2min", pcmk__valid_interval_spec,
         "*** Advanced Use Only ***",
         N_("Declare an election failed if it is not decided within this much "
             "time. If you need to adjust this value, it probably indicates "
             "the presence of a bug.")
     },
     {
         XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
         "20min", pcmk__valid_interval_spec,
         "*** Advanced Use Only ***",
         N_("Exit immediately if shutdown does not complete within this much "
             "time. If you need to adjust this value, it probably indicates "
             "the presence of a bug.")
     },
     {
         "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
         "3min", pcmk__valid_interval_spec,
         "*** Advanced Use Only ***",
         N_("If you need to adjust this value, it probably indicates "
             "the presence of a bug.")
     },
     {
         "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
         "30min", pcmk__valid_interval_spec,
         "*** Advanced Use Only ***",
         N_("If you need to adjust this value, it probably indicates "
             "the presence of a bug.")
     },
     {
         "transition-delay", "crmd-transition-delay", "time", NULL,
         "0s", pcmk__valid_interval_spec,
         N_("*** Advanced Use Only *** Enabling this option will slow down "
             "cluster recovery under all conditions"),
         N_("Delay cluster recovery for this much time to allow for additional "
             "events to occur. Useful if your configuration is sensitive to "
             "the order in which ping updates arrive.")
     },
     {
         "stonith-watchdog-timeout", NULL, "time", NULL,
         "0", controld_verify_stonith_watchdog_timeout,
         N_("How long before nodes can be assumed to be safely down when "
            "watchdog-based self-fencing via SBD is in use"),
         N_("If this is set to a positive value, lost nodes are assumed to "
            "self-fence using watchdog-based SBD within this much time. This "
            "does not require a fencing resource to be explicitly configured, "
            "though a fence_watchdog resource can be configured, to limit use "
            "to specific nodes. If this is set to 0 (the default), the cluster "
            "will never assume watchdog-based self-fencing. If this is set to a "
            "negative value, the cluster will use twice the local value of the "
            "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
            "or otherwise treat this as 0. WARNING: When used, this timeout "
            "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
            "watchdog-based SBD, and Pacemaker will refuse to start on any of "
            "those nodes where this is not true for the local value or SBD is "
            "not active. When this is set to a negative value, "
            "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
            "that use SBD, otherwise data corruption or loss could occur.")
     },
     {
         "stonith-max-attempts", NULL, "integer", NULL,
         "10", pcmk__valid_positive_number,
         N_("How many times fencing can fail before it will no longer be "
             "immediately re-attempted on a target")
     },
 
     // Already documented in libpe_status (other values must be kept identical)
     {
         "no-quorum-policy", NULL, "select",
         "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
         N_("What to do when the cluster does not have quorum"), NULL
     },
     {
         XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
         "false", pcmk__valid_boolean,
         N_("Whether to lock resources to a cleanly shut down node"),
         N_("When true, resources active on a node when it is cleanly shut down "
             "are kept \"locked\" to that node (not allowed to run elsewhere) "
             "until they start again on that node after it rejoins (or for at "
             "most shutdown-lock-limit, if set). Stonith resources and "
             "Pacemaker Remote connections are never locked. Clone and bundle "
             "instances and the promoted role of promotable clones are "
             "currently never locked, though support could be added in a future "
             "release.")
     },
     {
         XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
         "0", pcmk__valid_interval_spec,
         N_("Do not lock resources to a cleanly shut down node longer than "
            "this"),
         N_("If shutdown-lock is true and this is set to a nonzero time "
             "duration, shutdown locks will expire after this much time has "
             "passed since the shutdown was initiated, even if the node has not "
             "rejoined.")
     },
 };
 
 void
 crmd_metadata(void)
 {
     const char *desc_short = "Pacemaker controller options";
     const char *desc_long = "Cluster options used by Pacemaker's controller";
 
     gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
                                             desc_long, controller_options,
                                             PCMK__NELEM(controller_options));
     printf("%s", s);
     g_free(s);
 }
 
 static void
 config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     const char *value = NULL;
     GHashTable *config_hash = NULL;
     crm_time_t *now = crm_time_new(NULL);
     xmlNode *crmconfig = NULL;
     xmlNode *alerts = NULL;
 
     if (rc != pcmk_ok) {
         fsa_data_t *msg_data = NULL;
 
         crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 
         if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
             crm_err("The cluster is mis-configured - shutting down and staying down");
             controld_set_fsa_input_flags(R_STAYDOWN);
         }
         goto bail;
     }
 
     crmconfig = output;
     if ((crmconfig != NULL)
         && !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) {
         crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
     }
     if (!crmconfig) {
         fsa_data_t *msg_data = NULL;
 
         crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
         goto bail;
     }
 
     crm_debug("Call %d : Parsing CIB options", call_id);
     config_hash = pcmk__strkey_table(free, free);
     pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
                       config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
 
     // Validate all options, and use defaults if not already present in hash
     pcmk__validate_cluster_options(config_hash, controller_options,
                                    PCMK__NELEM(controller_options));
 
     value = g_hash_table_lookup(config_hash, "no-quorum-policy");
     if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
         controld_set_global_flags(controld_no_quorum_suicide);
     }
 
     value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
     if (crm_is_true(value)) {
         controld_set_global_flags(controld_shutdown_lock_enabled);
     } else {
         controld_clear_global_flags(controld_shutdown_lock_enabled);
     }
 
     value = g_hash_table_lookup(config_hash,
                                 XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
     controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value)
                                            / 1000;
 
     value = g_hash_table_lookup(config_hash,
                                 XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT);
     controld_globals.node_pending_timeout = crm_parse_interval_spec(value) / 1000;
 
     value = g_hash_table_lookup(config_hash, "cluster-name");
     pcmk__str_update(&(controld_globals.cluster_name), value);
 
     // Let subcomponents initialize their own static variables
     controld_configure_election(config_hash);
     controld_configure_fencing(config_hash);
     controld_configure_fsa_timers(config_hash);
     controld_configure_throttle(config_hash);
 
     alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
     crmd_unpack_alerts(alerts);
 
     controld_set_fsa_input_flags(R_READ_CONFIG);
     controld_trigger_fsa();
 
     g_hash_table_destroy(config_hash);
   bail:
     crm_time_free(now);
 }
 
 /*!
  * \internal
  * \brief Trigger read and processing of the configuration
  *
  * \param[in] fn    Calling function name
  * \param[in] line  Line number where call occurred
  */
 void
 controld_trigger_config_as(const char *fn, int line)
 {
     if (config_read_trigger != NULL) {
         crm_trace("%s:%d - Triggered config processing", fn, line);
         mainloop_set_trigger(config_read_trigger);
     }
 }
 
 gboolean
 crm_read_options(gpointer user_data)
 {
     cib_t *cib_conn = controld_globals.cib_conn;
     int call_id = cib_conn->cmds->query(cib_conn,
                                         "//" XML_CIB_TAG_CRMCONFIG
                                         " | //" XML_CIB_TAG_ALERTS,
                                         NULL, cib_xpath|cib_scope_local);
 
     fsa_register_cib_callback(call_id, NULL, config_query_callback);
     crm_trace("Querying the CIB... call %d", call_id);
     return TRUE;
 }
 
 /*	 A_READCONFIG	*/
 void
 do_read_config(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     throttle_init();
     controld_trigger_config();
 }
 
 void
 crm_shutdown(int nsig)
 {
     const char *value = NULL;
     guint default_period_ms = 0;
 
     if ((controld_globals.mainloop == NULL)
         || !g_main_loop_is_running(controld_globals.mainloop)) {
         crmd_exit(CRM_EX_OK);
         return;
     }
 
     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
         crm_err("Escalating shutdown");
         register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
         return;
     }
 
     controld_set_fsa_input_flags(R_SHUTDOWN);
     register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
 
     /* If shutdown timer doesn't have a period set, use the default
      *
      * @TODO: Evaluate whether this is still necessary. As long as
      * config_query_callback() has been run at least once, it doesn't look like
      * anything could have changed the timer period since then.
      */
     value = pcmk__cluster_option(NULL, controller_options,
                                  PCMK__NELEM(controller_options),
                                  XML_CONFIG_ATTR_FORCE_QUIT);
     default_period_ms = crm_parse_interval_spec(value);
     controld_shutdown_start_countdown(default_period_ms);
 }
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index a235e34e7b..9557d9e32c 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1113 +1,1111 @@
 /*
  * Copyright 2004-2023 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 #include <crm/stonith-ng.h>
 #include <crm/fencing/internal.h>
 
 #include <pacemaker-controld.h>
 
 static void
 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
 
 /*
  * stonith failure counting
  *
  * We don't want to get stuck in a permanent fencing loop. Keep track of the
  * number of fencing failures for each target node, and the most we'll restart a
  * transition for.
  */
 
 struct st_fail_rec {
     int count;
 };
 
 static bool fence_reaction_panic = false;
 static unsigned long int stonith_max_attempts = 10;
 static GHashTable *stonith_failures = NULL;
 
 /*!
  * \internal
  * \brief Update max fencing attempts before giving up
  *
  * \param[in] value  New max fencing attempts
  */
 static void
 update_stonith_max_attempts(const char *value)
 {
     stonith_max_attempts = char2score(value);
     if (stonith_max_attempts < 1UL) {
         stonith_max_attempts = 10UL;
     }
 }
 
 /*!
  * \internal
  * \brief Configure reaction to notification of local node being fenced
  *
  * \param[in] reaction_s  Reaction type
  */
 static void
 set_fence_reaction(const char *reaction_s)
 {
     if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
         fence_reaction_panic = true;
 
     } else {
         if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
             crm_warn("Invalid value '%s' for %s, using 'stop'",
                      reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
         }
         fence_reaction_panic = false;
     }
 }
 
 /*!
  * \internal
  * \brief Configure fencing options based on the CIB
  *
  * \param[in,out] options  Name/value pairs for configured options
  */
 void
 controld_configure_fencing(GHashTable *options)
 {
     const char *value = NULL;
 
     value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION);
     set_fence_reaction(value);
 
     value = g_hash_table_lookup(options, "stonith-max-attempts");
     update_stonith_max_attempts(value);
 }
 
 static gboolean
 too_many_st_failures(const char *target)
 {
     GHashTableIter iter;
     const char *key = NULL;
     struct st_fail_rec *value = NULL;
 
     if (stonith_failures == NULL) {
         return FALSE;
     }
 
     if (target == NULL) {
         g_hash_table_iter_init(&iter, stonith_failures);
         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
                (gpointer *) &value)) {
 
             if (value->count >= stonith_max_attempts) {
                 target = (const char*)key;
                 goto too_many;
             }
         }
     } else {
         value = g_hash_table_lookup(stonith_failures, target);
         if ((value != NULL) && (value->count >= stonith_max_attempts)) {
             goto too_many;
         }
     }
     return FALSE;
 
 too_many:
     crm_warn("Too many failures (%d) to fence %s, giving up",
              value->count, target);
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Reset a stonith fail count
  *
  * \param[in] target  Name of node to reset, or NULL for all
  */
 void
 st_fail_count_reset(const char *target)
 {
     if (stonith_failures == NULL) {
         return;
     }
 
     if (target) {
         struct st_fail_rec *rec = NULL;
 
         rec = g_hash_table_lookup(stonith_failures, target);
         if (rec) {
             rec->count = 0;
         }
     } else {
         GHashTableIter iter;
         const char *key = NULL;
         struct st_fail_rec *rec = NULL;
 
         g_hash_table_iter_init(&iter, stonith_failures);
         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
                                       (gpointer *) &rec)) {
             rec->count = 0;
         }
     }
 }
 
 static void
 st_fail_count_increment(const char *target)
 {
     struct st_fail_rec *rec = NULL;
 
     if (stonith_failures == NULL) {
         stonith_failures = pcmk__strkey_table(free, free);
     }
 
     rec = g_hash_table_lookup(stonith_failures, target);
     if (rec) {
         rec->count++;
     } else {
         rec = malloc(sizeof(struct st_fail_rec));
         if(rec == NULL) {
             return;
         }
 
         rec->count = 1;
         g_hash_table_insert(stonith_failures, strdup(target), rec);
     }
 }
 
 /* end stonith fail count functions */
 
 
 static void
 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
                     void *user_data)
 {
     if (rc < pcmk_ok) {
         crm_err("Fencing update %d for %s: failed - %s (%d)",
                 call_id, (char *)user_data, pcmk_strerror(rc), rc);
         crm_log_xml_warn(msg, "Failed update");
         abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed",
                          NULL);
 
     } else {
         crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
     }
 }
 
 static void
 send_stonith_update(pcmk__graph_action_t *action, const char *target,
                     const char *uuid)
 {
     int rc = pcmk_ok;
     crm_node_t *peer = NULL;
 
     /* We (usually) rely on the membership layer to do node_update_cluster,
      * and the peer status callback to do node_update_peer, because the node
      * might have already rejoined before we get the stonith result here.
      */
     int flags = node_update_join | node_update_expected;
 
     /* zero out the node-status & remove all LRM status info */
     xmlNode *node_state = NULL;
 
     CRM_CHECK(target != NULL, return);
     CRM_CHECK(uuid != NULL, return);
 
     /* Make sure the membership and join caches are accurate.
      * Try getting any existing node cache entry also by node uuid in case it
      * doesn't have an uname yet.
      */
     peer = pcmk__get_peer_full(0, target, uuid, CRM_GET_PEER_ANY);
 
     CRM_CHECK(peer != NULL, return);
 
     if (peer->state == NULL) {
         /* Usually, we rely on the membership layer to update the cluster state
          * in the CIB. However, if the node has never been seen, do it here, so
          * the node is not considered unclean.
          */
         flags |= node_update_cluster;
     }
 
     if (peer->uuid == NULL) {
         crm_info("Recording uuid '%s' for node '%s'", uuid, target);
         peer->uuid = strdup(uuid);
     }
 
     crmd_peer_down(peer, TRUE);
 
     /* Generate a node state update for the CIB */
     node_state = create_node_state_update(peer, flags, NULL, __func__);
 
     /* we have to mark whether or not remote nodes have already been fenced */
     if (peer->flags & crm_remote_node) {
         char *now_s = pcmk__ttoa(time(NULL));
 
         crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
         free(now_s);
     }
 
     /* Force our known ID */
     crm_xml_add(node_state, XML_ATTR_ID, uuid);
 
     rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
                                                  XML_CIB_TAG_STATUS, node_state,
                                                  cib_scope_local
                                                  |cib_can_create);
 
     /* Delay processing the trigger until the update completes */
     crm_debug("Sending fencing update %d for %s", rc, target);
     fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated);
 
     // Make sure it sticks
     /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
      *                                             cib_scope_local);
      */
 
     controld_delete_node_state(peer->uname, controld_section_all,
                                cib_scope_local);
     free_xml(node_state);
     return;
 }
 
 /*!
  * \internal
  * \brief Abort transition due to stonith failure
  *
  * \param[in] abort_action  Whether to restart or stop transition
  * \param[in] target  Don't restart if this (NULL for any) has too many failures
  * \param[in] reason  Log this stonith action XML as abort reason (or NULL)
  */
 static void
 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
                           const char *target, const xmlNode *reason)
 {
     /* If stonith repeatedly fails, we eventually give up on starting a new
      * transition for that reason.
      */
     if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
         abort_action = pcmk__graph_wait;
     }
     abort_transition(INFINITY, abort_action, "Stonith failed", reason);
 }
 
 
 /*
  * stonith cleanup list
  *
  * If the DC is shot, proper notifications might not go out.
  * The stonith cleanup list allows the cluster to (re-)send
  * notifications once a new DC is elected.
  */
 
 static GList *stonith_cleanup_list = NULL;
 
 /*!
  * \internal
  * \brief Add a node to the stonith cleanup list
  *
  * \param[in] target  Name of node to add
  */
 void
 add_stonith_cleanup(const char *target) {
     stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
 }
 
 /*!
  * \internal
  * \brief Remove a node from the stonith cleanup list
  *
  * \param[in] Name of node to remove
  */
 void
 remove_stonith_cleanup(const char *target)
 {
     GList *iter = stonith_cleanup_list;
 
     while (iter != NULL) {
         GList *tmp = iter;
         char *iter_name = tmp->data;
 
         iter = iter->next;
         if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
             crm_trace("Removing %s from the cleanup list", iter_name);
             stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
             free(iter_name);
         }
     }
 }
 
 /*!
  * \internal
  * \brief Purge all entries from the stonith cleanup list
  */
 void
 purge_stonith_cleanup(void)
 {
     if (stonith_cleanup_list) {
         GList *iter = NULL;
 
         for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
             char *target = iter->data;
 
             crm_info("Purging %s from stonith cleanup list", target);
             free(target);
         }
         g_list_free(stonith_cleanup_list);
         stonith_cleanup_list = NULL;
     }
 }
 
 /*!
  * \internal
  * \brief Send stonith updates for all entries in cleanup list, then purge it
  */
 void
 execute_stonith_cleanup(void)
 {
     GList *iter;
 
     for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
         char *target = iter->data;
         crm_node_t *target_node = crm_get_peer(0, target);
         const char *uuid = crm_peer_uuid(target_node);
 
         crm_notice("Marking %s, target of a previous stonith action, as clean", target);
         send_stonith_update(NULL, target, uuid);
         free(target);
     }
     g_list_free(stonith_cleanup_list);
     stonith_cleanup_list = NULL;
 }
 
 /* end stonith cleanup list functions */
 
 
 /* stonith API client
  *
  * Functions that need to interact directly with the fencer via its API
  */
 
 static stonith_t *stonith_api = NULL;
-static crm_trigger_t *stonith_reconnect = NULL;
+static mainloop_timer_t *controld_fencer_connect_timer = NULL;
 static char *te_client_id = NULL;
 
 static gboolean
 fail_incompletable_stonith(pcmk__graph_t *graph)
 {
     GList *lpc = NULL;
     const char *task = NULL;
     xmlNode *last_action = NULL;
 
     if (graph == NULL) {
         return FALSE;
     }
 
     for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
         GList *lpc2 = NULL;
         pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
 
         if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
             continue;
         }
 
         for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
             pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
 
             if ((action->type != pcmk__cluster_graph_action)
                 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
                 continue;
             }
 
             task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
             if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
                 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
                 last_action = action->xml;
                 pcmk__update_graph(graph, action);
                 crm_notice("Failing action %d (%s): fencer terminated",
                            action->id, ID(action->xml));
             }
         }
     }
 
     if (last_action != NULL) {
         crm_warn("Fencer failure resulted in unrunnable actions");
         abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
         return TRUE;
     }
 
     return FALSE;
 }
 
 static void
 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
 {
     te_cleanup_stonith_history_sync(st, FALSE);
 
     if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
         crm_err("Lost fencer connection (will attempt to reconnect)");
-        mainloop_set_trigger(stonith_reconnect);
-
+        if (!mainloop_timer_running(controld_fencer_connect_timer)) {
+            mainloop_timer_start(controld_fencer_connect_timer);
+        }
     } else {
         crm_info("Disconnected from fencer");
     }
 
     if (stonith_api) {
         /* the client API won't properly reconnect notifications
          * if they are still in the table - so remove them
          */
         if (stonith_api->state != stonith_disconnected) {
             stonith_api->cmds->disconnect(st);
         }
         stonith_api->cmds->remove_notification(stonith_api, NULL);
     }
 
     if (AM_I_DC) {
         fail_incompletable_stonith(controld_globals.transition_graph);
         trigger_graph();
     }
 }
 
 /*!
  * \internal
  * \brief Handle an event notification from the fencing API
  *
  * \param[in] st     Fencing API connection (ignored)
  * \param[in] event  Fencing API event notification
  */
 static void
 handle_fence_notification(stonith_t *st, stonith_event_t *event)
 {
     bool succeeded = true;
     const char *executioner = "the cluster";
     const char *client = "a client";
     const char *reason = NULL;
     int exec_status;
 
     if (te_client_id == NULL) {
         te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
                                          (unsigned long) getpid());
     }
 
     if (event == NULL) {
         crm_err("Notify data not found");
         return;
     }
 
     if (event->executioner != NULL) {
         executioner = event->executioner;
     }
     if (event->client_origin != NULL) {
         client = event->client_origin;
     }
 
     exec_status = stonith__event_execution_status(event);
     if ((stonith__event_exit_status(event) != CRM_EX_OK)
         || (exec_status != PCMK_EXEC_DONE)) {
         succeeded = false;
         if (exec_status == PCMK_EXEC_DONE) {
             exec_status = PCMK_EXEC_ERROR;
         }
     }
     reason = stonith__event_exit_reason(event);
 
     crmd_alert_fencing_op(event);
 
     if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
         // Unfencing doesn't need special handling, just a log message
         if (succeeded) {
             crm_notice("%s was unfenced by %s at the request of %s@%s",
                        event->target, executioner, client, event->origin);
         } else {
             crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
                     event->target, executioner,
                     pcmk_exec_status_str(exec_status),
                     ((reason == NULL)? "" : ": "),
                     ((reason == NULL)? "" : reason),
                     stonith__event_exit_status(event));
         }
         return;
     }
 
     if (succeeded
         && pcmk__str_eq(event->target, controld_globals.our_nodename,
                         pcmk__str_casei)) {
         /* We were notified of our own fencing. Most likely, either fencing was
          * misconfigured, or fabric fencing that doesn't cut cluster
          * communication is in use.
          *
          * Either way, shutting down the local host is a good idea, to require
          * administrator intervention. Also, other nodes would otherwise likely
          * set our status to lost because of the fencing callback and discard
          * our subsequent election votes as "not part of our cluster".
          */
         crm_crit("We were allegedly just fenced by %s for %s!",
                  executioner, event->origin); // Dumps blackbox if enabled
         if (fence_reaction_panic) {
             pcmk__panic(__func__);
         } else {
             crm_exit(CRM_EX_FATAL);
         }
         return; // Should never get here
     }
 
     /* Update the count of fencing failures for this target, in case we become
      * DC later. The current DC has already updated its fail count in
      * tengine_stonith_callback().
      */
     if (!AM_I_DC) {
         if (succeeded) {
             st_fail_count_reset(event->target);
         } else {
             st_fail_count_increment(event->target);
         }
     }
 
     crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
                "%s%s%s%s " CRM_XS " event=%s",
                event->target, (succeeded? "" : " not"),
                event->action, executioner, client, event->origin,
                (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
                ((reason == NULL)? "" : " ("),
                ((reason == NULL)? "" : reason),
                ((reason == NULL)? "" : ")"),
                event->id);
 
     if (succeeded) {
         crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
                                                          CRM_GET_PEER_ANY);
         const char *uuid = NULL;
 
         if (peer == NULL) {
             return;
         }
 
         uuid = crm_peer_uuid(peer);
 
         if (AM_I_DC) {
             /* The DC always sends updates */
             send_stonith_update(NULL, event->target, uuid);
 
             /* @TODO Ideally, at this point, we'd check whether the fenced node
              * hosted any guest nodes, and call remote_node_down() for them.
              * Unfortunately, the controller doesn't have a simple, reliable way
              * to map hosts to guests. It might be possible to track this in the
              * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
              * on the scheduler creating fence pseudo-events for the guests.
              */
 
             if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
                 /* Abort the current transition if it wasn't the cluster that
                  * initiated fencing.
                  */
                 crm_info("External fencing operation from %s fenced %s",
                          client, event->target);
                 abort_transition(INFINITY, pcmk__graph_restart,
                                  "External Fencing Operation", NULL);
             }
 
         } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
                                 pcmk__str_null_matches|pcmk__str_casei)
                    && !pcmk_is_set(peer->flags, crm_remote_node)) {
             // Assume the target was our DC if we don't currently have one
 
             if (controld_globals.dc_name != NULL) {
                 crm_notice("Fencing target %s was our DC", event->target);
             } else {
                 crm_notice("Fencing target %s may have been our DC",
                            event->target);
             }
 
             /* Given the CIB resyncing that occurs around elections,
              * have one node update the CIB now and, if the new DC is different,
              * have them do so too after the election
              */
             if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
                              pcmk__str_casei)) {
                 send_stonith_update(NULL, event->target, uuid);
             }
             add_stonith_cleanup(event->target);
         }
 
         /* If the target is a remote node, and we host its connection,
          * immediately fail all monitors so it can be recovered quickly.
          * The connection won't necessarily drop when a remote node is fenced,
          * so the failure might not otherwise be detected until the next poke.
          */
         if (pcmk_is_set(peer->flags, crm_remote_node)) {
             remote_ra_fail(event->target);
         }
 
         crmd_peer_down(peer, TRUE);
      }
 }
 
 /*!
  * \brief Connect to fencer
  *
- * \param[in] user_data  If NULL, retry failures now, otherwise retry in main loop
+ * \param[in] user_data  If NULL, retry failures now, otherwise retry in mainloop timer
  *
- * \return TRUE
+ * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
  * \note If user_data is NULL, this will wait 2s between attempts, for up to
  *       30 attempts, meaning the controller could be blocked as long as 58s.
  */
-static gboolean
-te_connect_stonith(gpointer user_data)
+gboolean
+controld_timer_fencer_connect(gpointer user_data)
 {
     int rc = pcmk_ok;
 
     if (stonith_api == NULL) {
         stonith_api = stonith_api_new();
         if (stonith_api == NULL) {
             crm_err("Could not connect to fencer: API memory allocation failed");
-            return TRUE;
+            return G_SOURCE_REMOVE;
         }
     }
 
     if (stonith_api->state != stonith_disconnected) {
         crm_trace("Already connected to fencer, no need to retry");
-        return TRUE;
+        return G_SOURCE_REMOVE;
     }
 
     if (user_data == NULL) {
         // Blocking (retry failures now until successful)
         rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
         if (rc != pcmk_ok) {
             crm_err("Could not connect to fencer in 30 attempts: %s "
                     CRM_XS " rc=%d", pcmk_strerror(rc), rc);
         }
     } else {
         // Non-blocking (retry failures later in main loop)
         rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
+
+        if (controld_fencer_connect_timer == NULL) {
+            controld_fencer_connect_timer =
+                mainloop_timer_add("controld_fencer_connect", 1000,
+                                   TRUE, controld_timer_fencer_connect,
+                                   GINT_TO_POINTER(TRUE));
+        }
+
         if (rc != pcmk_ok) {
             if (pcmk_is_set(controld_globals.fsa_input_register,
                             R_ST_REQUIRED)) {
                 crm_notice("Fencer connection failed (will retry): %s "
                            CRM_XS " rc=%d", pcmk_strerror(rc), rc);
-                mainloop_set_trigger(stonith_reconnect);
+
+                if (!mainloop_timer_running(controld_fencer_connect_timer)) {
+                    mainloop_timer_start(controld_fencer_connect_timer);
+                }
+
+                return G_SOURCE_CONTINUE;
             } else {
                 crm_info("Fencer connection failed (ignoring because no longer required): %s "
                          CRM_XS " rc=%d", pcmk_strerror(rc), rc);
             }
-            return TRUE;
+            return G_SOURCE_REMOVE;
         }
     }
 
     if (rc == pcmk_ok) {
         stonith_api->cmds->register_notification(stonith_api,
                                                  T_STONITH_NOTIFY_DISCONNECT,
                                                  tengine_stonith_connection_destroy);
         stonith_api->cmds->register_notification(stonith_api,
                                                  T_STONITH_NOTIFY_FENCE,
                                                  handle_fence_notification);
         stonith_api->cmds->register_notification(stonith_api,
                                                  T_STONITH_NOTIFY_HISTORY_SYNCED,
                                                  tengine_stonith_history_synced);
         te_trigger_stonith_history_sync(TRUE);
         crm_notice("Fencer successfully connected");
     }
 
-    return TRUE;
-}
-
-/*!
-    \internal
-    \brief Schedule fencer connection attempt in main loop
-*/
-void
-controld_trigger_fencer_connect(void)
-{
-    if (stonith_reconnect == NULL) {
-        stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
-                                                 te_connect_stonith,
-                                                 GINT_TO_POINTER(TRUE));
-    }
-    controld_set_fsa_input_flags(R_ST_REQUIRED);
-    mainloop_set_trigger(stonith_reconnect);
+    return G_SOURCE_REMOVE;
 }
 
 void
 controld_disconnect_fencer(bool destroy)
 {
     if (stonith_api) {
         // Prevent fencer connection from coming up again
         controld_clear_fsa_input_flags(R_ST_REQUIRED);
 
         if (stonith_api->state != stonith_disconnected) {
             stonith_api->cmds->disconnect(stonith_api);
         }
         stonith_api->cmds->remove_notification(stonith_api, NULL);
     }
     if (destroy) {
         if (stonith_api) {
             stonith_api->cmds->free(stonith_api);
             stonith_api = NULL;
         }
-        if (stonith_reconnect) {
-            mainloop_destroy_trigger(stonith_reconnect);
-            stonith_reconnect = NULL;
+        if (controld_fencer_connect_timer) {
+            mainloop_timer_del(controld_fencer_connect_timer);
+            controld_fencer_connect_timer = NULL;
         }
         if (te_client_id) {
             free(te_client_id);
             te_client_id = NULL;
         }
     }
 }
 
 static gboolean
 do_stonith_history_sync(gpointer user_data)
 {
     if (stonith_api && (stonith_api->state != stonith_disconnected)) {
         stonith_history_t *history = NULL;
 
         te_cleanup_stonith_history_sync(stonith_api, FALSE);
         stonith_api->cmds->history(stonith_api,
                                    st_opt_sync_call | st_opt_broadcast,
                                    NULL, &history, 5);
         stonith_history_free(history);
         return TRUE;
     } else {
         crm_info("Skip triggering stonith history-sync as stonith is disconnected");
         return FALSE;
     }
 }
 
 static void
 tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
 {
     char *uuid = NULL;
     int stonith_id = -1;
     int transition_id = -1;
     pcmk__graph_action_t *action = NULL;
     const char *target = NULL;
 
     if ((data == NULL) || (data->userdata == NULL)) {
         crm_err("Ignoring fence operation %d result: "
                 "No transition key given (bug?)",
                 ((data == NULL)? -1 : data->call_id));
         return;
     }
 
     if (!AM_I_DC) {
         const char *reason = stonith__exit_reason(data);
 
         if (reason == NULL) {
            reason = pcmk_exec_status_str(stonith__execution_status(data));
         }
         crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
                    data->call_id, stonith__exit_status(data), reason,
                    (const char *) data->userdata);
         return;
     }
 
     CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
                                     &stonith_id, NULL),
               goto bail);
 
     if (controld_globals.transition_graph->complete || (stonith_id < 0)
         || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
         || (controld_globals.transition_graph->id != transition_id)) {
         crm_info("Ignoring fence operation %d result: "
                  "Not from current transition " CRM_XS
                  " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
                  data->call_id,
                  pcmk__btoa(controld_globals.transition_graph->complete),
                  stonith_id, uuid, controld_globals.te_uuid, transition_id,
                  controld_globals.transition_graph->id);
         goto bail;
     }
 
     action = controld_get_action(stonith_id);
     if (action == NULL) {
         crm_err("Ignoring fence operation %d result: "
                 "Action %d not found in transition graph (bug?) "
                 CRM_XS " uuid=%s transition=%d",
                 data->call_id, stonith_id, uuid, transition_id);
         goto bail;
     }
 
     target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     if (target == NULL) {
         crm_err("Ignoring fence operation %d result: No target given (bug?)",
                 data->call_id);
         goto bail;
     }
 
     stop_te_timer(action);
     if (stonith__exit_status(data) == CRM_EX_OK) {
         const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
         const char *op = crm_meta_value(action->params, "stonith_action");
 
         crm_info("Fence operation %d for %s succeeded", data->call_id, target);
         if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
             te_action_confirmed(action, NULL);
             if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
                 const char *value = NULL;
                 char *now = pcmk__ttoa(time(NULL));
                 gboolean is_remote_node = FALSE;
 
                 /* This check is not 100% reliable, since this node is not
                  * guaranteed to have the remote node cached. However, it
                  * doesn't have to be reliable, since the attribute manager can
                  * learn a node's "remoteness" by other means sooner or later.
                  * This allows it to learn more quickly if this node does have
                  * the information.
                  */
                 if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
                     is_remote_node = TRUE;
                 }
 
                 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
                              is_remote_node);
                 free(now);
 
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
                 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
                              is_remote_node);
 
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
                 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
                              is_remote_node);
 
             } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
                 send_stonith_update(action, target, uuid);
                 pcmk__set_graph_action_flags(action,
                                              pcmk__graph_action_sent_update);
             }
         }
         st_fail_count_reset(target);
 
     } else {
         enum pcmk__graph_next abort_action = pcmk__graph_restart;
         int status = stonith__execution_status(data);
         const char *reason = stonith__exit_reason(data);
 
         if (reason == NULL) {
             if (status == PCMK_EXEC_DONE) {
                 reason = "Agent returned error";
             } else {
                 reason = pcmk_exec_status_str(status);
             }
         }
         pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
 
         /* If no fence devices were available, there's no use in immediately
          * checking again, so don't start a new transition in that case.
          */
         if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
             crm_warn("Fence operation %d for %s failed: %s "
                      "(aborting transition and giving up for now)",
                      data->call_id, target, reason);
             abort_action = pcmk__graph_wait;
         } else {
             crm_notice("Fence operation %d for %s failed: %s "
                        "(aborting transition)", data->call_id, target, reason);
         }
 
         /* Increment the fail count now, so abort_for_stonith_failure() can
          * check it. Non-DC nodes will increment it in
          * handle_fence_notification().
          */
         st_fail_count_increment(target);
         abort_for_stonith_failure(abort_action, target, NULL);
     }
 
     pcmk__update_graph(controld_globals.transition_graph, action);
     trigger_graph();
 
   bail:
     free(data->userdata);
     free(uuid);
     return;
 }
 
 static int
 fence_with_delay(const char *target, const char *type, int delay)
 {
     uint32_t options = st_opt_none; // Group of enum stonith_call_options
     int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
                              / 1000);
 
     if (crmd_join_phase_count(crm_join_confirmed) == 1) {
         stonith__set_call_options(options, target, st_opt_allow_suicide);
     }
     return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
                                                type, timeout_sec, 0, delay);
 }
 
 /*!
  * \internal
  * \brief Execute a fencing action from a transition graph
  *
  * \param[in] graph   Transition graph being executed (ignored)
  * \param[in] action  Fencing action to execute
  *
  * \return Standard Pacemaker return code
  */
 int
 controld_execute_fence_action(pcmk__graph_t *graph,
                               pcmk__graph_action_t *action)
 {
     int rc = 0;
     const char *id = ID(action->xml);
     const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
     const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     const char *type = crm_meta_value(action->params, "stonith_action");
     char *transition_key = NULL;
     const char *priority_delay = NULL;
     int delay_i = 0;
     gboolean invalid_action = FALSE;
     int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
                                  / 1000);
 
     CRM_CHECK(id != NULL, invalid_action = TRUE);
     CRM_CHECK(uuid != NULL, invalid_action = TRUE);
     CRM_CHECK(type != NULL, invalid_action = TRUE);
     CRM_CHECK(target != NULL, invalid_action = TRUE);
 
     if (invalid_action) {
         crm_log_xml_warn(action->xml, "BadAction");
         return EPROTO;
     }
 
     priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
 
     crm_notice("Requesting fencing (%s) targeting node %s "
                CRM_XS " action=%s timeout=%i%s%s",
                type, target, id, stonith_timeout,
                priority_delay ? " priority_delay=" : "",
                priority_delay ? priority_delay : "");
 
     /* Passing NULL means block until we can connect... */
-    te_connect_stonith(NULL);
+    controld_timer_fencer_connect(NULL);
 
     pcmk__scan_min_int(priority_delay, &delay_i, 0);
     rc = fence_with_delay(target, type, delay_i);
     transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
                                           action->id, 0,
                                           controld_globals.te_uuid),
     stonith_api->cmds->register_callback(stonith_api, rc,
                                          (stonith_timeout
                                           + (delay_i > 0 ? delay_i : 0)),
                                          st_opt_timeout_updates, transition_key,
                                          "tengine_stonith_callback",
                                          tengine_stonith_callback);
     return pcmk_rc_ok;
 }
 
 bool
 controld_verify_stonith_watchdog_timeout(const char *value)
 {
     long st_timeout = value? crm_get_msec(value) : 0;
     const char *our_nodename = controld_globals.our_nodename;
     gboolean rv = TRUE;
 
     if (st_timeout == 0
         || (stonith_api && (stonith_api->state != stonith_disconnected) &&
             stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
                                                            our_nodename))) {
         rv = pcmk__valid_sbd_timeout(value);
     }
     return rv;
 }
 
 /* end stonith API client functions */
 
 
 /*
  * stonith history synchronization
  *
  * Each node's fencer keeps track of a cluster-wide fencing history. When a node
  * joins or leaves, we need to synchronize the history across all nodes.
  */
 
 static crm_trigger_t *stonith_history_sync_trigger = NULL;
 static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
 static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
 
 void
 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
 {
     if (free_timers) {
         mainloop_timer_del(stonith_history_sync_timer_short);
         stonith_history_sync_timer_short = NULL;
         mainloop_timer_del(stonith_history_sync_timer_long);
         stonith_history_sync_timer_long = NULL;
     } else {
         mainloop_timer_stop(stonith_history_sync_timer_short);
         mainloop_timer_stop(stonith_history_sync_timer_long);
     }
 
     if (st) {
         st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
     }
 }
 
 static void
 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
 {
     te_cleanup_stonith_history_sync(st, FALSE);
     crm_debug("Fence-history synced - cancel all timers");
 }
 
 static gboolean
 stonith_history_sync_set_trigger(gpointer user_data)
 {
     mainloop_set_trigger(stonith_history_sync_trigger);
     return FALSE;
 }
 
 void
 te_trigger_stonith_history_sync(bool long_timeout)
 {
     /* trigger a sync in 5s to give more nodes the
      * chance to show up so that we don't create
      * unnecessary stonith-history-sync traffic
      *
      * the long timeout of 30s is there as a fallback
      * so that after a successful connection to fenced
      * we will wait for 30s for the DC to trigger a
      * history-sync
      * if this doesn't happen we trigger a sync locally
      * (e.g. fenced segfaults and is restarted by pacemakerd)
      */
 
     /* as we are finally checking the stonith-connection
      * in do_stonith_history_sync we should be fine
      * leaving stonith_history_sync_time & stonith_history_sync_trigger
      * around
      */
     if (stonith_history_sync_trigger == NULL) {
         stonith_history_sync_trigger =
             mainloop_add_trigger(G_PRIORITY_LOW,
                                  do_stonith_history_sync, NULL);
     }
 
     if (long_timeout) {
         if(stonith_history_sync_timer_long == NULL) {
             stonith_history_sync_timer_long =
                 mainloop_timer_add("history_sync_long", 30000,
                                    FALSE, stonith_history_sync_set_trigger,
                                    NULL);
         }
         crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
         mainloop_timer_start(stonith_history_sync_timer_long);
     } else {
         if(stonith_history_sync_timer_short == NULL) {
             stonith_history_sync_timer_short =
                 mainloop_timer_add("history_sync_short", 5000,
                                    FALSE, stonith_history_sync_set_trigger,
                                    NULL);
         }
         crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
         mainloop_timer_start(stonith_history_sync_timer_short);
     }
 
 }
 
 /* end stonith history synchronization functions */
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
index 86a5050b87..76779c6530 100644
--- a/daemons/controld/controld_fencing.h
+++ b/daemons/controld/controld_fencing.h
@@ -1,38 +1,38 @@
 /*
  * Copyright 2004-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU Lesser General Public License
  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
  */
 
 #ifndef CONTROLD_FENCING__H
 #  define CONTROLD_FENCING__H
 
 #include <stdbool.h>                // bool
 #include <pacemaker-internal.h>     // pcmk__graph_t, pcmk__graph_action_t
 
 void controld_configure_fencing(GHashTable *options);
 
 // stonith fail counts
 void st_fail_count_reset(const char * target);
 
 // stonith API client
-void controld_trigger_fencer_connect(void);
+gboolean controld_timer_fencer_connect(gpointer user_data);
 void controld_disconnect_fencer(bool destroy);
 int controld_execute_fence_action(pcmk__graph_t *graph,
                                   pcmk__graph_action_t *action);
 bool controld_verify_stonith_watchdog_timeout(const char *value);
 
 // stonith cleanup list
 void add_stonith_cleanup(const char *target);
 void remove_stonith_cleanup(const char *target);
 void purge_stonith_cleanup(void);
 void execute_stonith_cleanup(void);
 
 // stonith history synchronization
 void te_trigger_stonith_history_sync(bool long_timeout);
 void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers);
 
 #endif