diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index b91f4afead..dbb383bf2a 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,846 +1,846 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
bool no_quorum_suicide_escalation = FALSE;
bool controld_shutdown_lock_enabled = false;
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = calloc(1, sizeof(crm_cluster_t));
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync(cluster);
#endif
}
if (registered == TRUE) {
controld_election_init(cluster->uname);
fsa_our_uname = cluster->uname;
fsa_our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (registered == FALSE) {
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __func__);
controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
controld_set_fsa_input_flags(R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
controld_set_fsa_input_flags(R_SHUTDOWN);
//controld_set_fsa_input_flags(R_STAYDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
(fsa_our_dc? fsa_our_dc : "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern pcmk__output_t *logger_out;
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (pcmk_is_set(fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& pcmk_is_set(fsa_input_register, R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
if (logger_out != NULL) {
logger_out->finish(logger_out, exit_code, true, NULL);
pcmk__output_free(logger_out);
logger_out = NULL;
}
crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GList *gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
controld_shutdown_schedulerd_ipc();
controld_disconnect_fencer(TRUE);
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have the mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors to stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
controld_clear_fsa_input_flags(R_MEMBERSHIP);
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
metadata_cache_fini();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
controld_disconnect_cib_manager();
verify_stopped(fsa_state, LOG_WARNING);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
pcmk__client_cleanup();
crm_peer_destroy();
controld_free_fsa_timers();
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if (action & A_EXIT_1) {
log_level = LOG_ERR;
exit_type = "forcefully";
exit_code = CRM_EX_ERROR;
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the controller",
fsa_action2string(action), exit_type);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
crm_debug("Creating CIB manager and executor objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
if (controld_init_fsa_timers() == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("Accepting new IPC client connection");
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_PROTOCOL);
return 0;
}
pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_INDETERMINATE);
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(msg, F_CRM_USER, client->user);
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (controld_authorize_ipc_message(msg, client, NULL)) {
crm_trace("Processing IPC message from client %s",
pcmk__client_name(client));
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa();
free_xml(msg);
return 0;
}
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), pcmk__client_name(client),
c, client);
free(client->userdata);
pcmk__free_client(client);
trigger_fsa();
}
return 0;
}
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
crmd_ipc_closed(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = accept_controller_client,
.connection_created = NULL,
.msg_process = dispatch_controller_ipc,
.connection_closed = crmd_ipc_closed,
.connection_destroyed = crmd_ipc_destroy
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (!pcmk_is_set(fsa_input_register, R_MEMBERSHIP)) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(fsa_input_register, R_LRM_CONNECTED)) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(fsa_input_register, R_CIB_CONNECTED)) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(fsa_input_register, R_READ_CONFIG)) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(fsa_input_register, R_PEER_DATA)) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
controld_trigger_fencer_connect();
controld_clear_fsa_input_flags(R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
controld_set_fsa_input_flags(R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
static pcmk__cluster_option_t crmd_opts[] = {
/* name, old name, type, allowed values,
* default value, validator,
* short description,
* long description
*/
{
"dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
- "Pacemaker version on cluster node elected Designated Controller (DC)",
- "Includes a hash which identifies the exact changeset the code was "
- "built from. Used for diagnostic purposes."
+ N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
+ N_("Includes a hash which identifies the exact changeset the code was "
+ "built from. Used for diagnostic purposes.")
},
{
"cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
- "The messaging stack on which Pacemaker is currently running",
- "Used for informational and diagnostic purposes."
+ N_("The messaging stack on which Pacemaker is currently running"),
+ N_("Used for informational and diagnostic purposes.")
},
{
"cluster-name", NULL, "string", NULL, NULL, NULL,
- "An arbitrary name for the cluster",
- "This optional value is mostly for users' convenience as desired "
+ N_("An arbitrary name for the cluster"),
+ N_("This optional value is mostly for users' convenience as desired "
"in administration, but may also be used in Pacemaker "
"configuration rules via the #cluster-name node attribute, and "
- "by higher-level tools and resource agents."
+ "by higher-level tools and resource agents.")
},
{
XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
NULL, "20s", pcmk__valid_interval_spec,
- "How long to wait for a response from other nodes during start-up",
- "The optimal value will depend on the speed and load of your network "
- "and the type of switches used."
+ N_("How long to wait for a response from other nodes during start-up"),
+ N_("The optimal value will depend on the speed and load of your network "
+ "and the type of switches used.")
},
{
XML_CONFIG_ATTR_RECHECK, NULL, "time",
N_("Zero disables polling, while positive values are an interval in seconds"
"(unless other units are specified, for example \"5min\")"),
"15min", pcmk__valid_interval_spec,
- "Polling interval to recheck cluster state and evaluate rules "
- "with date specifications",
- "Pacemaker is primarily event-driven, and looks ahead to know when to "
+ N_("Polling interval to recheck cluster state and evaluate rules "
+ "with date specifications"),
+ N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
"recheck cluster state for failure timeouts and most time-based "
"rules. However, it will also recheck the cluster after this "
"amount of inactivity, to evaluate rules with date specifications "
- "and serve as a fail-safe for certain types of scheduler bugs."
+ "and serve as a fail-safe for certain types of scheduler bugs.")
},
{
"load-threshold", NULL, "percentage", NULL,
"80%", pcmk__valid_percentage,
- "Maximum amount of system load that should be used by cluster nodes",
- "The cluster will slow down its recovery process when the amount of "
- "system resources used (currently CPU) approaches this limit",
+ N_("Maximum amount of system load that should be used by cluster nodes"),
+ N_("The cluster will slow down its recovery process when the amount of "
+ "system resources used (currently CPU) approaches this limit"),
},
{
"node-action-limit", NULL, "integer", NULL,
"0", pcmk__valid_number,
- "Maximum number of jobs that can be scheduled per node "
- "(defaults to 2x cores)"
+ N_("Maximum number of jobs that can be scheduled per node "
+ "(defaults to 2x cores)")
},
{ XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
- "How a cluster node should react if notified of its own fencing",
- "A cluster node may receive notification of its own fencing if fencing "
+ N_("How a cluster node should react if notified of its own fencing"),
+ N_("A cluster node may receive notification of its own fencing if fencing "
"is misconfigured, or if fabric fencing is in use that doesn't cut "
"cluster communication. Allowed values are \"stop\" to attempt to "
"immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
- "to immediately reboot the local node, falling back to stop on failure."
+ "to immediately reboot the local node, falling back to stop on failure.")
},
{
XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
"2min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
- "Declare an election failed if it is not decided within this much "
+ N_("Declare an election failed if it is not decided within this much "
"time. If you need to adjust this value, it probably indicates "
- "the presence of a bug."
+ "the presence of a bug.")
},
{
XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
"20min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
- "Exit immediately if shutdown does not complete within this much "
+ N_("Exit immediately if shutdown does not complete within this much "
"time. If you need to adjust this value, it probably indicates "
- "the presence of a bug."
+ "the presence of a bug.")
},
{
"join-integration-timeout", "crmd-integration-timeout", "time", NULL,
"3min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
- "If you need to adjust this value, it probably indicates "
- "the presence of a bug."
+ N_("If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
},
{
"join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
"30min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
- "If you need to adjust this value, it probably indicates "
- "the presence of a bug."
+ N_("If you need to adjust this value, it probably indicates "
+ "the presence of a bug.")
},
{
"transition-delay", "crmd-transition-delay", "time", NULL,
"0s", pcmk__valid_interval_spec,
- "*** Advanced Use Only *** Enabling this option will slow down "
- "cluster recovery under all conditions",
- "Delay cluster recovery for this much time to allow for additional "
+ N_("*** Advanced Use Only *** Enabling this option will slow down "
+ "cluster recovery under all conditions"),
+ N_("Delay cluster recovery for this much time to allow for additional "
"events to occur. Useful if your configuration is sensitive to "
- "the order in which ping updates arrive."
+ "the order in which ping updates arrive.")
},
{
"stonith-watchdog-timeout", NULL, "time", NULL,
"0", controld_verify_stonith_watchdog_timeout,
- "How long to wait before we can assume nodes are safely down "
- "when watchdog-based self-fencing via SBD is in use",
- "If nonzero, along with `have-watchdog=true` automatically set by the "
+ N_("How long to wait before we can assume nodes are safely down "
+ "when watchdog-based self-fencing via SBD is in use"),
+ N_("If nonzero, along with `have-watchdog=true` automatically set by the "
"cluster, when fencing is required, watchdog-based self-fencing "
"will be performed via SBD without requiring a fencing resource "
"explicitly configured. "
"If `stonith-watchdog-timeout` is set to a positive value, unseen "
"nodes are assumed to self-fence within this much time. +WARNING:+ "
"It must be ensured that this value is larger than the "
"`SBD_WATCHDOG_TIMEOUT` environment variable on all nodes. "
"Pacemaker verifies the settings individually on all nodes and "
"prevents startup or shuts down if configured wrongly on the fly. "
"It's strongly recommended that `SBD_WATCHDOG_TIMEOUT` is set to "
"the same value on all nodes. "
"If `stonith-watchdog-timeout` is set to a negative value, and "
"`SBD_WATCHDOG_TIMEOUT` is set, twice that value will be used. "
"+WARNING:+ In this case, it's essential (currently not verified by "
"Pacemaker) that `SBD_WATCHDOG_TIMEOUT` is set to the same value on "
- "all nodes."
+ "all nodes.")
},
{
"stonith-max-attempts", NULL, "integer", NULL,
"10", pcmk__valid_positive_number,
- "How many times fencing can fail before it will no longer be "
- "immediately re-attempted on a target"
+ N_("How many times fencing can fail before it will no longer be "
+ "immediately re-attempted on a target")
},
// Already documented in libpe_status (other values must be kept identical)
{
"no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide",
"stop", pcmk__valid_quorum, NULL, NULL
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
"false", pcmk__valid_boolean, NULL, NULL
},
};
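/* Editor's note, not part of the patch: a hypothetical entry illustrating the
 * field order documented at the top of this table -- name, legacy name, type,
 * allowed values, default value, validator, then the N_()-marked short and
 * long descriptions. N_() has to be a no-op marker here because a static
 * initializer cannot call functions; the strings are translated later, at
 * display time.
 *
 *  {
 *      "example-option", NULL, "time", NULL,
 *      "60s", pcmk__valid_interval_spec,
 *      N_("Short description shown in the option metadata"),
 *      N_("Longer description; N_() only marks the literal for xgettext "
 *         "extraction, so the table stays a compile-time constant.")
 *  },
 */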
void
crmd_metadata(void)
{
char *s = pcmk__format_option_metadata("pacemaker-controld",
"Pacemaker controller options",
"Cluster options used by Pacemaker's "
"controller",
crmd_opts, PCMK__NELEM(crmd_opts));
printf("%s", s);
free(s);
}
static void
verify_crmd_options(GHashTable * options)
{
pcmk__validate_cluster_options(options, crmd_opts, PCMK__NELEM(crmd_opts));
}
static const char *
crmd_pref(GHashTable * options, const char *name)
{
return pcmk__cluster_option(options, crmd_opts, PCMK__NELEM(crmd_opts),
name);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
controld_set_fsa_input_flags(R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = pcmk__strkey_table(free, free);
pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
verify_crmd_options(config_hash);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
throttle_update_job_max(value);
value = crmd_pref(config_hash, "load-threshold");
if(value) {
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = crmd_pref(config_hash, "no-quorum-policy");
if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
no_quorum_suicide_escalation = TRUE;
}
set_fence_reaction(crmd_pref(config_hash, XML_CONFIG_ATTR_FENCE_REACTION));
value = crmd_pref(config_hash,"stonith-max-attempts");
update_stonith_max_attempts(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
crm_debug("Shutdown escalation occurs if DC has not responded to request in %ums",
shutdown_escalation_timer->period_ms);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
controld_set_election_period(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_interval_ms = crm_parse_interval_spec(value);
crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
value = crmd_pref(config_hash, "transition-delay");
transition_timer->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, "join-integration-timeout");
integration_timer->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, "join-finalization-timeout");
finalization_timer->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
controld_shutdown_lock_enabled = crm_is_true(value);
free(fsa_cluster_name);
fsa_cluster_name = NULL;
value = g_hash_table_lookup(config_hash, "cluster-name");
if (value) {
fsa_cluster_name = strdup(value);
}
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
controld_set_fsa_input_flags(R_READ_CONFIG);
crm_trace("Triggering FSA: %s", __func__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
gboolean
crm_read_options(gpointer user_data)
{
int call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn,
"//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath | cib_scope_local);
fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
mainloop_set_trigger(config_read);
}
void
crm_shutdown(int nsig)
{
if ((crmd_mainloop == NULL) || !g_main_loop_is_running(crmd_mainloop)) {
crmd_exit(CRM_EX_OK);
return;
}
if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
return;
}
controld_set_fsa_input_flags(R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
if (shutdown_escalation_timer->period_ms == 0) {
const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
}
crm_notice("Initiating controller shutdown sequence " CRM_XS
" limit=%ums", shutdown_escalation_timer->period_ms);
controld_start_timer(shutdown_escalation_timer);
}
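For context, this is the standard GNU gettext convention the N_() changes above rely on: N_() is a no-op marker that lets xgettext find translatable literals inside static initializers, while the actual catalog lookup happens at display time via gettext(). The following is a minimal, self-contained sketch of that convention, not code from the patch; the "pacemaker" text domain and the locale path are assumptions for illustration.

    #include <libintl.h>
    #include <locale.h>
    #include <stdio.h>

    #define _(s)  gettext(s)   /* runtime catalog lookup */
    #define N_(s) (s)          /* extraction-time marker only; a no-op */

    /* Static initializers cannot call functions, so table entries are
     * marked with N_() and translated only when displayed. */
    static const char *short_desc =
        N_("Pacemaker version on cluster node elected Designated Controller (DC)");

    int main(void)
    {
        setlocale(LC_ALL, "");   /* honor the user's locale, e.g. zh_CN.UTF-8 */
        bindtextdomain("pacemaker", "/usr/share/locale");  /* assumed domain/path */
        textdomain("pacemaker");
        printf("%s\n", _(short_desc));  /* translated if a catalog is installed */
        return 0;
    }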
diff --git a/po/zh_CN.po b/po/zh_CN.po
index 65f4175912..49a4087e78 100644
--- a/po/zh_CN.po
+++ b/po/zh_CN.po
@@ -1,126 +1,289 @@
#
# Copyright 2003-2022 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU Lesser General Public License
# version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Pacemaker 2\n"
"Report-Msgid-Bugs-To: developers@clusterlabs.org\n"
-"POT-Creation-Date: 2022-04-08 10:34-0500\n"
+"POT-Creation-Date: 2022-02-11 13:46+0800\n"
"PO-Revision-Date: 2021-11-08 11:04+0800\n"
"Last-Translator: Vivi <developers@clusterlabs.org>\n"
"Language-Team: CHINESE <wangluwei@uniontech.org>\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
+#: daemons/controld/controld_control.c:525
+msgid "Pacemaker version on cluster node elected Designated Controller (DC)"
+msgstr "集群选定的控制器节点(DC)Pacemaker版本"
+
+#: daemons/controld/controld_control.c:526
+msgid ""
+"Includes a hash which identifies the exact changeset the code was built "
+"from. Used for diagnostic purposes."
+msgstr "它包含一个可识别代码构建中所有变更的哈希,其可用于诊断。"
+
+#: daemons/controld/controld_control.c:531
+msgid "The messaging stack on which Pacemaker is currently running"
+msgstr "Pacemaker当前正在运行的消息传递堆栈"
+
+#: daemons/controld/controld_control.c:532
+msgid "Used for informational and diagnostic purposes."
+msgstr "用于提供信息和诊断。"
+
+#: daemons/controld/controld_control.c:536
+msgid "An arbitrary name for the cluster"
+msgstr "任意集群的名称"
+
+#: daemons/controld/controld_control.c:537
+msgid ""
+"This optional value is mostly for users' convenience as desired in "
+"administration, but may also be used in Pacemaker configuration rules via "
+"the #cluster-name node attribute, and by higher-level tools and resource "
+"agents."
+msgstr "该可选值主要是为了方便用户管理使用,"
+"也可以在pacemaker 配置规则中通过 #cluster-name 节点属性配置使用,"
+"也可以通过高级工具和资源代理使用。"
+
+#: daemons/controld/controld_control.c:545
+msgid "How long to wait for a response from other nodes during start-up"
+msgstr "启动过程中等待其他节点响应的时间"
+
+#: daemons/controld/controld_control.c:546
+msgid ""
+"The optimal value will depend on the speed and load of your network and the "
+"type of switches used."
+msgstr "其最佳值将由你的网络的速度和负载以及所用交换机的类型决定。"
+
#: daemons/controld/controld_control.c:551
msgid ""
"Zero disables polling, while positive values are an interval in "
"seconds(unless other units are specified, for example \"5min\")"
msgstr ""
"设置为0将禁用轮询,设置为正数将是以秒为单位的时间间隔(除非使用了其他单位,比"
"如\"5min\"表示5分钟)"
-#: daemons/fenced/pacemaker-fenced.c:1462
+#: daemons/controld/controld_control.c:554
+msgid ""
+"Polling interval to recheck cluster state and evaluate rules with date "
+"specifications"
+msgstr "重新检查集群状态及日期使用规范的轮询间隔"
+
+#: daemons/controld/controld_control.c:565
+msgid "Maximum amount of system load that should be used by cluster nodes"
+msgstr "集群节点应该使用的最大系统负载值"
+
+#: daemons/controld/controld_control.c:566
+msgid ""
+"The cluster will slow down its recovery process when the amount of system "
+"resources used (currently CPU) approaches this limit"
+msgstr "当使用的系统资源量(当前为CPU)接近此限制时,集群将减慢其恢复过程"
+
+#: daemons/controld/controld_control.c:572
+msgid ""
+"Maximum number of jobs that can be scheduled per node (defaults to 2x cores)"
+msgstr "每个节点可以调度的最大作业数(默认为2x内核数)"
+
+#: daemons/controld/controld_control.c:576
+msgid "How a cluster node should react if notified of its own fencing"
+msgstr "集群节点在收到自己的防护通知时应如何反应"
+
+#: daemons/controld/controld_control.c:577
+msgid ""
+"A cluster node may receive notification of its own fencing if fencing is "
+"misconfigured, or if fabric fencing is in use that doesn't cut cluster "
+"communication. Allowed values are \"stop\" to attempt to immediately stop "
+"Pacemaker and stay stopped, or \"panic\" to attempt to immediately reboot "
+"the local node, falling back to stop on failure."
+msgstr "如果配置了错误的防护,或者节点正在使用的防护没有中断集群通信,则集群节点可能会收到"
+"有关自身的防护通知。允许的值为 \"stop\" 尝试立即停止pacemaker并保持停用状态,或者 \"panic\" 尝试"
+"立即重新启动本地节点,并在失败时返回执行stop。"
+
+#: daemons/controld/controld_control.c:587
+msgid ""
+"Declare an election failed if it is not decided within this much time. If "
+"you need to adjust this value, it probably indicates the presence of a bug."
+msgstr ""
+"如果集群在本项设置时间内没有作出决定则宣布选举失败。如果您需要调整该值,这可能代表"
+"存在某些错误。"
+
+#: daemons/controld/controld_control.c:595
+msgid ""
+"Exit immediately if shutdown does not complete within this much time. If you "
+"need to adjust this value, it probably indicates the presence of a bug."
+msgstr "如果在这段时间内关机仍未完成,请立即退出。如果您需要调整该值,这可能代表"
+"存在某些错误。"
+
+#: daemons/controld/controld_control.c:603
+#: daemons/controld/controld_control.c:610
+msgid ""
+"If you need to adjust this value, it probably indicates the presence of a "
+"bug."
+msgstr "如果您需要调整该值,这可能代表存在某些错误。"
+
+#: daemons/controld/controld_control.c:616
+msgid ""
+"*** Advanced Use Only *** Enabling this option will slow down cluster "
+"recovery under all conditions"
+msgstr ""
+"*** Advanced Use Only *** 启用此选项将在所有情况下减慢集群恢复的速度"
+
+#: daemons/controld/controld_control.c:618
+msgid ""
+"Delay cluster recovery for this much time to allow for additional events to "
+"occur. Useful if your configuration is sensitive to the order in which ping "
+"updates arrive."
+msgstr "当集群恢复被延迟了指定的时间间隔时,可能会有其它事件出现。"
+"如果您的配置对 ping 更新到达的顺序很敏感,这就很有用"
+
+#: daemons/controld/controld_control.c:625
+msgid ""
+"How long to wait before we can assume nodes are safely down when watchdog-"
+"based self-fencing via SBD is in use"
+msgstr "当watchdog-based self-fencing 通过SBD 被执行时,"
+"我们可以假设节点安全关闭之前需要等待多长时间"
+
+#: daemons/controld/controld_control.c:627
+msgid ""
+"If nonzero, along with `have-watchdog=true` automatically set by the "
+"cluster, when fencing is required, watchdog-based self-fencing will be "
+"performed via SBD without requiring a fencing resource explicitly "
+"configured. If `stonith-watchdog-timeout` is set to a positive value, unseen "
+"nodes are assumed to self-fence within this much time. +WARNING:+ It must be "
+"ensured that this value is larger than the `SBD_WATCHDOG_TIMEOUT` "
+"environment variable on all nodes. Pacemaker verifies the settings "
+"individually on all nodes and prevents startup or shuts down if configured "
+"wrongly on the fly. It's strongly recommended that `SBD_WATCHDOG_TIMEOUT` is "
+"set to the same value on all nodes. If `stonith-watchdog-timeout` is set to "
+"a negative value, and `SBD_WATCHDOG_TIMEOUT` is set, twice that value will "
+"be used. +WARNING:+ In this case, it's essential (currently not verified by "
+"Pacemaker) that `SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes."
+msgstr ""
+"如果值非零,且集群设置了 `have-watchdog=true` ,当需要fencing时,watchdog-based self-fence 将通过SBD执行,"
+"而不需要显式配置fencing资源。如果 `stonith-watchdog-timeout` 被设为正值,则假定未被发现的节点在这段时间内self-fence。"
+" +WARNING:+ 必须确保值大于所有节点上的`SBD_WATCHDOG_TIMEOUT` 环境变量。Pacemaker将在所有节点上单独验证设置,"
+"并防止所有节点在动态配置错误时发生启动或关闭。强烈建议在所有节点上将 `SBD_WATCHDOG_TIMEOUT` 设置为相同的值。"
+"如果 `stonith-watchdog-timeout` 设置为负值。并且设置了 `SBD_WATCHDOG_TIMEOUT` ,则将使用该值的两倍,"
+" +WARNING:+ 在这种情况下,必须将所有节点上 `SBD_WATCHDOG_TIMEOUT` 设置为相同的值(目前没有通过pacemaker验证)。"
+
+#: daemons/controld/controld_control.c:648
+msgid ""
+"How many times fencing can fail before it will no longer be immediately re-"
+"attempted on a target"
+msgstr "stonith失败多少次会停止尝试"
+
+#: daemons/controld/controld_control.c:556
+msgid ""
+"Pacemaker is primarily event-driven, and looks ahead to know when to recheck "
+"cluster state for failure timeouts and most time-based rules. However, it "
+"will also recheck the cluster after this amount of inactivity, to evaluate "
+"rules with date specifications and serve as a fail-safe for certain types of "
+"scheduler bugs."
+msgstr ""
+"Pacemaker 主要是通过事件驱动的,并能预期重新检查集群状态及大多数基于时间的规则"
+"以获取当前出现的失败或超时的问题。同时,在其不活动状态结束之后,它还将重新检"
+"查集群,以评估具有日期规格的规则,并为某些类型的调度程序错误提供故障保护服务。"
+
+#: daemons/fenced/pacemaker-fenced.c:1464
#, c-format
msgid ""
" <longdesc lang=\"en\">Instance attributes available for all \"stonith\"-"
"class resources and used by Pacemaker's fence daemon, formerly known as "
"stonithd</longdesc>\n"
msgstr ""
" <longdesc lang=\"zh\">实例属性可用于所有stonith类资源,并由Pacemaker的fence"
"守护程序使用(以前称为stonithd)</longdesc>\n"
-#: daemons/fenced/pacemaker-fenced.c:1467
+#: daemons/fenced/pacemaker-fenced.c:1469
#, c-format
msgid ""
" <shortdesc lang=\"en\">Instance attributes available for all \"stonith\"-"
"class resources</shortdesc>\n"
msgstr ""
" <shortdesc lang=\"zh\">可用于所有stonith类资源的实例属性</shortdesc>\n"
-#: daemons/fenced/pacemaker-fenced.c:1490
+#: daemons/fenced/pacemaker-fenced.c:1492
#, fuzzy, c-format
msgid ""
" <longdesc lang=\"en\">Some devices do not support the standard 'port' "
"parameter or may provide additional ones. Use this to specify an alternate, "
"device-specific, parameter that should indicate the machine to be fenced. A "
"value of '%s' can be used to tell the cluster not to supply any additional "
"parameters.\n"
" </longdesc>\n"
msgstr ""
" <longdesc lang=\"zh\">某些设备可能不支持使用标准的'port'(端口)参数,也可"
"能会提供额外的端口参数。\n"
"使用此参数可以为需要fence(防护)的机器指定一个备用的,专用于该设备的参数,该参"
"数应指出要fence的机器。\n"
"使用值'%s'可用来告诉集群不提供任何额外的参数\n"
" </longdesc>\n"
-#: daemons/fenced/pacemaker-fenced.c:1502
+#: daemons/fenced/pacemaker-fenced.c:1504
#, c-format
msgid ""
" <shortdesc lang=\"en\">Advanced use only: An alternate parameter to "
"supply instead of 'port'</shortdesc>\n"
msgstr ""
" <shortdesc lang=\"zh\">Advanced use only:(仅限高级使用)备用参数可替"
"代'port'</shortdesc>\n"
-#: daemons/fenced/pacemaker-fenced.c:1513
+#: daemons/fenced/pacemaker-fenced.c:1515
#, c-format
msgid ""
" <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to "
"use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n"
msgstr ""
" <longdesc lang=\"zh\">例如:pcmk_host_map=\"node:1;node2:2,3\"表示让集群"
"的节点node1使用端口1,节点node2使用端口2和端口3。</longdesc>\n"
-#: daemons/fenced/pacemaker-fenced.c:1519
+#: daemons/fenced/pacemaker-fenced.c:1521
#, c-format
msgid ""
" <shortdesc lang=\"en\">A mapping of host names to ports numbers for "
"devices that do not support host names.</shortdesc>\n"
msgstr ""
" <shortdesc lang=\"zh\">为不支持主机名的设备提供主机名和端口号的映射</"
"shortdesc>\n"
#: lib/cib/cib_utils.c:558
msgid "Enable Access Control Lists (ACLs) for the CIB"
msgstr "为CIB启用访问控制列表(ACL)"
#: lib/cib/cib_utils.c:564
msgid "Maximum IPC message backlog before disconnecting a cluster daemon"
msgstr "断开集群守护程序之前的最大IPC消息积压"
#: lib/cib/cib_utils.c:565
msgid ""
"Raise this if log has \"Evicting client\" messages for cluster daemon PIDs "
"(a good value is the number of resources in the cluster multiplied by the "
"number of nodes)."
msgstr ""
"如果日志中有针对集群守护程序PID的消息“Evicting client”,(则建议将值设为集群"
"中的资源数量乘以节点数量)"
-#: lib/common/options.c:590
+#: lib/common/options.c:591
msgid " Allowed values: "
msgstr ""
-#: lib/pengine/common.c:131
+#: lib/pengine/common.c:119
msgid "Whether watchdog integration is enabled"
msgstr "是否启用看门狗集成设置"
-#: tools/crm_resource.c:1388
+#: tools/crm_resource.c:1405
#, fuzzy, c-format
msgid "Metadata query for %s failed: %s"
msgstr ",查询%s的元数据失败: %s\n"
-#: tools/crm_resource.c:1394
+#: tools/crm_resource.c:1411
#, c-format
msgid "'%s' is not a valid agent specification"
msgstr "'%s' 是一个无效的代理"