Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624145
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
87 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index b4840d22ee..98f9ff2381 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,867 +1,869 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static qb_ipcs_service_t *ipcs = NULL;
static crm_trigger_t *config_read_trigger = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = pcmk_cluster_new();
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
registered = crm_connect_corosync(cluster);
}
#endif // SUPPORT_COROSYNC
if (registered) {
controld_election_init(cluster->uname);
controld_globals.our_nodename = cluster->uname;
controld_globals.our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (!registered) {
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __func__);
controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
controld_set_fsa_input_flags(R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
controld_set_fsa_input_flags(R_SHUTDOWN);
//controld_set_fsa_input_flags(R_STAYDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
pcmk__s(controld_globals.dc_name, "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& pcmk_is_set(controld_globals.fsa_input_register,
R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
if (controld_globals.logger_out != NULL) {
controld_globals.logger_out->finish(controld_globals.logger_out,
exit_code, true, NULL);
pcmk__output_free(controld_globals.logger_out);
controld_globals.logger_out = NULL;
}
crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GMainLoop *mloop = controld_globals.mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
controld_shutdown_schedulerd_ipc();
controld_disconnect_fencer(TRUE);
if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
iter = iter->next) {
fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(controld_globals.fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
controld_clear_fsa_input_flags(R_MEMBERSHIP);
g_list_free(controld_globals.fsa_message_queue);
controld_globals.fsa_message_queue = NULL;
controld_free_node_pending_timers();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
controld_disconnect_cib_manager();
verify_stopped(controld_globals.fsa_state, LOG_WARNING);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_destroy_all();
mainloop_destroy_trigger(config_read_trigger);
config_read_trigger = NULL;
controld_destroy_fsa_trigger();
controld_destroy_transition_trigger();
pcmk__client_cleanup();
crm_peer_destroy();
controld_free_fsa_timers();
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
free(controld_globals.our_nodename);
controld_globals.our_nodename = NULL;
free(controld_globals.our_uuid);
controld_globals.our_uuid = NULL;
free(controld_globals.dc_name);
controld_globals.dc_name = NULL;
free(controld_globals.dc_version);
controld_globals.dc_version = NULL;
free(controld_globals.cluster_name);
controld_globals.cluster_name = NULL;
free(controld_globals.te_uuid);
controld_globals.te_uuid = NULL;
free_max_generation();
controld_destroy_failed_sync_table();
controld_destroy_outside_events_table();
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
/* Don't re-enter this block */
controld_globals.mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(controld_globals.cib_conn);
controld_globals.cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
if (pcmk_is_set(action, A_EXIT_1)) {
exit_code = CRM_EX_ERROR;
crm_err("Exiting now due to errors");
}
verify_stopped(cur_state, LOG_ERR);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
crm_read_options, NULL);
controld_init_fsa_trigger();
controld_init_transition_trigger();
crm_debug("Creating CIB manager and executor objects");
controld_globals.cib_conn = cib_new();
lrm_state_init_local();
if (controld_init_fsa_timers() == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("Accepting new IPC client connection");
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
return 0;
}
pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(msg, F_CRM_USER, client->user);
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (controld_authorize_ipc_message(msg, client, NULL)) {
crm_trace("Processing IPC message from client %s",
pcmk__client_name(client));
route_message(C_IPC_MESSAGE, msg);
}
controld_trigger_fsa();
free_xml(msg);
return 0;
}
static int32_t
ipc_client_disconnected(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), pcmk__client_name(client),
c, client);
free(client->userdata);
pcmk__free_client(client);
controld_trigger_fsa();
}
return 0;
}
static void
ipc_connection_destroyed(qb_ipcs_connection_t *c)
{
crm_trace("Connection %p", c);
ipc_client_disconnected(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = accept_controller_client,
.connection_created = NULL,
.msg_process = dispatch_controller_ipc,
.connection_closed = ipc_client_disconnected,
.connection_destroyed = ipc_connection_destroyed
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_MEMBERSHIP)) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_LRM_CONNECTED)) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_CIB_CONNECTED)) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_READ_CONFIG)) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
controld_set_fsa_input_flags(R_ST_REQUIRED);
controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
controld_clear_fsa_input_flags(R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
controld_set_fsa_input_flags(R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
static pcmk__cluster_option_t controller_options[] = {
/* name, old name, type, allowed values,
* default value, validator,
* short description,
* long description
*/
{
PCMK_OPT_DC_VERSION, NULL, "string", NULL,
PCMK__VALUE_NONE, NULL,
N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
N_("Includes a hash which identifies the exact changeset the code was "
"built from. Used for diagnostic purposes.")
},
{
PCMK_OPT_CLUSTER_INFRASTRUCTURE, NULL, "string", NULL,
"corosync", NULL,
N_("The messaging stack on which Pacemaker is currently running"),
N_("Used for informational and diagnostic purposes.")
},
{
PCMK_OPT_CLUSTER_NAME, NULL, "string", NULL,
NULL, NULL,
N_("An arbitrary name for the cluster"),
N_("This optional value is mostly for users' convenience as desired "
"in administration, but may also be used in Pacemaker "
"configuration rules via the #cluster-name node attribute, and "
"by higher-level tools and resource agents.")
},
{
PCMK_OPT_DC_DEADTIME, NULL, "time", NULL,
"20s", pcmk__valid_interval_spec,
N_("How long to wait for a response from other nodes during start-up"),
N_("The optimal value will depend on the speed and load of your network "
"and the type of switches used.")
},
{
PCMK_OPT_CLUSTER_RECHECK_INTERVAL, NULL, "time",
N_("Zero disables polling, while positive values are an interval in seconds"
"(unless other units are specified, for example \"5min\")"),
"15min", pcmk__valid_interval_spec,
N_("Polling interval to recheck cluster state and evaluate rules "
"with date specifications"),
N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
"recheck cluster state for failure timeouts and most time-based "
"rules. However, it will also recheck the cluster after this "
"amount of inactivity, to evaluate rules with date specifications "
"and serve as a fail-safe for certain types of scheduler bugs.")
},
{
"load-threshold", NULL, "percentage", NULL,
"80%", pcmk__valid_percentage,
N_("Maximum amount of system load that should be used by cluster nodes"),
N_("The cluster will slow down its recovery process when the amount of "
"system resources used (currently CPU) approaches this limit"),
},
{
"node-action-limit", NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("Maximum number of jobs that can be scheduled per node "
"(defaults to 2x cores)")
},
- { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
+ {
+ PCMK_OPT_FENCE_REACTION, NULL, "string", NULL,
+ "stop", NULL,
N_("How a cluster node should react if notified of its own fencing"),
N_("A cluster node may receive notification of its own fencing if fencing "
"is misconfigured, or if fabric fencing is in use that doesn't cut "
"cluster communication. Allowed values are \"stop\" to attempt to "
"immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
"to immediately reboot the local node, falling back to stop on failure.")
},
{
PCMK_OPT_ELECTION_TIMEOUT, NULL, "time", NULL,
"2min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("Declare an election failed if it is not decided within this much "
"time. If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
"20min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("Exit immediately if shutdown does not complete within this much "
"time. If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
"join-integration-timeout", "crmd-integration-timeout", "time", NULL,
"3min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
"join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
"30min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
"transition-delay", "crmd-transition-delay", "time", NULL,
"0s", pcmk__valid_interval_spec,
N_("*** Advanced Use Only *** Enabling this option will slow down "
"cluster recovery under all conditions"),
N_("Delay cluster recovery for this much time to allow for additional "
"events to occur. Useful if your configuration is sensitive to "
"the order in which ping updates arrive.")
},
{
"stonith-watchdog-timeout", NULL, "time", NULL,
"0", controld_verify_stonith_watchdog_timeout,
N_("How long before nodes can be assumed to be safely down when "
"watchdog-based self-fencing via SBD is in use"),
N_("If this is set to a positive value, lost nodes are assumed to "
"self-fence using watchdog-based SBD within this much time. This "
"does not require a fencing resource to be explicitly configured, "
"though a fence_watchdog resource can be configured, to limit use "
"to specific nodes. If this is set to 0 (the default), the cluster "
"will never assume watchdog-based self-fencing. If this is set to a "
"negative value, the cluster will use twice the local value of the "
"`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
"or otherwise treat this as 0. WARNING: When used, this timeout "
"must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
"watchdog-based SBD, and Pacemaker will refuse to start on any of "
"those nodes where this is not true for the local value or SBD is "
"not active. When this is set to a negative value, "
"`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
"that use SBD, otherwise data corruption or loss could occur.")
},
{
"stonith-max-attempts", NULL, "integer", NULL,
"10", pcmk__valid_positive_number,
N_("How many times fencing can fail before it will no longer be "
"immediately re-attempted on a target")
},
// Already documented in libpe_status (other values must be kept identical)
{
"no-quorum-policy", NULL, "select",
"stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
N_("What to do when the cluster does not have quorum"), NULL
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether to lock resources to a cleanly shut down node"),
N_("When true, resources active on a node when it is cleanly shut down "
"are kept \"locked\" to that node (not allowed to run elsewhere) "
"until they start again on that node after it rejoins (or for at "
"most shutdown-lock-limit, if set). Stonith resources and "
"Pacemaker Remote connections are never locked. Clone and bundle "
"instances and the promoted role of promotable clones are "
"currently never locked, though support could be added in a future "
"release.")
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("Do not lock resources to a cleanly shut down node longer than "
"this"),
N_("If shutdown-lock is true and this is set to a nonzero time "
"duration, shutdown locks will expire after this much time has "
"passed since the shutdown was initiated, even if the node has not "
"rejoined.")
},
{
XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("How long to wait for a node that has joined the cluster to join "
"the controller process group"),
N_("Fence nodes that do not join the controller process group within "
"this much time after joining the cluster, to allow the cluster "
"to continue managing resources. A value of 0 means never fence "
"pending nodes. Setting the value to 2h means fence nodes after "
"2 hours.")
},
};
void
crmd_metadata(void)
{
const char *desc_short = "Pacemaker controller options";
const char *desc_long = "Cluster options used by Pacemaker's controller";
gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
desc_long, controller_options,
PCMK__NELEM(controller_options));
printf("%s", s);
g_free(s);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
controld_set_fsa_input_flags(R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig != NULL)
&& !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = pcmk__strkey_table(free, free);
pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
// Validate all options, and use defaults if not already present in hash
pcmk__validate_cluster_options(config_hash, controller_options,
PCMK__NELEM(controller_options));
value = g_hash_table_lookup(config_hash, "no-quorum-policy");
if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
controld_set_global_flags(controld_no_quorum_suicide);
}
value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
if (crm_is_true(value)) {
controld_set_global_flags(controld_shutdown_lock_enabled);
} else {
controld_clear_global_flags(controld_shutdown_lock_enabled);
}
value = g_hash_table_lookup(config_hash,
XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value)
/ 1000;
value = g_hash_table_lookup(config_hash,
XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT);
controld_globals.node_pending_timeout = crm_parse_interval_spec(value) / 1000;
value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME);
pcmk__str_update(&(controld_globals.cluster_name), value);
// Let subcomponents initialize their own static variables
controld_configure_election(config_hash);
controld_configure_fencing(config_hash);
controld_configure_fsa_timers(config_hash);
controld_configure_throttle(config_hash);
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
controld_set_fsa_input_flags(R_READ_CONFIG);
controld_trigger_fsa();
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
/*!
* \internal
* \brief Trigger read and processing of the configuration
*
* \param[in] fn Calling function name
* \param[in] line Line number where call occurred
*/
void
controld_trigger_config_as(const char *fn, int line)
{
if (config_read_trigger != NULL) {
crm_trace("%s:%d - Triggered config processing", fn, line);
mainloop_set_trigger(config_read_trigger);
}
}
gboolean
crm_read_options(gpointer user_data)
{
cib_t *cib_conn = controld_globals.cib_conn;
int call_id = cib_conn->cmds->query(cib_conn,
"//" XML_CIB_TAG_CRMCONFIG
" | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath|cib_scope_local);
fsa_register_cib_callback(call_id, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
controld_trigger_config();
}
void
crm_shutdown(int nsig)
{
const char *value = NULL;
guint default_period_ms = 0;
if ((controld_globals.mainloop == NULL)
|| !g_main_loop_is_running(controld_globals.mainloop)) {
crmd_exit(CRM_EX_OK);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
return;
}
controld_set_fsa_input_flags(R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
/* If shutdown timer doesn't have a period set, use the default
*
* @TODO: Evaluate whether this is still necessary. As long as
* config_query_callback() has been run at least once, it doesn't look like
* anything could have changed the timer period since then.
*/
value = pcmk__cluster_option(NULL, controller_options,
PCMK__NELEM(controller_options),
XML_CONFIG_ATTR_FORCE_QUIT);
default_period_ms = crm_parse_interval_spec(value);
controld_shutdown_start_countdown(default_period_ms);
}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 9557d9e32c..c39fa9801b 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1111 +1,1111 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and the most we'll restart a
* transition for.
*/
struct st_fail_rec {
int count;
};
static bool fence_reaction_panic = false;
static unsigned long int stonith_max_attempts = 10;
static GHashTable *stonith_failures = NULL;
/*!
* \internal
* \brief Update max fencing attempts before giving up
*
* \param[in] value New max fencing attempts
*/
static void
update_stonith_max_attempts(const char *value)
{
stonith_max_attempts = char2score(value);
if (stonith_max_attempts < 1UL) {
stonith_max_attempts = 10UL;
}
}
/*!
* \internal
* \brief Configure reaction to notification of local node being fenced
*
* \param[in] reaction_s Reaction type
*/
static void
set_fence_reaction(const char *reaction_s)
{
if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
fence_reaction_panic = true;
} else {
if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
crm_warn("Invalid value '%s' for %s, using 'stop'",
- reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
+ reaction_s, PCMK_OPT_FENCE_REACTION);
}
fence_reaction_panic = false;
}
}
/*!
* \internal
* \brief Configure fencing options based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_fencing(GHashTable *options)
{
const char *value = NULL;
- value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION);
+ value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
set_fence_reaction(value);
value = g_hash_table_lookup(options, "stonith-max-attempts");
update_stonith_max_attempts(value);
}
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = pcmk__strkey_table(free, free);
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, strdup(target), rec);
}
}
/* end stonith fail count functions */
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed",
NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
static void
send_stonith_update(pcmk__graph_action_t *action, const char *target,
const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
* might have already rejoined before we get the stonith result here.
*/
int flags = node_update_join | node_update_expected;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate.
* Try getting any existing node cache entry also by node uuid in case it
* doesn't have an uname yet.
*/
peer = pcmk__get_peer_full(0, target, uuid, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= node_update_cluster;
}
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = strdup(uuid);
}
crmd_peer_down(peer, TRUE);
/* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __func__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, XML_ATTR_ID, uuid);
rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
XML_CIB_TAG_STATUS, node_state,
cib_scope_local
|cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated);
// Make sure it sticks
/* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
* cib_scope_local);
*/
controld_delete_node_state(peer->uname, controld_section_all,
cib_scope_local);
free_xml(node_state);
return;
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum pcmk__graph_next abort_action,
const char *target, const xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
abort_action = pcmk__graph_wait;
}
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GList *stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GList *iter = stonith_cleanup_list;
while (iter != NULL) {
GList *tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup(void)
{
if (stonith_cleanup_list) {
GList *iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup(void)
{
GList *iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static mainloop_timer_t *controld_fencer_connect_timer = NULL;
static char *te_client_id = NULL;
static gboolean
fail_incompletable_stonith(pcmk__graph_t *graph)
{
GList *lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GList *lpc2 = NULL;
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
if ((action->type != pcmk__cluster_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
last_action = action->xml;
pcmk__update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
te_cleanup_stonith_history_sync(st, FALSE);
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
crm_err("Lost fencer connection (will attempt to reconnect)");
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
} else {
crm_info("Disconnected from fencer");
}
if (stonith_api) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (AM_I_DC) {
fail_incompletable_stonith(controld_globals.transition_graph);
trigger_graph();
}
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* \param[in] st Fencing API connection (ignored)
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded
&& pcmk__str_eq(event->target, controld_globals.our_nodename,
pcmk__str_casei)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic(__func__);
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " CRM_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
CRM_GET_PEER_ANY);
const char *uuid = NULL;
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
if (AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(INFINITY, pcmk__graph_restart,
"External Fencing Operation", NULL);
}
} else if (pcmk__str_eq(controld_globals.dc_name, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
// Assume the target was our DC if we don't currently have one
if (controld_globals.dc_name != NULL) {
crm_notice("Fencing target %s was our DC", event->target);
} else {
crm_notice("Fencing target %s may have been our DC",
event->target);
}
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
pcmk__str_casei)) {
send_stonith_update(NULL, event->target, uuid);
}
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
*
* \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
gboolean
controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (controld_fencer_connect_timer == NULL) {
controld_fencer_connect_timer =
mainloop_timer_add("controld_fencer_connect", 1000,
TRUE, controld_timer_fencer_connect,
GINT_TO_POINTER(TRUE));
}
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
return G_SOURCE_CONTINUE;
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return G_SOURCE_REMOVE;
}
}
if (rc == pcmk_ok) {
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_FENCE,
handle_fence_notification);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return G_SOURCE_REMOVE;
}
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
controld_clear_fsa_input_flags(R_ST_REQUIRED);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(stonith_api);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (controld_fencer_connect_timer) {
mainloop_timer_del(controld_fencer_connect_timer);
controld_fencer_connect_timer = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
pcmk__graph_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
if (controld_globals.transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
|| (controld_globals.transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " CRM_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id,
pcmk__btoa(controld_globals.transition_graph->complete),
stonith_id, uuid, controld_globals.te_uuid, transition_id,
controld_globals.transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
CRM_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action);
if (stonith__exit_status(data) == CRM_EX_OK) {
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *op = crm_meta_value(action->params, "stonith_action");
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
send_stonith_update(action, target, uuid);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum pcmk__graph_next abort_action = pcmk__graph_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = pcmk__graph_wait;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
bail:
free(data->userdata);
free(uuid);
return;
}
static int
fence_with_delay(const char *target, const char *type, int delay)
{
uint32_t options = st_opt_none; // Group of enum stonith_call_options
int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
/ 1000);
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
stonith__set_call_options(options, target, st_opt_allow_suicide);
}
return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
type, timeout_sec, 0, delay);
}
/*!
* \internal
* \brief Execute a fencing action from a transition graph
*
* \param[in] graph Transition graph being executed (ignored)
* \param[in] action Fencing action to execute
*
* \return Standard Pacemaker return code
*/
int
controld_execute_fence_action(pcmk__graph_t *graph,
pcmk__graph_action_t *action)
{
int rc = 0;
const char *id = ID(action->xml);
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *type = crm_meta_value(action->params, "stonith_action");
char *transition_key = NULL;
const char *priority_delay = NULL;
int delay_i = 0;
gboolean invalid_action = FALSE;
int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
/ 1000);
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return EPROTO;
}
priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
crm_notice("Requesting fencing (%s) targeting node %s "
CRM_XS " action=%s timeout=%i%s%s",
type, target, id, stonith_timeout,
priority_delay ? " priority_delay=" : "",
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
controld_timer_fencer_connect(NULL);
pcmk__scan_min_int(priority_delay, &delay_i, 0);
rc = fence_with_delay(target, type, delay_i);
transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, 0,
controld_globals.te_uuid),
stonith_api->cmds->register_callback(stonith_api, rc,
(stonith_timeout
+ (delay_i > 0 ? delay_i : 0)),
st_opt_timeout_updates, transition_key,
"tengine_stonith_callback",
tengine_stonith_callback);
return pcmk_rc_ok;
}
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
long st_timeout = value? crm_get_msec(value) : 0;
const char *our_nodename = controld_globals.our_nodename;
gboolean rv = TRUE;
if (st_timeout == 0
|| (stonith_api && (stonith_api->state != stonith_disconnected) &&
stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
our_nodename))) {
rv = pcmk__valid_sbd_timeout(value);
}
return rv;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
if (free_timers) {
mainloop_timer_del(stonith_history_sync_timer_short);
stonith_history_sync_timer_short = NULL;
mainloop_timer_del(stonith_history_sync_timer_long);
stonith_history_sync_timer_long = NULL;
} else {
mainloop_timer_stop(stonith_history_sync_timer_short);
mainloop_timer_stop(stonith_history_sync_timer_long);
}
if (st) {
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
}
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(bool long_timeout)
{
/* trigger a sync in 5s to give more nodes the
* chance to show up so that we don't create
* unnecessary stonith-history-sync traffic
*
* the long timeout of 30s is there as a fallback
* so that after a successful connection to fenced
* we will wait for 30s for the DC to trigger a
* history-sync
* if this doesn't happen we trigger a sync locally
* (e.g. fenced segfaults and is restarted by pacemakerd)
*/
/* as we are finally checking the stonith-connection
* in do_stonith_history_sync we should be fine
* leaving stonith_history_sync_time & stonith_history_sync_trigger
* around
*/
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if (long_timeout) {
if(stonith_history_sync_timer_long == NULL) {
stonith_history_sync_timer_long =
mainloop_timer_add("history_sync_long", 30000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
mainloop_timer_start(stonith_history_sync_timer_long);
} else {
if(stonith_history_sync_timer_short == NULL) {
stonith_history_sync_timer_short =
mainloop_timer_add("history_sync_short", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer_short);
}
}
/* end stonith history synchronization functions */
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
index 441adc47b8..44a2799566 100644
--- a/include/crm/msg_xml.h
+++ b/include/crm/msg_xml.h
@@ -1,495 +1,496 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_MSG_XML__H
# define PCMK__CRM_MSG_XML__H
# include <crm/common/xml.h>
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/msg_xml_compat.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* This file defines constants for various XML syntax (mainly element and
* attribute names).
*
* For consistency, new constants should start with "PCMK_", followed by "XE"
* for XML element names, "XA" for XML attribute names, and "META" for meta
* attribute names. Old names that don't follow this policy should eventually be
* deprecated and replaced with names that do.
*/
/*
* XML elements
*/
#define PCMK_XE_DATE_EXPRESSION "date_expression"
#define PCMK_XE_OP_EXPRESSION "op_expression"
/* This has been deprecated as a CIB element (an alias for <clone> with
* "promotable" set to "true") since 2.0.0.
*/
#define PCMK_XE_PROMOTABLE_LEGACY "master"
#define PCMK_XE_RSC_EXPRESSION "rsc_expression"
/*
* XML attributes
*/
#define PCMK_XA_FORMAT "format"
/* These have been deprecated as CIB <clone> element attributes (aliases for
* "promoted-max" and "promoted-node-max") since 2.0.0.
*/
#define PCMK_XA_PROMOTED_MAX_LEGACY "master-max"
#define PCMK_XA_PROMOTED_NODE_MAX_LEGACY "master-node-max"
/*
* Meta attributes
*/
#define PCMK_META_CLONE_MAX "clone-max"
#define PCMK_META_CLONE_MIN "clone-min"
#define PCMK_META_CLONE_NODE_MAX "clone-node-max"
#define PCMK_META_ENABLED "enabled"
#define PCMK_META_FAILURE_TIMEOUT "failure-timeout"
#define PCMK_META_MIGRATION_THRESHOLD "migration-threshold"
#define PCMK_META_PROMOTED_MAX "promoted-max"
#define PCMK_META_PROMOTED_NODE_MAX "promoted-node-max"
/*
* Cluster options
*/
#define PCMK_OPT_BATCH_LIMIT "batch-limit"
#define PCMK_OPT_CLUSTER_DELAY "cluster-delay"
#define PCMK_OPT_CLUSTER_INFRASTRUCTURE "cluster-infrastructure"
#define PCMK_OPT_CLUSTER_IPC_LIMIT "cluster-ipc-limit"
#define PCMK_OPT_CLUSTER_NAME "cluster-name"
#define PCMK_OPT_CLUSTER_RECHECK_INTERVAL "cluster-recheck-interval"
#define PCMK_OPT_CONCURRENT_FENCING "concurrent-fencing"
#define PCMK_OPT_DC_DEADTIME "dc-deadtime"
#define PCMK_OPT_DC_VERSION "dc-version"
#define PCMK_OPT_ELECTION_TIMEOUT "election-timeout"
#define PCMK_OPT_ENABLE_ACL "enable-acl"
#define PCMK_OPT_ENABLE_STARTUP_PROBES "enable-startup-probes"
+#define PCMK_OPT_FENCE_REACTION "fence-reaction"
/*
* Older constants that don't follow current naming
*/
# ifndef F_ORIG
# define F_ORIG "src"
# endif
# ifndef F_SEQ
# define F_SEQ "seq"
# endif
# ifndef F_SUBTYPE
# define F_SUBTYPE "subt"
# endif
# ifndef F_TYPE
# define F_TYPE "t"
# endif
# ifndef F_CLIENTNAME
# define F_CLIENTNAME "cn"
# endif
# ifndef F_XML_TAGNAME
# define F_XML_TAGNAME "__name__"
# endif
# ifndef T_CRM
# define T_CRM "crmd"
# endif
# ifndef T_ATTRD
# define T_ATTRD "attrd"
# endif
# define CIB_OPTIONS_FIRST "cib-bootstrap-options"
# define F_CRM_DATA "crm_xml"
# define F_CRM_TASK "crm_task"
# define F_CRM_HOST_TO "crm_host_to"
# define F_CRM_MSG_TYPE F_SUBTYPE
# define F_CRM_SYS_TO "crm_sys_to"
# define F_CRM_SYS_FROM "crm_sys_from"
# define F_CRM_HOST_FROM F_ORIG
# define F_CRM_REFERENCE XML_ATTR_REFERENCE
# define F_CRM_VERSION XML_ATTR_VERSION
# define F_CRM_ORIGIN "origin"
# define F_CRM_USER "crm_user"
# define F_CRM_JOIN_ID "join_id"
# define F_CRM_DC_LEAVING "dc-leaving"
# define F_CRM_ELECTION_ID "election-id"
# define F_CRM_ELECTION_AGE_S "election-age-sec"
# define F_CRM_ELECTION_AGE_US "election-age-nano-sec"
# define F_CRM_ELECTION_OWNER "election-owner"
# define F_CRM_TGRAPH "crm-tgraph-file"
# define F_CRM_TGRAPH_INPUT "crm-tgraph-in"
# define F_CRM_THROTTLE_MODE "crm-limit-mode"
# define F_CRM_THROTTLE_MAX "crm-limit-max"
/*---- Common tags/attrs */
# define XML_DIFF_MARKER "__crm_diff_marker__"
# define XML_TAG_CIB "cib"
# define XML_TAG_FAILED "failed"
# define XML_ATTR_CRM_VERSION "crm_feature_set"
# define XML_ATTR_DIGEST "digest"
# define XML_ATTR_VALIDATION "validate-with"
# define XML_ATTR_QUORUM_PANIC "no-quorum-panic"
# define XML_ATTR_HAVE_QUORUM "have-quorum"
# define XML_ATTR_HAVE_WATCHDOG "have-watchdog"
# define XML_ATTR_GENERATION "epoch"
# define XML_ATTR_GENERATION_ADMIN "admin_epoch"
# define XML_ATTR_NUMUPDATES "num_updates"
# define XML_ATTR_TIMEOUT "timeout"
# define XML_ATTR_ORIGIN "crm-debug-origin"
# define XML_ATTR_TSTAMP "crm-timestamp"
# define XML_CIB_ATTR_WRITTEN "cib-last-written"
# define XML_ATTR_VERSION "version"
# define XML_ATTR_DESC "description"
# define XML_ATTR_ID "id"
# define XML_ATTR_NAME "name"
# define XML_ATTR_IDREF "id-ref"
# define XML_ATTR_ID_LONG "long-id"
# define XML_ATTR_TYPE "type"
# define XML_ATTR_OP "op"
# define XML_ATTR_DC_UUID "dc-uuid"
# define XML_ATTR_UPDATE_ORIG "update-origin"
# define XML_ATTR_UPDATE_CLIENT "update-client"
# define XML_ATTR_UPDATE_USER "update-user"
# define XML_BOOLEAN_TRUE "true"
# define XML_BOOLEAN_FALSE "false"
# define XML_BOOLEAN_YES XML_BOOLEAN_TRUE
# define XML_BOOLEAN_NO XML_BOOLEAN_FALSE
# define XML_TAG_OPTIONS "options"
/*---- top level tags/attrs */
# define XML_ATTR_REQUEST "request"
# define XML_ATTR_RESPONSE "response"
# define XML_ATTR_UNAME "uname"
# define XML_ATTR_REFERENCE "reference"
# define XML_CRM_TAG_PING "ping_response"
# define XML_PING_ATTR_STATUS "result"
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
# define XML_PING_ATTR_PACEMAKERDSTATE_REMOTE "remote"
# define XML_FAIL_TAG_CIB "failed_update"
# define XML_FAILCIB_ATTR_ID "id"
# define XML_FAILCIB_ATTR_OBJTYPE "object_type"
# define XML_FAILCIB_ATTR_OP "operation"
# define XML_FAILCIB_ATTR_REASON "reason"
/*---- CIB specific tags/attrs */
# define XML_CIB_TAG_SECTION_ALL "all"
# define XML_CIB_TAG_CONFIGURATION "configuration"
# define XML_CIB_TAG_STATUS "status"
# define XML_CIB_TAG_RESOURCES "resources"
# define XML_CIB_TAG_NODES "nodes"
# define XML_CIB_TAG_CONSTRAINTS "constraints"
# define XML_CIB_TAG_CRMCONFIG "crm_config"
# define XML_CIB_TAG_OPCONFIG "op_defaults"
# define XML_CIB_TAG_RSCCONFIG "rsc_defaults"
# define XML_CIB_TAG_ACLS "acls"
# define XML_CIB_TAG_ALERTS "alerts"
# define XML_CIB_TAG_ALERT "alert"
# define XML_CIB_TAG_ALERT_RECIPIENT "recipient"
# define XML_CIB_TAG_ALERT_SELECT "select"
# define XML_CIB_TAG_ALERT_ATTRIBUTES "select_attributes"
# define XML_CIB_TAG_ALERT_FENCING "select_fencing"
# define XML_CIB_TAG_ALERT_NODES "select_nodes"
# define XML_CIB_TAG_ALERT_RESOURCES "select_resources"
# define XML_CIB_TAG_ALERT_ATTR "attribute"
# define XML_CIB_TAG_STATE "node_state"
# define XML_CIB_TAG_NODE "node"
# define XML_CIB_TAG_NVPAIR "nvpair"
# define XML_CIB_TAG_PROPSET "cluster_property_set"
# define XML_TAG_ATTR_SETS "instance_attributes"
# define XML_TAG_META_SETS "meta_attributes"
# define XML_TAG_ATTRS "attributes"
# define XML_TAG_PARAMS "parameters"
# define XML_TAG_PARAM "param"
# define XML_TAG_UTILIZATION "utilization"
# define XML_TAG_RESOURCE_REF "resource_ref"
# define XML_CIB_TAG_RESOURCE "primitive"
# define XML_CIB_TAG_GROUP "group"
# define XML_CIB_TAG_INCARNATION "clone"
# define XML_CIB_TAG_CONTAINER "bundle"
# define XML_CIB_TAG_RSC_TEMPLATE "template"
# define XML_RSC_ATTR_TARGET "container-attribute-target"
# define XML_RSC_ATTR_RESTART "restart-type"
# define XML_RSC_ATTR_ORDERED "ordered"
# define XML_RSC_ATTR_INTERLEAVE "interleave"
# define XML_RSC_ATTR_INCARNATION "clone"
# define XML_RSC_ATTR_PROMOTABLE "promotable"
# define XML_RSC_ATTR_MANAGED "is-managed"
# define XML_RSC_ATTR_TARGET_ROLE "target-role"
# define XML_RSC_ATTR_UNIQUE "globally-unique"
# define XML_RSC_ATTR_NOTIFY "notify"
# define XML_RSC_ATTR_STICKINESS "resource-stickiness"
# define XML_RSC_ATTR_MULTIPLE "multiple-active"
# define XML_RSC_ATTR_REQUIRES "requires"
# define XML_RSC_ATTR_CONTAINER "container"
# define XML_RSC_ATTR_INTERNAL_RSC "internal_rsc"
# define XML_RSC_ATTR_MAINTENANCE "maintenance"
# define XML_RSC_ATTR_REMOTE_NODE "remote-node"
# define XML_RSC_ATTR_CLEAR_OP "clear_failure_op"
# define XML_RSC_ATTR_CLEAR_INTERVAL "clear_failure_interval"
# define XML_RSC_ATTR_REMOTE_RA_ADDR "addr"
# define XML_RSC_ATTR_REMOTE_RA_SERVER "server"
# define XML_RSC_ATTR_REMOTE_RA_PORT "port"
# define XML_RSC_ATTR_CRITICAL "critical"
# define XML_REMOTE_ATTR_RECONNECT_INTERVAL "reconnect_interval"
# define XML_OP_ATTR_ON_FAIL "on-fail"
# define XML_OP_ATTR_START_DELAY "start-delay"
# define XML_OP_ATTR_ALLOW_MIGRATE "allow-migrate"
# define XML_OP_ATTR_ORIGIN "interval-origin"
# define XML_OP_ATTR_PENDING "record-pending"
# define XML_OP_ATTR_DIGESTS_ALL "digests-all"
# define XML_OP_ATTR_DIGESTS_SECURE "digests-secure"
# define XML_CIB_TAG_LRM "lrm"
# define XML_LRM_TAG_RESOURCES "lrm_resources"
# define XML_LRM_TAG_RESOURCE "lrm_resource"
# define XML_LRM_TAG_RSC_OP "lrm_rsc_op"
# define XML_AGENT_ATTR_CLASS "class"
# define XML_AGENT_ATTR_PROVIDER "provider"
//! \deprecated Do not use (will be removed in a future release)
# define XML_CIB_ATTR_REPLACE "replace"
# define XML_CIB_ATTR_PRIORITY "priority"
# define XML_NODE_IS_REMOTE "remote_node"
# define XML_NODE_IS_FENCED "node_fenced"
# define XML_NODE_IS_MAINTENANCE "node_in_maintenance"
# define XML_CIB_ATTR_SHUTDOWN "shutdown"
/* Aside from being an old name for the executor, LRM is a misnomer here because
* the controller and scheduler use these to track actions, which are not always
* executor operations.
*/
// XML attribute that takes interval specification (user-facing configuration)
# define XML_LRM_ATTR_INTERVAL "interval"
// XML attribute that takes interval in milliseconds (daemon APIs)
// (identical value as above, but different constant allows clearer code intent)
# define XML_LRM_ATTR_INTERVAL_MS XML_LRM_ATTR_INTERVAL
# define XML_LRM_ATTR_TASK "operation"
# define XML_LRM_ATTR_TASK_KEY "operation_key"
# define XML_LRM_ATTR_TARGET "on_node"
# define XML_LRM_ATTR_TARGET_UUID "on_node_uuid"
/*! Actions to be executed on Pacemaker Remote nodes are routed through the
* controller on the cluster node hosting the remote connection. That cluster
* node is considered the router node for the action.
*/
# define XML_LRM_ATTR_ROUTER_NODE "router_node"
# define XML_LRM_ATTR_RSCID "rsc-id"
# define XML_LRM_ATTR_OPSTATUS "op-status"
# define XML_LRM_ATTR_RC "rc-code"
# define XML_LRM_ATTR_CALLID "call-id"
# define XML_LRM_ATTR_OP_DIGEST "op-digest"
# define XML_LRM_ATTR_OP_RESTART "op-force-restart"
# define XML_LRM_ATTR_OP_SECURE "op-secure-params"
# define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest"
# define XML_LRM_ATTR_SECURE_DIGEST "op-secure-digest"
# define XML_LRM_ATTR_EXIT_REASON "exit-reason"
# define XML_RSC_OP_LAST_CHANGE "last-rc-change"
# define XML_RSC_OP_T_EXEC "exec-time"
# define XML_RSC_OP_T_QUEUE "queue-time"
# define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source"
# define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target"
# define XML_TAG_GRAPH "transition_graph"
# define XML_GRAPH_TAG_RSC_OP "rsc_op"
# define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event"
# define XML_GRAPH_TAG_CRM_EVENT "crm_event"
# define XML_GRAPH_TAG_DOWNED "downed"
# define XML_GRAPH_TAG_MAINTENANCE "maintenance"
# define XML_TAG_RULE "rule"
# define XML_RULE_ATTR_SCORE "score"
# define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute"
# define XML_RULE_ATTR_ROLE "role"
# define XML_RULE_ATTR_BOOLEAN_OP "boolean-op"
# define XML_TAG_EXPRESSION "expression"
# define XML_EXPR_ATTR_ATTRIBUTE "attribute"
# define XML_EXPR_ATTR_OPERATION "operation"
# define XML_EXPR_ATTR_VALUE "value"
# define XML_EXPR_ATTR_TYPE "type"
# define XML_EXPR_ATTR_VALUE_SOURCE "value-source"
# define XML_CONS_TAG_RSC_DEPEND "rsc_colocation"
# define XML_CONS_TAG_RSC_ORDER "rsc_order"
# define XML_CONS_TAG_RSC_LOCATION "rsc_location"
# define XML_CONS_TAG_RSC_TICKET "rsc_ticket"
# define XML_CONS_TAG_RSC_SET "resource_set"
# define XML_CONS_ATTR_SYMMETRICAL "symmetrical"
# define XML_LOCATION_ATTR_DISCOVERY "resource-discovery"
# define XML_COLOC_ATTR_SOURCE "rsc"
# define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role"
# define XML_COLOC_ATTR_TARGET "with-rsc"
# define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role"
# define XML_COLOC_ATTR_NODE_ATTR "node-attribute"
# define XML_COLOC_ATTR_INFLUENCE "influence"
//! \deprecated Deprecated since 2.1.5
# define XML_COLOC_ATTR_SOURCE_INSTANCE "rsc-instance"
//! \deprecated Deprecated since 2.1.5
# define XML_COLOC_ATTR_TARGET_INSTANCE "with-rsc-instance"
# define XML_LOC_ATTR_SOURCE "rsc"
# define XML_LOC_ATTR_SOURCE_PATTERN "rsc-pattern"
# define XML_ORDER_ATTR_FIRST "first"
# define XML_ORDER_ATTR_THEN "then"
# define XML_ORDER_ATTR_FIRST_ACTION "first-action"
# define XML_ORDER_ATTR_THEN_ACTION "then-action"
# define XML_ORDER_ATTR_KIND "kind"
//! \deprecated Deprecated since 2.1.5
# define XML_ORDER_ATTR_FIRST_INSTANCE "first-instance"
//! \deprecated Deprecated since 2.1.5
# define XML_ORDER_ATTR_THEN_INSTANCE "then-instance"
# define XML_TICKET_ATTR_TICKET "ticket"
# define XML_TICKET_ATTR_LOSS_POLICY "loss-policy"
# define XML_NVPAIR_ATTR_NAME "name"
# define XML_NVPAIR_ATTR_VALUE "value"
# define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled"
# define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation"
# define XML_CONFIG_ATTR_FENCE_REACTION "fence-reaction"
# define XML_CONFIG_ATTR_SHUTDOWN_LOCK "shutdown-lock"
# define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT "shutdown-lock-limit"
# define XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY "priority-fencing-delay"
# define XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT "node-pending-timeout"
# define XML_ALERT_ATTR_PATH "path"
# define XML_ALERT_ATTR_TIMEOUT "timeout"
# define XML_ALERT_ATTR_TSTAMP_FORMAT "timestamp-format"
# define XML_ALERT_ATTR_REC_VALUE "value"
# define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple"
# define XML_ATTR_TRANSITION_MAGIC "transition-magic"
# define XML_ATTR_TRANSITION_KEY "transition-key"
# define XML_ATTR_TE_NOWAIT "op_no_wait"
# define XML_ATTR_TE_TARGET_RC "op_target_rc"
# define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes"
//! \deprecated Do not use (will be removed in a future release)
# define XML_TAG_DIFF_ADDED "diff-added"
//! \deprecated Do not use (will be removed in a future release)
# define XML_TAG_DIFF_REMOVED "diff-removed"
# define XML_ACL_TAG_USER "acl_target"
# define XML_ACL_TAG_USERv1 "acl_user"
# define XML_ACL_TAG_GROUP "acl_group"
# define XML_ACL_TAG_ROLE "acl_role"
# define XML_ACL_TAG_PERMISSION "acl_permission"
# define XML_ACL_TAG_ROLE_REF "role"
# define XML_ACL_TAG_ROLE_REFv1 "role_ref"
# define XML_ACL_ATTR_KIND "kind"
# define XML_ACL_TAG_READ "read"
# define XML_ACL_TAG_WRITE "write"
# define XML_ACL_TAG_DENY "deny"
# define XML_ACL_ATTR_REF "reference"
# define XML_ACL_ATTR_REFv1 "ref"
# define XML_ACL_ATTR_TAG "object-type"
# define XML_ACL_ATTR_TAGv1 "tag"
# define XML_ACL_ATTR_XPATH "xpath"
# define XML_ACL_ATTR_ATTRIBUTE "attribute"
# define XML_CIB_TAG_TICKETS "tickets"
# define XML_CIB_TAG_TICKET_STATE "ticket_state"
# define XML_CIB_TAG_TAGS "tags"
# define XML_CIB_TAG_TAG "tag"
# define XML_CIB_TAG_OBJ_REF "obj_ref"
# define XML_TAG_FENCING_TOPOLOGY "fencing-topology"
# define XML_TAG_FENCING_LEVEL "fencing-level"
# define XML_ATTR_STONITH_INDEX "index"
# define XML_ATTR_STONITH_TARGET "target"
# define XML_ATTR_STONITH_TARGET_VALUE "target-value"
# define XML_ATTR_STONITH_TARGET_PATTERN "target-pattern"
# define XML_ATTR_STONITH_TARGET_ATTRIBUTE "target-attribute"
# define XML_ATTR_STONITH_DEVICES "devices"
# define XML_TAG_DIFF "diff"
# define XML_DIFF_VERSION "version"
# define XML_DIFF_VSOURCE "source"
# define XML_DIFF_VTARGET "target"
# define XML_DIFF_CHANGE "change"
# define XML_DIFF_LIST "change-list"
# define XML_DIFF_ATTR "change-attr"
# define XML_DIFF_RESULT "change-result"
# define XML_DIFF_OP "operation"
# define XML_DIFF_PATH "path"
# define XML_DIFF_POSITION "position"
# define ID(x) crm_element_value(x, XML_ATTR_ID)
#ifdef __cplusplus
}
#endif
#endif
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:00 PM (1 d, 6 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002428
Default Alt Text
(87 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment