Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2022253
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
46 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/fenced/fenced_scheduler.c b/daemons/fenced/fenced_scheduler.c
index 609a506a8e..3e9aa32e13 100644
--- a/daemons/fenced/fenced_scheduler.c
+++ b/daemons/fenced/fenced_scheduler.c
@@ -1,14 +1,71 @@
/*
* Copyright 2009-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
+#include <stdio.h>
+#include <errno.h>
+
#include <crm/pengine/status.h>
+#include <crm/pengine/internal.h>
+
+#include <pacemaker-internal.h>
+#include <pacemaker-fenced.h>
pcmk_scheduler_t *fenced_data_set = NULL;
+
+/*!
+ * \internal
+ * \brief Initialize scheduler data for fencer purposes
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+fenced_scheduler_init(void)
+{
+ pcmk__output_t *logger = NULL;
+ int rc = pcmk__log_output_new(&logger);
+
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ fenced_data_set = pe_new_working_set();
+ if (fenced_data_set == NULL) {
+ pcmk__output_free(logger);
+ return ENOMEM;
+ }
+
+ pe__register_messages(logger);
+ pcmk__register_lib_messages(logger);
+ pcmk__output_set_log_level(logger, LOG_TRACE);
+ fenced_data_set->priv = logger;
+
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Free all scheduler-related resources
+ */
+void
+fenced_scheduler_cleanup(void)
+{
+ if (fenced_data_set != NULL) {
+ pcmk__output_t *logger = fenced_data_set->priv;
+
+ if (logger != NULL) {
+ logger->finish(logger, CRM_EX_OK, true, NULL);
+ pcmk__output_free(logger);
+ fenced_data_set->priv = NULL;
+ }
+ pe_free_working_set(fenced_data_set);
+ fenced_data_set = NULL;
+ }
+}
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index d352555162..7c69fb8a10 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,915 +1,898 @@
/*
* Copyright 2009-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> // PRIu32, PRIx32
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/output_internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
-#include <crm/pengine/internal.h>
-#include <pacemaker-internal.h>
#include <pacemaker-fenced.h>
#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
-extern pcmk_scheduler_t *fenced_data_set;
-
char *stonith_our_uname = NULL;
long stonith_watchdog_timeout_ms = 0;
GList *stonith_watchdog_targets = NULL;
static GMainLoop *mainloop = NULL;
gboolean stand_alone = FALSE;
gboolean stonith_shutdown_flag = FALSE;
static qb_ipcs_service_t *ipcs = NULL;
-static pcmk__output_t *logger_out = NULL;
static pcmk__output_t *out = NULL;
pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
static struct {
bool no_cib_connect;
gchar **log_files;
} options;
crm_exit_t exit_code = CRM_EX_OK;
static void stonith_cleanup(void);
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
if (stonith_shutdown_flag) {
crm_info("Ignoring new client [%d] during shutdown",
pcmk__client_pid(c));
return -EPERM;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
/* Exit code means? */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
xmlNode *request = NULL;
pcmk__client_t *c = pcmk__find_client(qbc);
const char *op = NULL;
if (c == NULL) {
crm_info("Invalid client: %p", qbc);
return 0;
}
request = pcmk__client_data2xml(c, data, &id, &flags);
if (request == NULL) {
pcmk__ipc_send_ack(c, id, flags, "nack", NULL, CRM_EX_PROTOCOL);
return 0;
}
op = crm_element_value(request, F_CRM_TASK);
if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
crm_xml_add(request, F_TYPE, T_STONITH_NG);
crm_xml_add(request, F_STONITH_OPERATION, op);
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
free_xml(request);
return 0;
}
if (c->name == NULL) {
const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
if (value == NULL) {
value = "unknown";
}
c->name = crm_strdup_printf("%s.%u", value, c->pid);
}
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
" from client %s", flags, call_options, id, pcmk__client_name(c));
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
c->request_id = id; /* Reply only to the last one */
}
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
crm_log_xml_trace(request, "ipc-received");
stonith_command(c, id, flags, request, NULL);
free_xml(request);
return 0;
}
/* Error code means? */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p closed", c);
pcmk__free_client(client);
/* 0 means: yes, go ahead and destroy the connection */
return 0;
}
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p destroyed", c);
st_ipc_closed(c);
}
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
const char *remote_peer = crm_element_value(msg, F_ORIG);
const char *op = crm_element_value(msg, F_STONITH_OPERATION);
if (pcmk__str_eq(op, "poke", pcmk__str_none)) {
return;
}
crm_log_xml_trace(msg, "Peer[inbound]");
stonith_command(NULL, 0, 0, msg, remote_peer);
}
#if SUPPORT_COROSYNC
static void
stonith_peer_ais_callback(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = string2xml(data);
if (xml == NULL) {
crm_err("Invalid XML: '%.120s'", data);
free(data);
return;
}
crm_xml_add(xml, F_ORIG, from);
/* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
stonith_peer_callback(xml, NULL);
}
free_xml(xml);
free(data);
return;
}
static void
stonith_peer_cs_destroy(gpointer user_data)
{
crm_crit("Lost connection to cluster layer, shutting down");
stonith_shutdown(0);
}
#endif
void
do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
int call_options)
{
/* send callback to originating child */
int local_rc = pcmk_rc_ok;
int rid = 0;
uint32_t ipc_flags = crm_ipc_server_event;
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_LOG_ASSERT(client->request_id);
rid = client->request_id;
client->request_id = 0;
ipc_flags = crm_ipc_flags_none;
}
local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
if (local_rc == pcmk_rc_ok) {
crm_trace("Sent response %d to client %s",
rid, pcmk__client_name(client));
} else {
crm_warn("%synchronous reply to client %s failed: %s",
(pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"),
pcmk__client_name(client), pcmk_rc_str(local_rc));
}
}
uint64_t
get_stonith_flag(const char *name)
{
if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
return st_callback_notify_fence;
} else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
return st_callback_device_add;
} else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
return st_callback_device_del;
} else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) {
return st_callback_notify_history;
} else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) {
return st_callback_notify_history_synced;
}
return st_callback_unknown;
}
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
const xmlNode *update_msg = user_data;
pcmk__client_t *client = value;
const char *type = NULL;
CRM_CHECK(client != NULL, return);
CRM_CHECK(update_msg != NULL, return);
type = crm_element_value(update_msg, F_SUBTYPE);
CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
if (client->ipcs == NULL) {
crm_trace("Skipping client with NULL channel");
return;
}
if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
int rc = pcmk__ipc_send_xml(client, 0, update_msg,
crm_ipc_server_event);
if (rc != pcmk_rc_ok) {
crm_warn("%s notification of client %s failed: %s "
CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
pcmk_rc_str(rc), client->id, rc);
} else {
crm_trace("Sent %s notification to client %s",
type, pcmk__client_name(client));
}
}
}
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
pcmk__client_t *client = NULL;
xmlNode *notify_data = NULL;
if (!timeout || !call_id || !client_id) {
return;
}
client = pcmk__find_client_by_id(client_id);
if (!client) {
return;
}
notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);
crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
if (client) {
pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
}
free_xml(notify_data);
}
/*!
* \internal
* \brief Notify relevant IPC clients of a fencing operation result
*
* \param[in] type Notification type
* \param[in] result Result of fencing operation (assume success if NULL)
* \param[in] data If not NULL, add to notification as call data
*/
void
fenced_send_notification(const char *type, const pcmk__action_result_t *result,
xmlNode *data)
{
/* TODO: Standardize the contents of data */
xmlNode *update_msg = create_xml_node(NULL, "notify");
CRM_LOG_ASSERT(type != NULL);
crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(update_msg, F_SUBTYPE, type);
crm_xml_add(update_msg, F_STONITH_OPERATION, type);
stonith__xe_set_result(update_msg, result);
if (data != NULL) {
add_message_xml(update_msg, F_STONITH_CALLDATA, data);
}
crm_trace("Notifying clients");
pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
free_xml(update_msg);
crm_trace("Notify complete");
}
/*!
* \internal
* \brief Send notifications for a configuration change to subscribed clients
*
* \param[in] op Notification type (STONITH_OP_DEVICE_ADD,
* STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or
* STONITH_OP_LEVEL_DEL)
* \param[in] result Operation result
* \param[in] desc Description of what changed
* \param[in] active Current number of devices or topologies in use
*/
static void
send_config_notification(const char *op, const pcmk__action_result_t *result,
const char *desc, int active)
{
xmlNode *notify_data = create_xml_node(NULL, op);
CRM_CHECK(notify_data != NULL, return);
crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
fenced_send_notification(op, result, notify_data);
free_xml(notify_data);
}
/*!
* \internal
* \brief Send notifications for a device change to subscribed clients
*
* \param[in] op Notification type (STONITH_OP_DEVICE_ADD or
* STONITH_OP_DEVICE_DEL)
* \param[in] result Operation result
* \param[in] desc ID of device that changed
*/
void
fenced_send_device_notification(const char *op,
const pcmk__action_result_t *result,
const char *desc)
{
send_config_notification(op, result, desc, g_hash_table_size(device_list));
}
/*!
* \internal
* \brief Send notifications for a topology level change to subscribed clients
*
* \param[in] op Notification type (STONITH_OP_LEVEL_ADD or
* STONITH_OP_LEVEL_DEL)
* \param[in] result Operation result
* \param[in] desc String representation of level (<target>[<level_index>])
*/
void
fenced_send_level_notification(const char *op,
const pcmk__action_result_t *result,
const char *desc)
{
send_config_notification(op, result, desc, g_hash_table_size(topology));
}
/*!
* \internal
* \brief Check whether a node does watchdog-fencing
*
* \param[in] node Name of node to check
*
* \return TRUE if node found in stonith_watchdog_targets
* or stonith_watchdog_targets is empty indicating
* all nodes are doing watchdog-fencing
*/
gboolean
node_does_watchdog_fencing(const char *node)
{
return ((stonith_watchdog_targets == NULL) ||
pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
}
void
stonith_shutdown(int nsig)
{
crm_info("Terminating with %d clients", pcmk__ipc_client_count());
stonith_shutdown_flag = TRUE;
if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
g_main_loop_quit(mainloop);
}
}
static void
stonith_cleanup(void)
{
fenced_cib_cleanup();
if (ipcs) {
qb_ipcs_destroy(ipcs);
}
crm_peer_destroy();
pcmk__client_cleanup();
free_stonith_remote_op_list();
free_topology_list();
free_device_list();
free_metadata_cache();
fenced_unregister_handlers();
free(stonith_our_uname);
stonith_our_uname = NULL;
}
static gboolean
stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **error)
{
stand_alone = FALSE;
options.no_cib_connect = true;
return TRUE;
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = NULL,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
* \internal
* \brief Callback for peer status changes
*
* \param[in] type What changed
* \param[in] node What peer had the change
* \param[in] data Previous value of what changed
*/
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
if ((type != crm_status_processes)
&& !pcmk_is_set(node->flags, crm_remote_node)) {
/*
* This is a hack until we can send to a nodeid and/or we fix node name lookups
* These messages are ignored in stonith_peer_callback()
*/
xmlNode *query = create_xml_node(NULL, "stonith_command");
crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
crm_xml_add(query, F_TYPE, T_STONITH_NG);
crm_xml_add(query, F_STONITH_OPERATION, "poke");
crm_debug("Broadcasting our uname because of node %u", node->id);
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
}
}
static pcmk__cluster_option_t fencer_options[] = {
/* name, old name, type, allowed values,
* default value, validator,
* short description,
* long description
*/
{
PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL,
N_("Advanced use only: An alternate parameter to supply instead of 'port'"),
N_("some devices do not support the "
"standard 'port' parameter or may provide additional ones. Use "
"this to specify an alternate, device-specific, parameter "
"that should indicate the machine to be fenced. A value of "
"none can be used to tell the cluster not to supply any "
"additional parameters.")
},
{
PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL,
N_("A mapping of host names to ports numbers for devices that do not support host names."),
N_("Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2")
},
{
PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL,
N_("Eg. node1,node2,node3"),
N_("A list of machines controlled by "
"this device (Optional unless pcmk_host_list=static-list)")
},
{
PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL,
N_("How to determine which machines are controlled by the device."),
N_("Allowed values: dynamic-list "
"(query the device via the 'list' command), static-list "
"(check the pcmk_host_list attribute), status "
"(query the device via the 'status' command), "
"none (assume every device can fence every "
"machine)")
},
{
PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL,
N_("Enable a base delay for fencing actions and specify base delay value."),
N_("Enable a delay of no more than the "
"time specified before executing fencing actions. Pacemaker "
"derives the overall delay by taking the value of "
"pcmk_delay_base and adding a random delay value such "
"that the sum is kept below this maximum.")
},
{
PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL,
N_("Enable a base delay for "
"fencing actions and specify base delay value."),
N_("This enables a static delay for "
"fencing actions, which can help avoid \"death matches\" where "
"two nodes try to fence each other at the same time. If "
"pcmk_delay_max is also used, a random delay will be "
"added such that the total delay is kept below that value."
"This can be set to a single time value to apply to any node "
"targeted by this device (useful if a separate device is "
"configured for each target), or to a node map (for example, "
"\"node1:1s;node2:5\") to set a different value per target.")
},
{
PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL,
N_("The maximum number of actions can be performed in parallel on this device"),
N_("Cluster property concurrent-fencing=true needs to be configured first."
"Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.")
},
{
"pcmk_reboot_action", NULL, "string", NULL,
PCMK_ACTION_REBOOT, NULL,
N_("Advanced use only: An alternate command to run instead of 'reboot'"),
N_("Some devices do not support the standard commands or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.")
},
{
"pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL,
N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"),
N_("Some devices need much more/less time to complete than normal."
"Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.")
},
{
"pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL,
N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"),
N_("Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.")
},
{
"pcmk_off_action", NULL, "string", NULL,
PCMK_ACTION_OFF, NULL,
N_("Advanced use only: An alternate command to run instead of \'off\'"),
N_("Some devices do not support the standard commands or may provide additional ones."
"Use this to specify an alternate, device-specific, command that implements the \'off\' action.")
},
{
"pcmk_off_timeout",NULL, "time", NULL, "60s", NULL,
N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"),
N_("Some devices need much more/less time to complete than normal."
"Use this to specify an alternate, device-specific, timeout for \'off\' actions.")
},
{
"pcmk_off_retries",NULL, "integer", NULL, "2", NULL,
N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"),
N_("Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.")
},
{
"pcmk_on_action", NULL, "string", NULL,
PCMK_ACTION_ON, NULL,
N_("Advanced use only: An alternate command to run instead of 'on'"),
N_("Some devices do not support the standard commands or may provide additional ones."
"Use this to specify an alternate, device-specific, command that implements the \'on\' action.")
},
{
"pcmk_on_timeout",NULL, "time", NULL, "60s", NULL,
N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"),
N_("Some devices need much more/less time to complete than normal."
"Use this to specify an alternate, device-specific, timeout for \'on\' actions.")
},
{
"pcmk_on_retries",NULL, "integer", NULL, "2", NULL,
N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"),
N_("Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.")
},
{
"pcmk_list_action",NULL, "string", NULL,
PCMK_ACTION_LIST, NULL,
N_("Advanced use only: An alternate command to run instead of \'list\'"),
N_("Some devices do not support the standard commands or may provide additional ones."
"Use this to specify an alternate, device-specific, command that implements the \'list\' action.")
},
{
"pcmk_list_timeout",NULL, "time", NULL, "60s", NULL,
N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"),
N_("Some devices need much more/less time to complete than normal."
"Use this to specify an alternate, device-specific, timeout for \'list\' actions.")
},
{
"pcmk_list_retries",NULL, "integer", NULL, "2", NULL,
N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"),
N_("Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.")
},
{
"pcmk_monitor_action", NULL, "string", NULL,
PCMK_ACTION_MONITOR, NULL,
N_("Advanced use only: An alternate command to run instead of \'monitor\'"),
N_("Some devices do not support the standard commands or may provide additional ones."
"Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.")
},
{
"pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL,
N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"),
N_("Some devices need much more/less time to complete than normal.\n"
"Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.")
},
{
"pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL,
N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"),
N_("Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.")
},
{
"pcmk_status_action", NULL, "string", NULL,
PCMK_ACTION_STATUS, NULL,
N_("Advanced use only: An alternate command to run instead of \'status\'"),
N_("Some devices do not support the standard commands or may provide additional ones."
"Use this to specify an alternate, device-specific, command that implements the \'status\' action.")
},
{
"pcmk_status_timeout",NULL, "time", NULL, "60s", NULL,
N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"),
N_("Some devices need much more/less time to complete than normal."
"Use this to specify an alternate, device-specific, timeout for \'status\' actions.")
},
{
"pcmk_status_retries",NULL, "integer", NULL, "2", NULL,
N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"),
N_("Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries \'status\' actions before giving up.")
},
};
void
fencer_metadata(void)
{
const char *desc_short = N_("Instance attributes available for all "
"\"stonith\"-class resources");
const char *desc_long = N_("Instance attributes available for all \"stonith\"-"
"class resources and used by Pacemaker's fence "
"daemon, formerly known as stonithd");
gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short,
desc_long, fencer_options,
PCMK__NELEM(fencer_options));
printf("%s", s);
g_free(s);
}
static GOptionEntry entries[] = {
{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
N_("Deprecated (will be removed in a future release)"), NULL },
{ "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL },
{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
&options.log_files, N_("Send logs to the additional named logfile"), NULL },
{ NULL }
};
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
GOptionContext *context = NULL;
context = pcmk__build_arg_context(args, "text (default), xml", group,
"[metadata]");
pcmk__add_main_args(context, entries);
return context;
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
crm_cluster_t *cluster = NULL;
crm_ipc_t *old_instance = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
fencer_metadata();
goto done;
}
// Open additional log files
pcmk__add_logfiles(options.log_files, out);
crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
(args->verbosity > 0), argc, argv, FALSE);
crm_notice("Starting Pacemaker fencer");
old_instance = crm_ipc_new("stonith-ng", 0);
if (old_instance == NULL) {
/* crm_ipc_new() will have already logged an error message with
* crm_err()
*/
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
// IPC endpoint already up
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-fenced is already active, aborting startup");
goto done;
} else {
// Not up or not authentic, we'll proceed either way
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
mainloop_add_signal(SIGTERM, stonith_shutdown);
crm_peer_init();
- fenced_data_set = pe_new_working_set();
- CRM_ASSERT(fenced_data_set != NULL);
+ rc = fenced_scheduler_init();
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_FATAL;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error initializing scheduler data: %s", pcmk_rc_str(rc));
+ goto done;
+ }
cluster = pcmk_cluster_new();
if (!stand_alone) {
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
cluster->destroy = stonith_peer_cs_destroy;
cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback;
cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
}
#endif // SUPPORT_COROSYNC
crm_set_status_callback(&st_peer_update_callback);
if (crm_cluster_connect(cluster) == FALSE) {
exit_code = CRM_EX_FATAL;
crm_crit("Cannot sign in to the cluster... terminating");
goto done;
}
pcmk__str_update(&stonith_our_uname, cluster->uname);
if (!options.no_cib_connect) {
setup_cib();
}
} else {
pcmk__str_update(&stonith_our_uname, "localhost");
crm_warn("Stand-alone mode is deprecated and will be removed "
"in a future release");
}
init_device_list();
init_topology_list();
pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
- rc = pcmk__log_output_new(&logger_out);
- if (rc != pcmk_rc_ok) {
- exit_code = CRM_EX_FATAL;
- g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
- "Error creating output format log: %s", pcmk_rc_str(rc));
- goto done;
- }
- pe__register_messages(logger_out);
- pcmk__register_lib_messages(logger_out);
- pcmk__output_set_log_level(logger_out, LOG_TRACE);
- fenced_data_set->priv = logger_out;
-
// Create the mainloop and run it...
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker fencer successfully started and accepting connections");
g_main_loop_run(mainloop);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
g_strfreev(options.log_files);
stonith_cleanup();
pcmk_cluster_free(cluster);
- pe_free_working_set(fenced_data_set);
+ fenced_scheduler_cleanup();
pcmk__output_and_clear_error(&error, out);
- if (logger_out != NULL) {
- logger_out->finish(logger_out, exit_code, true, NULL);
- pcmk__output_free(logger_out);
- }
-
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index bf0d5d37dc..c2acf184bd 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -1,328 +1,331 @@
/*
* Copyright 2009-2023 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <stdint.h> // uint32_t, uint64_t
#include <crm/common/mainloop.h>
#include <crm/cluster.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
/*!
* \internal
* \brief Check whether target has already been fenced recently
*
* \param[in] tolerance Number of seconds to look back in time
* \param[in] target Name of node to search for
* \param[in] action Action we want to match
*
* \return TRUE if an equivalent fencing operation took place in the last
* \p tolerance seconds, FALSE otherwise
*/
gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
typedef struct stonith_device_s {
char *id;
char *agent;
char *namespace;
/*! list of actions that must execute on the target node. Used for unfencing */
GString *on_target_actions;
GList *targets;
time_t targets_age;
gboolean has_attr_map;
// Whether target's nodeid should be passed as a parameter to the agent
gboolean include_nodeid;
/* whether the cluster should automatically unfence nodes with the device */
gboolean automatic_unfencing;
guint priority;
uint32_t flags; // Group of enum st_device_flags
GHashTable *params;
GHashTable *aliases;
GList *pending_ops;
mainloop_timer_t *timer;
crm_trigger_t *work;
xmlNode *agent_metadata;
/*! A verified device is one that has contacted the
* agent successfully to perform a monitor operation */
gboolean verified;
gboolean cib_registered;
gboolean api_registered;
gboolean dirty;
} stonith_device_t;
/* These values are used to index certain arrays by "phase". Usually an
* operation has only one "phase", so phase is always zero. However, some
* reboots are remapped to "off" then "on", in which case "reboot" will be
* phase 0, "off" will be phase 1 and "on" will be phase 2.
*/
enum st_remap_phase {
st_phase_requested = 0,
st_phase_off = 1,
st_phase_on = 2,
st_phase_max = 3
};
typedef struct remote_fencing_op_s {
/* The unique id associated with this operation */
char *id;
/*! The node this operation will fence */
char *target;
/*! The fencing action to perform on the target. (reboot, on, off) */
char *action;
/*! When was the fencing action recorded (seconds since epoch) */
time_t created;
/*! Marks if the final notifications have been sent to local stonith clients. */
gboolean notify_sent;
/*! The number of query replies received */
guint replies;
/*! The number of query replies expected */
guint replies_expected;
/*! Does this node own control of this operation */
gboolean owner;
/*! After query is complete, This the high level timer that expires the entire operation */
guint op_timer_total;
/*! This timer expires the current fencing request. Many fencing
* requests may exist in a single operation */
guint op_timer_one;
/*! This timer expires the query request sent out to determine
* what nodes are contain what devices, and who those devices can fence */
guint query_timer;
/*! This is the default timeout to use for each fencing device if no
* custom timeout is received in the query. */
gint base_timeout;
/*! This is the calculated total timeout an operation can take before
* expiring. This is calculated by adding together all the timeout
* values associated with the devices this fencing operation may call */
gint total_timeout;
/*!
* Fencing delay (in seconds) requested by API client (used by controller to
* implement priority-fencing-delay). A value of -1 means disable all
* configured delays.
*/
int client_delay;
/*! Delegate is the node being asked to perform a fencing action
* on behalf of the node that owns the remote operation. Some operations
* will involve multiple delegates. This value represents the final delegate
* that is used. */
char *delegate;
/*! The point at which the remote operation completed */
time_t completed;
//! Group of enum stonith_call_options associated with this operation
uint32_t call_options;
/*! The current state of the remote operation. This indicates
* what stage the op is in, query, exec, done, duplicate, failed. */
enum op_state state;
/*! The node that owns the remote operation */
char *originator;
/*! The local client id that initiated the fencing request */
char *client_id;
/*! The client's call_id that initiated the fencing request */
int client_callid;
/*! The name of client that initiated the fencing request */
char *client_name;
/*! List of the received query results for all the nodes in the cpg group */
GList *query_results;
/*! The original request that initiated the remote stonith operation */
xmlNode *request;
/*! The current topology level being executed */
guint level;
/*! The current operation phase being executed */
enum st_remap_phase phase;
/*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
GList *automatic_list;
/*! List of all devices at the currently executing topology level */
GList *devices_list;
/*! Current entry in the topology device list */
GList *devices;
/*! List of duplicate operations attached to this operation. Once this operation
* completes, the duplicate operations will be closed out as well. */
GList *duplicates;
/*! The point at which the remote operation completed(nsec) */
long long completed_nsec;
/*! The (potentially intermediate) result of the operation */
pcmk__action_result_t result;
} remote_fencing_op_t;
void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged);
// Fencer-specific client flags
enum st_client_flags {
st_callback_unknown = UINT64_C(0),
st_callback_notify_fence = (UINT64_C(1) << 0),
st_callback_device_add = (UINT64_C(1) << 2),
st_callback_device_del = (UINT64_C(1) << 4),
st_callback_notify_history = (UINT64_C(1) << 5),
st_callback_notify_history_synced = (UINT64_C(1) << 6)
};
// How the user specified the target of a topology level
enum fenced_target_by {
fenced_target_by_unknown = -1, // Invalid or not yet parsed
fenced_target_by_name, // By target name
fenced_target_by_pattern, // By a pattern matching target names
fenced_target_by_attribute, // By a node attribute/value on target
};
/*
* Complex fencing requirements are specified via fencing topologies.
* A topology consists of levels; each level is a list of fencing devices.
* Topologies are stored in a hash table by node name. When a node needs to be
* fenced, if it has an entry in the topology table, the levels are tried
* sequentially, and the devices in each level are tried sequentially.
* Fencing is considered successful as soon as any level succeeds;
* a level is considered successful if all its devices succeed.
* Essentially, all devices at a given level are "and-ed" and the
* levels are "or-ed".
*
* This structure is used for the topology table entries.
* Topology levels start from 1, so levels[0] is unused and always NULL.
*/
typedef struct stonith_topology_s {
enum fenced_target_by kind; // How target was specified
/*! Node name regex or attribute name=value for which topology applies */
char *target;
char *target_value;
char *target_pattern;
char *target_attribute;
/*! Names of fencing devices at each topology level */
GList *levels[ST_LEVEL_MAX];
} stonith_topology_t;
void stonith_shutdown(int nsig);
void init_device_list(void);
void free_device_list(void);
void init_topology_list(void);
void free_topology_list(void);
void free_stonith_remote_op_list(void);
void init_stonith_remote_op_hash_table(GHashTable **table);
void free_metadata_cache(void);
void fenced_unregister_handlers(void);
uint64_t get_stonith_flag(const char *name);
void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
xmlNode *op_request, const char *remote_peer);
int stonith_device_register(xmlNode *msg, gboolean from_cib);
void stonith_device_remove(const char *id, bool from_cib);
char *stonith_level_key(const xmlNode *msg, enum fenced_target_by);
void fenced_register_level(xmlNode *msg, char **desc,
pcmk__action_result_t *result);
void fenced_unregister_level(xmlNode *msg, char **desc,
pcmk__action_result_t *result);
stonith_topology_t *find_topology_for_host(const char *host);
void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
int call_options);
xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data,
const pcmk__action_result_t *result);
void
do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
void fenced_send_notification(const char *type,
const pcmk__action_result_t *result,
xmlNode *data);
void fenced_send_device_notification(const char *op,
const pcmk__action_result_t *result,
const char *desc);
void fenced_send_level_notification(const char *op,
const pcmk__action_result_t *result,
const char *desc);
remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client,
xmlNode *request,
gboolean manual_ack);
void fenced_process_fencing_reply(xmlNode *msg);
int process_remote_stonith_query(xmlNode * msg);
void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
void stonith_fence_history(xmlNode *msg, xmlNode **output,
const char *remote_peer, int options);
void stonith_fence_history_trim(void);
bool fencing_peer_active(crm_node_t *peer);
void set_fencing_completed(remote_fencing_op_t * op);
int fenced_handle_manual_confirmation(const pcmk__client_t *client,
xmlNode *msg);
void fencer_metadata(void);
const char *fenced_device_reboot_action(const char *device_id);
bool fenced_device_supports_on(const char *device_id);
gboolean node_has_attr(const char *node, const char *name, const char *value);
gboolean node_does_watchdog_fencing(const char *node);
void fencing_topology_init(void);
void setup_cib(void);
void fenced_cib_cleanup(void);
+int fenced_scheduler_init(void);
+void fenced_scheduler_cleanup(void);
+
static inline void
fenced_set_protocol_error(pcmk__action_result_t *result)
{
pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
"Fencer API request missing required information (bug?)");
}
/*!
* \internal
* \brief Get the device flag to use with a given action when searching devices
*
* \param[in] action Action to check
*
* \return st_device_supports_on if \p action is "on", otherwise
* st_device_supports_none
*/
static inline uint32_t
fenced_support_flag(const char *action)
{
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
return st_device_supports_on;
}
return st_device_supports_none;
}
extern char *stonith_our_uname;
extern gboolean stand_alone;
extern GHashTable *device_list;
extern GHashTable *topology;
extern long stonith_watchdog_timeout_ms;
extern GList *stonith_watchdog_targets;
extern GHashTable *stonith_remote_op_list;
extern crm_exit_t exit_code;
extern gboolean stonith_shutdown_flag;
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Dec 23, 10:39 PM (1 d, 18 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1129259
Default Alt Text
(46 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment