Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 932925b97c..96ad5a33ec 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,1547 +1,1549 @@
/*
* Copyright 2009-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> // PRIu32, PRIx32
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include <pacemaker-fenced.h>
/* Local node name; released and reset to NULL by stonith_cleanup() */
char *stonith_our_uname = NULL;
/* NOTE(review): presumably the local node's UUID; nothing in this part of the
 * file sets or frees it -- confirm against the rest of the daemon
 */
char *stonith_our_uuid = NULL;
/* Watchdog timeout in milliseconds; update_cib_cache_cb() refreshes it from
 * the stonith-watchdog-timeout nvpair and zeroes it when fencing is disabled
 */
long stonith_watchdog_timeout_ms = 0;
/* Main loop handle; stonith_shutdown() quits it if it is running */
static GMainLoop *mainloop = NULL;
/* Set by the 's' command-line option, cleared by 'c' */
gboolean stand_alone = FALSE;
/* Set by the 'c' command-line option */
static gboolean no_cib_connect = FALSE;
/* Set once stonith_shutdown() runs; st_ipc_accept() then rejects new clients */
static gboolean stonith_shutdown_flag = FALSE;
/* IPC server handle; destroyed by stonith_cleanup() */
static qb_ipcs_service_t *ipcs = NULL;
/* Locally cached copy of the CIB, kept current by update_cib_cache_cb() */
static xmlNode *local_cib = NULL;
/* Working set that cib_devices_update() fills in and resets on each refresh */
static pe_working_set_t *fenced_data_set = NULL;
/* CIB connection object created by setup_cib() */
static cib_t *cib_api = NULL;
/* Defined further down; declared here because earlier callbacks call it */
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
/*!
 * \internal
 * \brief Decide whether a new IPC client connection may be accepted
 *
 * \param[in] c    Incoming libqb connection
 * \param[in] uid  User ID of the connecting process
 * \param[in] gid  Group ID of the connecting process
 *
 * \return 0 on success, -EPERM if the daemon is shutting down, or -EIO if no
 *         client object could be created for the connection
 */
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    int32_t verdict = 0;

    if (stonith_shutdown_flag) {
        crm_info("Ignoring new client [%d] during shutdown",
                 pcmk__client_pid(c));
        verdict = -EPERM;

    } else if (pcmk__new_client(c, uid, gid) == NULL) {
        verdict = -EIO;
    }
    return verdict;
}
/*!
 * \internal
 * \brief Handle one message received from a local IPC client
 *
 * Node-cache removal requests are relabelled as fencer messages and broadcast
 * to the cluster; everything else is tagged with the client's identity and
 * handed to stonith_command().
 *
 * \param[in] qbc   libqb connection the message arrived on
 * \param[in] data  Raw message payload
 * \param[in] size  Payload size (not used here)
 *
 * \return Always 0
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t msg_id = 0;
    uint32_t ipc_flags = 0;
    int call_opts = 0;
    xmlNode *request = NULL;
    const char *task = NULL;
    pcmk__client_t *client = pcmk__find_client(qbc);

    if (client == NULL) {
        crm_info("Invalid client: %p", qbc);
        return 0;
    }

    request = pcmk__client_data2xml(client, data, &msg_id, &ipc_flags);
    if (request == NULL) {
        pcmk__ipc_send_ack(client, msg_id, ipc_flags, "nack", CRM_EX_PROTOCOL);
        return 0;
    }

    task = crm_element_value(request, F_CRM_TASK);

    if (pcmk__str_eq(task, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
        /* Relay the cache purge to all fencers instead of handling it here */
        crm_xml_add(request, F_TYPE, T_STONITH_NG);
        crm_xml_add(request, F_STONITH_OPERATION, task);
        crm_xml_add(request, F_STONITH_CLIENTID, client->id);
        crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(client));
        crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
        send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);

    } else {
        if (client->name == NULL) {
            /* First message from this client: derive a display name */
            const char *given = crm_element_value(request,
                                                  F_STONITH_CLIENTNAME);

            client->name = crm_strdup_printf("%s.%u",
                                             (given != NULL)? given : "unknown",
                                             client->pid);
        }

        crm_element_value_int(request, F_STONITH_CALLOPTS, &call_opts);
        crm_trace("Flags 0x%08" PRIx32 "/0x%08x for command %" PRIu32
                  " from client %s",
                  ipc_flags, call_opts, msg_id, pcmk__client_name(client));

        if (pcmk_is_set(call_opts, st_opt_sync_call)) {
            CRM_ASSERT(ipc_flags & crm_ipc_client_response);

            /* A non-zero request_id here means the client already has a
             * synchronous call in flight; only the latest one gets a reply
             */
            CRM_LOG_ASSERT(client->request_id == 0);
            client->request_id = msg_id;
        }

        crm_xml_add(request, F_STONITH_CLIENTID, client->id);
        crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(client));
        crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
        stonith_command(client, msg_id, ipc_flags, request, NULL);
    }

    free_xml(request);
    return 0;
}
/*!
 * \internal
 * \brief Release the client object tied to a closed IPC connection
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0, which tells the IPC layer to go ahead and destroy the
 *         connection
 */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p closed", c);
        pcmk__free_client(client);
    }
    return 0;
}
/*!
 * \internal
 * \brief Connection-destroyed hook: run the same cleanup as a close
 *
 * \param[in] c  Connection being destroyed
 */
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p destroyed", c);

    /* The return value carries no information here, so it is dropped */
    (void) st_ipc_closed(c);
}
/*!
 * \internal
 * \brief Process a fencer message received from a cluster peer
 *
 * "poke" messages (see st_peer_update_callback()) are dropped; anything else
 * is passed to stonith_command() with the sender's name.
 *
 * \param[in] msg           Message XML
 * \param[in] private_data  Unused
 */
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
    const char *operation = crm_element_value(msg, F_STONITH_OPERATION);
    const char *sender = crm_element_value(msg, F_ORIG);

    if (!pcmk__str_eq(operation, "poke", pcmk__str_none)) {
        crm_log_xml_trace(msg, "Peer[inbound]");
        stonith_command(NULL, 0, 0, msg, sender);
    }
}
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Corosync CPG delivery callback for fencer messages
 *
 * Decodes the raw CPG payload and, for cluster-class messages, parses it as
 * XML, records the sender in F_ORIG and passes it to stonith_peer_callback().
 *
 * \param[in] handle     CPG handle the message arrived on
 * \param[in] groupName  CPG group name (unused)
 * \param[in] nodeid     Corosync ID of the sending node
 * \param[in] pid        Process ID of the sender
 * \param[in] msg        Raw message
 * \param[in] msg_len    Raw message length (unused)
 */
static void
stonith_peer_ais_callback(cpg_handle_t handle,
                          const struct cpg_name *groupName,
                          uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    uint32_t kind = 0;
    const char *from = NULL;
    xmlNode *parsed = NULL;
    char *payload = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind,
                                           &from);

    if (payload == NULL) {
        return;
    }

    if (kind == crm_class_cluster) {
        parsed = string2xml(payload);
        if (parsed != NULL) {
            crm_xml_add(parsed, F_ORIG, from);
            stonith_peer_callback(parsed, NULL);
        } else {
            crm_err("Invalid XML: '%.120s'", payload);
        }
    }

    free_xml(parsed);
    free(payload);
}
/*!
 * \internal
 * \brief Cluster-layer disconnect hook: log the loss and shut down
 *
 * \param[in] user_data  Unused
 */
static void
stonith_peer_cs_destroy(gpointer user_data)
{
    (void) user_data;

    crm_crit("Lost connection to cluster layer, shutting down");
    stonith_shutdown(0);
}
#endif
/*!
 * \internal
 * \brief Deliver a reply or event to the local client that originated a request
 *
 * \param[in] notify_src  XML to send
 * \param[in] client_id   ID of the client to send to
 * \param[in] sync_reply  If TRUE, answer the client's pending synchronous
 *                        request (and clear its request ID); otherwise send
 *                        the XML as a server event
 * \param[in] from_peer   Only affects the trace text (marks the client as the
 *                        originator of a delegated request)
 */
void
do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
    pcmk__client_t *recipient = NULL;
    const char *origin_note = from_peer? " (originator of delegated request)" : "";
    int reply_id = 0;
    int send_rc = pcmk_rc_ok;

    crm_trace("Sending response");
    recipient = pcmk__find_client_by_id(client_id);
    crm_trace("Sending callback to request originator");

    if (recipient == NULL) {
        /* Nobody to tell; this is traced only, never warned about */
        crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set.");
        return;
    }

    if (sync_reply) {
        CRM_LOG_ASSERT(recipient->request_id);
        reply_id = recipient->request_id;
        recipient->request_id = 0;
        crm_trace("Sending response %d to client %s%s",
                  reply_id, pcmk__client_name(recipient), origin_note);
    } else {
        crm_trace("Sending an event to client %s%s",
                  pcmk__client_name(recipient), origin_note);
    }

    send_rc = pcmk__ipc_send_xml(recipient, reply_id, notify_src,
                                 (sync_reply? crm_ipc_flags_none
                                  : crm_ipc_server_event));
    if (send_rc != pcmk_rc_ok) {
        crm_warn("%s reply to client %s failed: %s",
                 (sync_reply? "Synchronous" : "Asynchronous"),
                 pcmk__client_name(recipient), pcmk_rc_str(send_rc));
    }
}
/*!
 * \internal
 * \brief Map a notification type name to its client callback flag
 *
 * \param[in] name  Notification type name (compared case-insensitively)
 *
 * \return Matching st_callback_* flag, or st_callback_unknown if the name is
 *         not one of the known notification types
 */
uint64_t
get_stonith_flag(const char *name)
{
    const struct {
        const char *label;
        uint64_t flag;
    } known[] = {
        { T_STONITH_NOTIFY_FENCE,          st_callback_notify_fence },
        { STONITH_OP_DEVICE_ADD,           st_callback_device_add },
        { STONITH_OP_DEVICE_DEL,           st_callback_device_del },
        { T_STONITH_NOTIFY_HISTORY,        st_callback_notify_history },
        { T_STONITH_NOTIFY_HISTORY_SYNCED, st_callback_notify_history_synced },
    };
    size_t i;

    for (i = 0; i < (sizeof(known) / sizeof(known[0])); i++) {
        if (pcmk__str_eq(name, known[i].label, pcmk__str_casei)) {
            return known[i].flag;
        }
    }
    return st_callback_unknown;
}
/*!
 * \internal
 * \brief Send a notification to one client if it subscribed to that type
 *
 * Has the (key, value, user_data) shape of a per-entry table callback and is
 * passed to pcmk__foreach_ipc_client() by do_stonith_notify().
 *
 * \param[in] key        Unused
 * \param[in] value      Client object (pcmk__client_t *)
 * \param[in] user_data  Notification XML; its F_SUBTYPE selects the flag that
 *                       the client must have set
 */
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__client_t *client = value;
    xmlNode *update_msg = user_data;
    const char *type = NULL;
    int rc = pcmk_rc_ok;

    CRM_CHECK(client != NULL, return);
    CRM_CHECK(update_msg != NULL, return);

    type = crm_element_value(update_msg, F_SUBTYPE);
    CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);

    if (client->ipcs == NULL) {
        crm_trace("Skipping client with NULL channel");
        return;
    }

    if (!pcmk_is_set(client->flags, get_stonith_flag(type))) {
        return; /* client did not ask for this kind of notification */
    }

    rc = pcmk__ipc_send_xml(client, 0, update_msg,
                            crm_ipc_server_event|crm_ipc_server_error);
    if (rc == pcmk_rc_ok) {
        crm_trace("Sent %s notification to client %s",
                  type, pcmk__client_name(client));
    } else {
        crm_warn("%s notification of client %s failed: %s "
                 CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
                 pcmk_rc_str(rc), client->id, rc);
    }
}
/*!
 * \internal
 * \brief Tell a client the timeout value in effect for one of its calls
 *
 * Does nothing if any argument is zero/NULL or if the client is no longer
 * connected.
 *
 * \param[in] client_id  ID of the client to notify
 * \param[in] call_id    ID of the call the timeout applies to
 * \param[in] timeout    Timeout value to report (0 means nothing to report)
 */
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
    pcmk__client_t *client = NULL;
    xmlNode *notify_data = NULL;

    if (!timeout || !call_id || !client_id) {
        return;
    }

    client = pcmk__find_client_by_id(client_id);
    if (client == NULL) {
        return;
    }

    notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
    crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);

    crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);

    /* client is known to be non-NULL here, so no second check is needed */
    pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);

    free_xml(notify_data);
}
/*!
 * \internal
 * \brief Broadcast a notification to every subscribed IPC client
 *
 * \param[in] options  Unused
 * \param[in] type     Notification type; stored as both F_SUBTYPE and
 *                     F_STONITH_OPERATION. Must not be NULL.
 * \param[in] result   Result code stored as F_STONITH_RC
 * \param[in] data     Optional payload attached as F_STONITH_CALLDATA
 *
 * \note stonith_notify_client() refuses any message without F_SUBTYPE, so a
 *       NULL type can never reach a client. The check therefore bails out
 *       before the message is built, rather than logging once here and then
 *       once more per connected client.
 */
void
do_stonith_notify(int options, const char *type, int result, xmlNode * data)
{
    /* TODO: Standardize the contents of data */
    xmlNode *update_msg = NULL;

    CRM_CHECK(type != NULL, return);

    update_msg = create_xml_node(NULL, "notify");
    crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
    crm_xml_add(update_msg, F_SUBTYPE, type);
    crm_xml_add(update_msg, F_STONITH_OPERATION, type);
    crm_xml_add_int(update_msg, F_STONITH_RC, result);

    if (data != NULL) {
        add_message_xml(update_msg, F_STONITH_CALLDATA, data);
    }

    crm_trace("Notifying clients");
    pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
    free_xml(update_msg);
    crm_trace("Notify complete");
}
/*!
 * \internal
 * \brief Notify clients of a device or topology configuration change
 *
 * \param[in] options  Passed through to do_stonith_notify()
 * \param[in] op       Operation name; also used as the payload element name
 * \param[in] rc       Result of the operation
 * \param[in] desc     Description stored as F_STONITH_DEVICE
 * \param[in] active   Count stored as F_STONITH_ACTIVE
 */
static void
do_stonith_notify_config(int options, const char *op, int rc,
                         const char *desc, int active)
{
    xmlNode *payload = create_xml_node(NULL, op);

    CRM_CHECK(payload != NULL, return);

    crm_xml_add(payload, F_STONITH_DEVICE, desc);
    crm_xml_add_int(payload, F_STONITH_ACTIVE, active);

    do_stonith_notify(options, op, rc, payload);

    free_xml(payload);
}
/*!
 * \internal
 * \brief Notify clients of a device change, reporting the device table size
 *
 * \param[in] options  Passed through to the notification
 * \param[in] op       Operation that was performed
 * \param[in] rc       Result of the operation
 * \param[in] desc     Description of the affected device
 */
void
do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
{
    int n_devices = g_hash_table_size(device_list);

    do_stonith_notify_config(options, op, rc, desc, n_devices);
}
/*!
 * \internal
 * \brief Notify clients of a topology change, reporting the topology table size
 *
 * \param[in] options  Passed through to the notification
 * \param[in] op       Operation that was performed
 * \param[in] rc       Result of the operation
 * \param[in] desc     Description of the affected level
 */
void
do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
{
    int n_targets = g_hash_table_size(topology);

    do_stonith_notify_config(options, op, rc, desc, n_targets);
}
/*!
 * \internal
 * \brief Remove one fencing level for a target and notify clients
 *
 * Builds a minimal fencing-level element for stonith_level_remove() and then
 * broadcasts the outcome as a STONITH_OP_LEVEL_DEL notification.
 *
 * \param[in] node   Target whose level should be removed
 * \param[in] level  Index of the level to remove
 */
static void
topology_remove_helper(const char *node, int level)
{
    char *desc = NULL;
    int rc;
    xmlNode *request = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);

    crm_xml_add(request, F_STONITH_ORIGIN, __func__);
    crm_xml_add_int(request, XML_ATTR_STONITH_INDEX, level);
    crm_xml_add(request, XML_ATTR_STONITH_TARGET, node);

    rc = stonith_level_remove(request, &desc);
    do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc);

    free_xml(request);
    free(desc);
}
/*!
 * \internal
 * \brief Unregister every stonith-class resource found in an XPath result set
 *
 * \param[in] xpathObj  Search results; entries whose class is not
 *                      PCMK_RESOURCE_CLASS_STONITH are skipped
 */
static void
remove_cib_device(xmlXPathObjectPtr xpathObj)
{
    const int total = numXpathResults(xpathObj);
    int i;

    for (i = 0; i < total; i++) {
        const char *standard = NULL;
        xmlNode *match = getXpathResult(xpathObj, i);

        CRM_LOG_ASSERT(match != NULL);
        if (match != NULL) {
            standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
        }

        /* A missing match leaves standard NULL, which never compares equal */
        if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
            const char *rsc_id = crm_element_value(match, XML_ATTR_ID);

            stonith_device_remove(rsc_id, TRUE);
        }
    }
}
/*!
 * \internal
 * \brief Register a fencing level, optionally removing the old entry first
 *
 * \param[in] match   Fencing-level XML element to register
 * \param[in] remove  If true, first remove the level currently stored under
 *                    the same key and index
 */
static void
handle_topology_change(xmlNode *match, bool remove)
{
    char *desc = NULL;
    int rc;

    CRM_CHECK(match != NULL, return);
    crm_trace("Updating %s", ID(match));

    if (remove) {
        int level = 0;
        char *target_key = stonith_level_key(match, -1);

        crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &level);
        topology_remove_helper(target_key, level);
        free(target_key);
    }

    rc = stonith_level_register(match, &desc);
    do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc);

    free(desc);
}
/*!
 * \internal
 * \brief Remove the fencing levels that a v1 CIB diff marks as deleted
 *
 * Only elements carrying XML_DIFF_MARKER are treated as deletions; unmarked
 * elements are modifications and are dealt with during the 'addition' stage.
 *
 * \param[in] xpathObj  XPath results holding removed fencing-level elements
 */
static void
remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
    int max = numXpathResults(xpathObj), lpc = 0;

    for (lpc = 0; lpc < max; lpc++) {
        xmlNode *match = getXpathResult(xpathObj, lpc);

        CRM_LOG_ASSERT(match != NULL);
        if (match && crm_element_value(match, XML_DIFF_MARKER)) {
            /* Deletion */
            int index = 0;
            char *target = stonith_level_key(match, -1);

            crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
            if (target == NULL) {
                crm_err("Invalid fencing target in element %s", ID(match));

            } else if (index <= 0) {
                crm_err("Invalid level for %s in element %s", target, ID(match));

            } else {
                topology_remove_helper(target, index);
            }

            /* The key is heap-allocated (handle_topology_change() frees its
             * copy too), so release it on every path
             */
            free(target);
        }
    }
}
/*!
 * \internal
 * \brief Register (replacing any existing entry) every fencing level in a
 *        result set
 *
 * \param[in] xpathObj  XPath results holding fencing-level elements
 */
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj)
{
    const int total = numXpathResults(xpathObj);
    int i = 0;

    while (i < total) {
        handle_topology_change(getXpathResult(xpathObj, i), TRUE);
        i++;
    }
}
/* Fencing
<diff crm_feature_set="3.0.6">
<diff-removed>
<fencing-topology>
<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
<fencing-level devices="disk,network" id="f-p2.1"/>
</fencing-topology>
</diff-removed>
<diff-added>
<fencing-topology>
<fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
<fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
</fencing-topology>
</diff-added>
</diff>
*/
static void
fencing_topology_init(void)
{
xmlXPathObjectPtr xpathObj = NULL;
const char *xpath = "//" XML_TAG_FENCING_LEVEL;
crm_trace("Full topology refresh");
free_topology_list();
init_topology_list();
/* Grab everything */
xpathObj = xpath_search(local_cib, xpath);
register_fencing_topology(xpathObj);
freeXpathObject(xpathObj);
}
/* Name a device is registered under: the clone name if the resource has one,
 * otherwise its ID. The argument and the whole expansion are parenthesized so
 * the macro is safe with any pointer expression and inside larger expressions.
 */
#define rsc_name(x) ((x)->clone_name ? (x)->clone_name : (x)->id)
/*!
 * \internal
 * \brief Look up the local node in a resource's allowed-node table
 *
 * \param[in] rsc  Resource to check
 *
 * \return The allowed-node entry whose uname equals stonith_our_uname, or
 *         NULL if there is none (or if rsc or stonith_our_uname is NULL)
 */
static pe_node_t *
our_node_allowed_for(pe_resource_t *rsc)
{
    GHashTableIter iter;
    pe_node_t *candidate = NULL;

    if ((rsc == NULL) || (stonith_our_uname == NULL)) {
        return NULL;
    }

    g_hash_table_iter_init(&iter, rsc->allowed_nodes);
    while (g_hash_table_iter_next(&iter, NULL, (void **) &candidate)) {
        if ((candidate != NULL)
            && (strcmp(candidate->details->uname, stonith_our_uname) == 0)) {
            return candidate;
        }
    }
    return NULL;
}
/*!
 * \internal
 * \brief If a resource or any of its children are STONITH devices, update their
 *        definitions given a cluster working set.
 *
 * For a stonith-class primitive, the device is (re-)registered when the local
 * node is allowed to run it with a non-negative score; it is removed from the
 * device list when the resource is disabled, the local node is not in its
 * allowed-node table, or the score of the node (or of an enclosing group) is
 * negative.
 *
 * \param[in] rsc       Resource to check
 * \param[in] data_set  Cluster working set with device information
 */
static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe_node_t *node = NULL;
const char *value = NULL;
const char *rclass = NULL;
pe_node_t *parent = NULL;
/* Stays TRUE unless the "allowed" branch below registers the device */
gboolean remove = TRUE;
/* If this is a complex resource, check children rather than this resource itself.
* TODO: Mark each installed device and remove if untouched when this process finishes.
*/
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, data_set);
/* For a clone, stop after the first child instead of visiting each copy */
if(pe_rsc_is_clone(rsc)) {
crm_trace("Only processing one copy of the clone %s", rsc->id);
break;
}
}
return;
}
/* We only care about STONITH resources. */
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
return;
}
/* If this STONITH resource is disabled, just remove it. */
if (pe__resource_is_disabled(rsc)) {
crm_info("Device %s has been disabled", rsc->id);
goto update_done;
}
/* Check whether our node is allowed for this resource (and its parent if in a group) */
node = our_node_allowed_for(rsc);
if (rsc->parent && (rsc->parent->variant == pe_group)) {
parent = our_node_allowed_for(rsc->parent);
}
if(node == NULL) {
/* Our node is disallowed, so remove the device */
GHashTableIter iter;
crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
/* Trace the nodes that are allowed, as a debugging aid (node is reused as
 * the loop variable; it is not read again after this branch)
 */
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
crm_trace("Available: %s = %d", node->details->uname, node->weight);
}
goto update_done;
} else if(node->weight < 0 || (parent && parent->weight < 0)) {
/* Our node (or its group) is disallowed by score, so remove the device */
char *score = score2char((node->weight < 0) ? node->weight : parent->weight);
crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
free(score);
goto update_done;
} else {
/* Our node is allowed, so update the device information */
int rc;
xmlNode *data;
GHashTableIter gIter;
stonith_key_value_t *params = NULL;
const char *name = NULL;
const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
const char *rsc_provides = NULL;
crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
/* Fill the resource's parameter and meta-attribute tables for this node */
get_rsc_attributes(rsc->parameters, rsc, node, data_set);
get_meta_attributes(rsc->meta, rsc, node, data_set);
rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES);
/* Copy every complete name/value pair into the registration parameter list */
g_hash_table_iter_init(&gIter, rsc->parameters);
while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
if (!name || !value) {
continue;
}
params = stonith_key_value_add(params, name, value);
crm_trace(" %s=%s", name, value);
}
remove = FALSE;
data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
agent, params, rsc_provides);
stonith_key_value_freeall(params, 1, 1);
rc = stonith_device_register(data, NULL, TRUE);
/* NOTE(review): a failed registration aborts via assertion here */
CRM_ASSERT(rc == pcmk_ok);
free_xml(data);
}
update_done:
/* Drop the device only if it is currently registered */
if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
stonith_device_remove(rsc_name(rsc), TRUE);
}
}
/*!
 * \internal
 * \brief Update all STONITH device definitions based on current CIB
 *
 * Points the fencer's working set at the cached CIB, runs the status and
 * scheduling passes, applies cib_device_update() to every top-level resource,
 * and finally resets the working set for the next run.
 */
static void
cib_devices_update(void)
{
    GListPtr item = NULL;

    crm_info("Updating devices to version %s.%s.%s",
             crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
             crm_element_value(local_cib, XML_ATTR_GENERATION),
             crm_element_value(local_cib, XML_ATTR_NUMUPDATES));

    CRM_ASSERT(fenced_data_set != NULL);

    fenced_data_set->input = local_cib;
    fenced_data_set->now = crm_time_new(NULL);
    fenced_data_set->localhost = stonith_our_uname;
    pe__set_working_set_flags(fenced_data_set, pe_flag_quick_location);

    cluster_status(fenced_data_set);
    pcmk__schedule_actions(fenced_data_set, NULL, NULL);

    item = fenced_data_set->resources;
    while (item != NULL) {
        cib_device_update(item->data, fenced_data_set);
        item = item->next;
    }

    /* The input was the cache itself, not a copy, so detach it before the
     * reset to keep the API from freeing it
     */
    fenced_data_set->input = NULL;
    pe_reset_working_set(fenced_data_set);
}
/*!
 * \internal
 * \brief Update the device list from a format-2 CIB patchset
 *
 * Walks the patchset's changes. A deleted primitive is unregistered directly;
 * a deleted (meta-)attribute set, or any other change under the resources,
 * constraints or resource-defaults sections, triggers a full recalculation
 * via cib_devices_update(). "move" operations and status-section changes are
 * ignored, as are changes that carry no operation or no path.
 *
 * \param[in] event  Notification event name (unused)
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
{
    xmlNode *change = NULL;
    char *reason = NULL;
    bool needs_update = FALSE;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    for (change = pcmk__xml_first_child(patchset); change != NULL;
         change = pcmk__xml_next(change)) {
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
        const char *shortpath = NULL;

        /* xpath is handed to strstr() below, which must not receive NULL */
        if ((op == NULL) || (xpath == NULL) ||
            (strcmp(op, "move") == 0) ||
            strstr(xpath, "/"XML_CIB_TAG_STATUS)) {
            continue;

        } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) {
            const char *id_start = NULL;
            const char *id_end = NULL;

            if (strstr(xpath, XML_TAG_ATTR_SETS) ||
                strstr(xpath, XML_TAG_META_SETS)) {
                needs_update = TRUE;
                reason = strdup("(meta) attribute deleted from resource");
                break;
            }

            /* Locate the quoted id in "...primitive[@id='<id>']..." */
            id_start = strstr(xpath, "primitive[@id='");
            if (id_start != NULL) {
                id_start += strlen("primitive[@id='");
                id_end = strchr(id_start, '\'');
            }

            if (id_end != NULL) {
                /* Copy just the id instead of duplicating and truncating
                 * the whole path
                 */
                char *rsc_id = crm_strdup_printf("%.*s",
                                                 (int) (id_end - id_start),
                                                 id_start);

                stonith_device_remove(rsc_id, TRUE);
                free(rsc_id);

            } else {
                crm_warn("Ignoring malformed CIB update (resource deletion)");
            }

        } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) ||
                   strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) ||
                   strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) {
            shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
            reason = crm_strdup_printf("%s %s", op, shortpath+1);
            needs_update = TRUE;
            break;
        }
    }

    if(needs_update) {
        crm_info("Updating device list from CIB: %s", reason);
        cib_devices_update();
    } else {
        crm_trace("No updates for device list found in CIB");
    }
    free(reason);
}
/*!
 * \internal
 * \brief Update the device list from a format-1 CIB diff
 *
 * Any location constraint in the diff, or any added/modified stonith-class
 * primitive, triggers a full recalculation via cib_devices_update(). Removed
 * primitives are unregistered directly.
 *
 * \param[in] event  Notification event name (unused)
 * \param[in] msg    CIB notification containing the diff
 */
static void
update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
{
    const char *reason = "none";
    gboolean recompute = FALSE;
    xmlXPathObjectPtr results = NULL;
    int count = 0;
    int i = 0;

    /* process new constraints */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
    count = numXpathResults(results);
    if (count > 0) {
        /* Safest and simplest to always recompute */
        recompute = TRUE;
        reason = "new location constraint";

        for (i = 0; i < count; i++) {
            xmlNode *constraint = getXpathResult(results, i);

            crm_log_xml_trace(constraint, "new constraint");
        }
    }
    freeXpathObject(results);

    /* process deletions */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
    if (numXpathResults(results) > 0) {
        remove_cib_device(results);
    }
    freeXpathObject(results);

    /* process additions */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
    count = numXpathResults(results);
    for (i = 0; i < count; i++) {
        xmlNode *added = getXpathResult(results, i);
        const char *rsc_id = crm_element_value(added, XML_ATTR_ID);
        const char *standard = crm_element_value(added, XML_AGENT_ATTR_CLASS);

        if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
            crm_trace("Fencing resource %s was added or modified", rsc_id);
            reason = "new resource";
            recompute = TRUE;
        }
    }
    freeXpathObject(results);

    if (recompute) {
        crm_info("Updating device list from CIB: %s", reason);
        cib_devices_update();
    }
}
/*!
 * \internal
 * \brief Update the device list from a CIB notification, by patchset format
 *
 * \param[in] event  Notification event name
 * \param[in] msg    CIB notification; its patchset's "format" attribute picks
 *                   the handler (format 1 is assumed if the attribute is
 *                   missing)
 */
static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
{
    int format = 1;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if (format == 1) {
        update_cib_stonith_devices_v1(event, msg);

    } else if (format == 2) {
        update_cib_stonith_devices_v2(event, msg);

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/* Needs to hold node name + attribute name + attribute value + 75 */
#define XPATH_MAX 512

/*!
 * \internal
 * \brief Check whether a node has a specific attribute name/value
 *
 * \param[in] node   Name of node to check
 * \param[in] name   Name of an attribute to look for
 * \param[in] value  The value the named attribute needs to be set to in order
 *                   to be considered a match
 *
 * \return TRUE if the locally cached CIB has the specified node attribute,
 *         FALSE otherwise (including when there is no cached CIB or the
 *         search expression does not fit in XPATH_MAX bytes)
 */
gboolean
node_has_attr(const char *node, const char *name, const char *value)
{
    char xpath[XPATH_MAX];
    xmlNode *match;
    int n;

    CRM_CHECK(local_cib != NULL, return FALSE);

    /* Search for the node's attributes in the CIB. While the schema allows
     * multiple sets of instance attributes, and allows instance attributes to
     * use id-ref to reference values elsewhere, that is intended for resources,
     * so we ignore that here.
     */
    n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES
                 "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS
                 "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']",
                 node, name, value);

    /* Reject a failed or truncated expression before it is used, so a
     * cut-off XPath is never evaluated
     */
    CRM_CHECK((n >= 0) && (n < XPATH_MAX), return FALSE);

    match = get_xpath_object(xpath, local_cib, LOG_NEVER);
    return (match != NULL);
}
/*!
 * \internal
 * \brief Update the fencing topology from a CIB notification
 *
 * Format-1 diffs: levels marked as removed are deleted, then every added or
 * changed level is (re-)registered.
 *
 * Format-2 patchsets: created and modified levels are applied one by one.
 * Deleting a level, changing the fencing-topology element itself, or creating
 * or deleting a configuration section that contains a topology rebuilds the
 * whole topology from the cached CIB and stops processing. Changes with no
 * operation or no path are skipped.
 *
 * \param[in] event  Notification event name (unused)
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
    int format = 1;
    const char *xpath = NULL;
    xmlXPathObjectPtr xpathObj = NULL;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if(format == 1) {
        /* Process deletions (only) */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);

        remove_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

        /* Process additions and changes */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);

        register_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

    } else if(format == 2) {
        xmlNode *change = NULL;
        int add[] = { 0, 0, 0 };
        int del[] = { 0, 0, 0 };

        xml_patch_versions(patchset, add, del);

        for (change = pcmk__xml_first_child(patchset); change != NULL;
             change = pcmk__xml_next(change)) {
            const char *op = crm_element_value(change, XML_DIFF_OP);
            const char *change_path = crm_element_value(change, XML_DIFF_PATH);

            /* change_path is handed to strstr() below, which must not
             * receive NULL
             */
            if ((op == NULL) || (change_path == NULL)) {
                continue;

            } else if(strstr(change_path, "/" XML_TAG_FENCING_LEVEL) != NULL) {
                /* Change to a specific entry */

                crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], change_path);
                if(strcmp(op, "move") == 0) {
                    continue;

                } else if(strcmp(op, "create") == 0) {
                    handle_topology_change(change->children, FALSE);

                } else if(strcmp(op, "modify") == 0) {
                    xmlNode *match = first_named_child(change, XML_DIFF_RESULT);

                    if(match) {
                        handle_topology_change(match->children, TRUE);
                    }

                } else if(strcmp(op, "delete") == 0) {
                    /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
                    crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
                             op, add[0], add[1], add[2], change_path);
                    fencing_topology_init();
                    return;
                }

            } else if (strstr(change_path, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
                /* Change to the topology in general */
                crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
                         op, add[0], add[1], add[2], change_path);
                fencing_topology_init();
                return;

            } else if (strstr(change_path, "/" XML_CIB_TAG_CONFIGURATION)) {
                /* Changes to the whole config section, possibly including the topology as a child */
                if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
                    crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
                              op, add[0], add[1], add[2], change_path);

                } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
                    crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
                             op, add[0], add[1], add[2], change_path);
                    fencing_topology_init();
                    return;
                }

            } else {
                crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
                          op, add[0], add[1], add[2], change_path);
            }
        }

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/* Set by init_cib_cache_cb() once the initial full CIB has been received */
static bool have_cib_devices = FALSE;
/*!
 * \internal
 * \brief CIB diff notification callback: keep the cached CIB and the
 *        fencer's device and topology lists in sync with the cluster
 *
 * Applies the patchset to the cached CIB (re-querying the full CIB if the
 * patch cannot be applied), refreshes the peer caches and the watchdog
 * timeout, and then either performs a full device/topology refresh or an
 * incremental update from the patchset.
 *
 * \param[in] event  Notification event name (used in log messages)
 * \param[in] msg    Notification XML containing the patchset
 */
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
int rc = pcmk_ok;
xmlNode *stonith_enabled_xml = NULL;
xmlNode *stonith_watchdog_xml = NULL;
const char *stonith_enabled_s = NULL;
/* Persists across calls; FALSE forces a full refresh on the next pass that
 * finds fencing enabled
 */
static gboolean stonith_enabled_saved = TRUE;
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access
* to device definitions, location constraints, and node attributes
*/
if (local_cib != NULL) {
/* Note: this rc shadows the function-level rc for the rest of this block */
int rc = pcmk_ok;
xmlNode *patchset = NULL;
/* Ignore notifications for CIB operations that did not succeed */
crm_element_value_int(msg, F_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_log_patchset(LOG_TRACE, "Config update", patchset);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
/* Drop the cache so that the block below re-queries the full CIB */
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
break;
default:
/* Same recovery as above, logged at warning level */
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
crm_trace("Re-requesting full CIB");
rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
if(rc != pcmk_ok) {
crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
return;
}
CRM_ASSERT(local_cib != NULL);
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
}
crm_peer_caches_refresh(local_cib);
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
local_cib, LOG_NEVER);
if (stonith_enabled_xml) {
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
}
/* A missing stonith-enabled value is treated the same as "true" */
if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) {
long timeout_ms = 0;
const char *value = NULL;
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']",
local_cib, LOG_NEVER);
if (stonith_watchdog_xml) {
value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
}
if(value) {
timeout_ms = crm_get_msec(value);
}
/* A negative configured value selects the automatically derived timeout */
if (timeout_ms < 0) {
timeout_ms = pcmk__auto_watchdog_timeout();
}
if(timeout_ms != stonith_watchdog_timeout_ms) {
crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
stonith_watchdog_timeout_ms = timeout_ms;
}
} else {
stonith_watchdog_timeout_ms = 0;
}
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
/* Fencing explicitly disabled: remember that, so a later re-enable
 * triggers the full refresh branch below
 */
crm_trace("Ignoring CIB updates while fencing is disabled");
stonith_enabled_saved = FALSE;
return;
} else if (stonith_enabled_saved == FALSE) {
crm_info("Updating fencing device and topology lists "
"now that fencing is enabled");
stonith_enabled_saved = TRUE;
fencing_topology_init();
cib_devices_update();
} else {
/* Steady state: apply only what this patchset changed */
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
}
}
/*!
 * \internal
 * \brief Callback for the initial full-CIB query issued by setup_cib()
 *
 * Stores a copy of the query output as the local CIB cache, marks the cache
 * as populated so that diff notifications are processed from now on, and
 * builds the initial topology and device lists.
 *
 * \param[in] msg        Reply message (unused)
 * \param[in] call_id    ID of the CIB call (unused)
 * \param[in] rc         Result of the CIB call (unused)
 * \param[in] output     CIB contents returned by the query
 * \param[in] user_data  Unused
 */
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    crm_info("Updating device list from CIB");

    have_cib_devices = TRUE;
    local_cib = copy_xml(output);

    crm_peer_caches_refresh(local_cib);

    fencing_topology_init();
    cib_devices_update();
}
/*!
 * \internal
 * \brief Begin daemon shutdown
 *
 * Sets the shutdown flag, then either asks a running main loop to quit or,
 * if no main loop is running, cleans up and exits immediately.
 *
 * \param[in] nsig  Signal number (unused)
 */
static void
stonith_shutdown(int nsig)
{
    crm_info("Terminating with %d clients", pcmk__ipc_client_count());
    stonith_shutdown_flag = TRUE;

    if ((mainloop == NULL) || !g_main_loop_is_running(mainloop)) {
        stonith_cleanup();
        crm_exit(CRM_EX_OK);

    } else {
        g_main_loop_quit(mainloop);
    }
}
/*!
 * \internal
 * \brief Handle loss of the connection to the CIB manager
 *
 * During shutdown the disconnect is expected and only logged. Otherwise the
 * daemon signs off from the CIB and shuts itself down.
 *
 * \param[in] user_data  Unused
 */
static void
cib_connection_destroy(gpointer user_data)
{
    if (stonith_shutdown_flag) {
        crm_info("Connection to the CIB manager closed");
        return;
    }

    crm_crit("Lost connection to the CIB manager, shutting down");
    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }
    stonith_shutdown(0);
}
/*!
 * \internal
 * \brief Release the daemon's connections, caches and lists before exit
 */
static void
stonith_cleanup(void)
{
    /* External connections first */
    if (cib_api != NULL) {
        cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb);
        cib_api->cmds->signoff(cib_api);
    }
    if (ipcs != NULL) {
        qb_ipcs_destroy(ipcs);
    }

    /* Then the peer/client caches and the fencer's own tables */
    crm_peer_destroy();
    pcmk__client_cleanup();
    free_stonith_remote_op_list();
    free_topology_list();
    free_device_list();
    free_metadata_cache();

    /* Finally the globals owned by this file */
    free(stonith_our_uname);
    stonith_our_uname = NULL;

    free_xml(local_cib);
    local_cib = NULL;
}
/* Command-line options accepted by the daemon. None of the entries has a
 * description string (all are NULL). The list ends with an all-zero entry.
 */
static pcmk__cli_option_t long_options[] = {
// long option, argument type, storage, short option, description, flags
{
/* -s: main() sets stand_alone */
"stand-alone", no_argument, 0, 's',
NULL, pcmk__option_default
},
{
/* -c: main() clears stand_alone and sets no_cib_connect */
"stand-alone-w-cpg", no_argument, 0, 'c',
NULL, pcmk__option_default
},
{
/* -l FILE: main() passes the argument to crm_add_logfile() */
"logfile", required_argument, 0, 'l',
NULL, pcmk__option_default
},
{
/* -V: main() calls crm_bump_log_level() */
"verbose", no_argument, 0, 'V',
NULL, pcmk__option_default
},
{
/* -$: handled by pcmk__cli_help() in main() */
"version", no_argument, 0, '$',
NULL, pcmk__option_default
},
{
/* -?: handled by pcmk__cli_help() in main() */
"help", no_argument, 0, '?',
NULL, pcmk__option_default
},
{ 0, 0, 0, 0 }
};
/*!
 * \internal
 * \brief Connect to the CIB manager and start tracking the CIB
 *
 * Signs on (trying up to five times while the result is -ENOTCONN, sleeping
 * 0, 1, 2, 3 and 4 seconds before the successive attempts), subscribes to
 * diff notifications, requests the full CIB asynchronously (handled by
 * init_cib_cache_cb()) and installs the disconnect handler. Failures are
 * logged and the function returns without further setup.
 */
static void
setup_cib(void)
{
    int rc = pcmk_ok;
    int retries = 0;

    cib_api = cib_new();
    if (cib_api == NULL) {
        crm_err("No connection to the CIB manager");
        return;
    }

    do {
        sleep(retries);
        rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
    } while (rc == -ENOTCONN && ++retries < 5);

    if (rc != pcmk_ok) {
        crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
        return;
    }

    rc = cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY,
                                            update_cib_cache_cb);
    if (rc != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        return;
    }

    /* The value returned by the query is what the callback is registered
     * against
     */
    rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
    cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
                                     init_cib_cache_cb);
    cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
    crm_info("Watching for fencing topology changes");
}
/* Event handlers for the fencer's IPC server; main() hands this table to
 * pcmk__serve_fenced_ipc().
 */
struct qb_ipcs_service_handlers ipc_callbacks = {
    .connection_accept    = st_ipc_accept,      // refuses clients during shutdown
    .connection_created   = NULL,               // no handler for this event
    .msg_process          = st_ipc_dispatch,
    .connection_closed    = st_ipc_closed,
    .connection_destroyed = st_ipc_destroy,
};
/*!
 * \internal
 * \brief React to a change in a cluster peer's status
 *
 * For any change other than a process-list change, on a peer that is not
 * flagged as a remote node, broadcast a "poke" stonith command to the cluster.
 *
 * \param[in] type  What changed
 * \param[in] node  What peer had the change
 * \param[in] data  Previous value of what changed
 */
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    xmlNode *poke = NULL;

    /* Nothing to do for process changes or for remote nodes */
    if ((type == crm_status_processes)
        || pcmk_is_set(node->flags, crm_remote_node)) {
        return;
    }

    /*
     * This is a hack until we can send to a nodeid and/or we fix node name lookups
     * These messages are ignored in stonith_peer_callback()
     */
    poke = create_xml_node(NULL, "stonith_command");
    crm_xml_add(poke, F_XML_TAGNAME, "stonith_command");
    crm_xml_add(poke, F_TYPE, T_STONITH_NG);
    crm_xml_add(poke, F_STONITH_OPERATION, "poke");

    crm_debug("Broadcasting our uname because of node %u", node->id);
    send_cluster_message(NULL, crm_msg_stonith_ng, poke, FALSE);

    free_xml(poke);
}
/*!
 * \brief Entry point of the Pacemaker fencer daemon
 *
 * In order, this function:
 * - parses the command-line options listed in long_options;
 * - if the only remaining argument is "metadata", prints the resource-agent
 *   XML describing the special fencing parameters and returns;
 * - initializes logging and refuses to start if a "stonith-ng" IPC end-point
 *   already accepts connections;
 * - unless running stand-alone, connects to the cluster and (unless
 *   no_cib_connect is set) to the CIB;
 * - initializes the device and topology lists, optionally registers the
 *   watchdog pseudo-device, starts the IPC server and runs the main loop;
 * - cleans up once the main loop returns.
 *
 * \param[in] argc  Number of command-line arguments
 * \param[in] argv  Command-line arguments
 *
 * \return CRM_EX_OK after printing metadata; all other paths end in crm_exit()
 */
int
main(int argc, char **argv)
{
int flag;
int lpc = 0;
int argerr = 0;
int option_index = 0;
/* NOTE(review): cluster has no initializer, and only three of its members are
 * assigned below (and only when built with SUPPORT_COROSYNC on a corosync
 * cluster) before it is passed to crm_cluster_connect() -- confirm that no
 * other member is read before being set.
 */
crm_cluster_t cluster;
// Actions for which pcmk_<action>_action/_timeout/_retries metadata is printed
const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };
crm_ipc_t *old_instance = NULL;
crm_log_preinit(NULL, argc, argv);
pcmk__set_cli_options(NULL, "[options]", long_options,
"daemon for executing fencing devices in a "
"Pacemaker cluster");
// Process options until pcmk__next_cli_option() reports there are no more
while (1) {
flag = pcmk__next_cli_option(argc, argv, &option_index, NULL);
if (flag == -1) {
break;
}
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'l':
crm_add_logfile(optarg);
break;
case 's':
stand_alone = TRUE;
break;
case 'c':
// Join the cluster layer but do not connect to the CIB
stand_alone = FALSE;
no_cib_connect = TRUE;
break;
case '$':
case '?':
// presumably pcmk__cli_help() does not return -- TODO confirm
pcmk__cli_help(flag, CRM_EX_OK);
break;
default:
++argerr;
break;
}
}
// A single non-option argument "metadata" (case-insensitive) prints the XML and returns
if (argc - optind == 1 && pcmk__str_eq("metadata", argv[optind], pcmk__str_casei)) {
printf("<?xml version=\"1.0\"?><!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n");
printf("<resource-agent name=\"pacemaker-fenced\">\n");
printf(" <version>1.0</version>\n");
printf(" <longdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources"
" and used by Pacemaker's fence daemon, formerly known as stonithd</longdesc>\n");
printf(" <shortdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources</shortdesc>\n");
printf(" <parameters>\n");
#if 0
// priority is not implemented yet
printf(" <parameter name=\"priority\" unique=\"0\">\n");
printf(" <shortdesc lang=\"en\">Devices that are not in a topology "
"are tried in order of highest to lowest integer priority</shortdesc>\n");
printf(" <content type=\"integer\" default=\"0\"/>\n");
printf(" </parameter>\n");
#endif
printf(" <parameter name=\"%s\" unique=\"0\">\n",
PCMK_STONITH_HOST_ARGUMENT);
printf
(" <shortdesc lang=\"en\">Advanced use only: An alternate parameter to supply instead of 'port'</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
"A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
" </longdesc>\n");
printf(" <content type=\"string\" default=\"port\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n",
PCMK_STONITH_HOST_MAP);
printf
(" <shortdesc lang=\"en\">A mapping of host names to ports numbers for devices that do not support host names.</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n");
printf(" <content type=\"string\" default=\"\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n",
PCMK_STONITH_HOST_LIST);
printf(" <shortdesc lang=\"en\">A list of machines controlled by "
"this device (Optional unless %s=static-list).</shortdesc>\n",
PCMK_STONITH_HOST_CHECK);
printf(" <content type=\"string\" default=\"\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n",
PCMK_STONITH_HOST_CHECK);
printf
(" <shortdesc lang=\"en\">How to determine which machines are controlled by the device.</shortdesc>\n");
printf(" <longdesc lang=\"en\">Allowed values: dynamic-list "
"(query the device via the 'list' command), static-list "
"(check the " PCMK_STONITH_HOST_LIST " attribute), status "
"(query the device via the 'status' command), none (assume "
"every device can fence every machine)</longdesc>\n");
printf(" <content type=\"string\" default=\"dynamic-list\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n",
PCMK_STONITH_DELAY_MAX);
- printf(" <shortdesc lang=\"en\">Enable a random delay for "
- "fencing actions and specify the maximum of random "
- "delay.</shortdesc>\n");
+ printf(" <shortdesc lang=\"en\">Enable a delay of no more than the "
+ "time specified before executing fencing actions. Pacemaker "
+ "derives the overall delay by taking the value of "
+ PCMK_STONITH_DELAY_BASE " and adding a random delay value such "
+ "that the sum is kept below this maximum.</shortdesc>\n");
printf(" <longdesc lang=\"en\">This prevents double fencing when "
"using slow devices such as sbd.\nUse this to enable a random "
"delay for fencing actions.\nThe overall delay is derived from "
"this random delay value adding a static delay so that the sum "
"is kept below the maximum delay.</longdesc>\n");
printf(" <content type=\"time\" default=\"0s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n",
PCMK_STONITH_DELAY_BASE);
printf(" <shortdesc lang=\"en\">Enable a base delay for "
"fencing actions and specify base delay value.</shortdesc>\n");
printf(" <longdesc lang=\"en\">This prevents double fencing when "
"different delays are configured on the nodes.\nUse this to "
"enable a static delay for fencing actions.\nThe overall delay "
"is derived from a random delay value adding this static delay "
"so that the sum is kept below the maximum delay.</longdesc>\n");
printf(" <content type=\"time\" default=\"0s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n",
PCMK_STONITH_ACTION_LIMIT);
printf
(" <shortdesc lang=\"en\">The maximum number of actions can be performed in parallel on this device</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Cluster property concurrent-fencing=true needs to be configured first.\n"
"Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.</longdesc>\n");
printf(" <content type=\"integer\" default=\"1\"/>\n");
printf(" </parameter>\n");
// Emit the same three parameters (action, timeout, retries) for every entry in actions[]
for (lpc = 0; lpc < DIMOF(actions); lpc++) {
printf(" <parameter name=\"pcmk_%s_action\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: An alternate command to run instead of '%s'</shortdesc>\n",
actions[lpc]);
printf
(" <longdesc lang=\"en\">Some devices do not support the standard commands or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, command that implements the '%s' action.</longdesc>\n",
actions[lpc]);
printf(" <content type=\"string\" default=\"%s\"/>\n", actions[lpc]);
printf(" </parameter>\n");
printf(" <parameter name=\"pcmk_%s_timeout\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout</shortdesc>\n",
actions[lpc]);
printf
(" <longdesc lang=\"en\">Some devices need much more/less time to complete than normal.\n"
"Use this to specify an alternate, device-specific, timeout for '%s' actions.</longdesc>\n",
actions[lpc]);
printf(" <content type=\"time\" default=\"60s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"pcmk_%s_retries\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: The maximum number of times to retry the '%s' command within the timeout period</shortdesc>\n",
actions[lpc]);
printf(" <longdesc lang=\"en\">Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
"</longdesc>\n", actions[lpc]);
printf(" <content type=\"integer\" default=\"2\"/>\n");
printf(" </parameter>\n");
}
printf(" </parameters>\n");
printf("</resource-agent>\n");
return CRM_EX_OK;
}
// Any other leftover non-option argument is a usage error
if (optind != argc) {
++argerr;
}
if (argerr) {
pcmk__cli_help('?', CRM_EX_USAGE);
}
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_notice("Starting Pacemaker fencer");
// Probe for an already-running fencer by trying to connect to its IPC end-point
old_instance = crm_ipc_new("stonith-ng", 0);
if (crm_ipc_connect(old_instance)) {
/* IPC end-point already up */
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-fenced is already active, aborting startup");
// NOTE(review): logged as an error but exits with CRM_EX_OK -- confirm this is intended
crm_exit(CRM_EX_OK);
} else {
/* not up or not authentic, we'll proceed either way */
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
mainloop_add_signal(SIGTERM, stonith_shutdown);
crm_peer_init();
fenced_data_set = pe_new_working_set();
CRM_ASSERT(fenced_data_set != NULL);
pe__set_working_set_flags(fenced_data_set,
pe_flag_no_counts|pe_flag_no_compat);
if (stand_alone == FALSE) {
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
cluster.destroy = stonith_peer_cs_destroy;
cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
}
crm_set_status_callback(&st_peer_update_callback);
if (crm_cluster_connect(&cluster) == FALSE) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(CRM_EX_FATAL);
}
// The globals take over the name and UUID pointers stored in cluster
stonith_our_uname = cluster.uname;
stonith_our_uuid = cluster.uuid;
if (no_cib_connect == FALSE) {
setup_cib();
}
} else {
// Stand-alone mode: no cluster connection, so use a fixed node name
stonith_our_uname = strdup("localhost");
}
init_device_list();
init_topology_list();
/* NOTE(review): stonith_watchdog_timeout_ms is defined as 0 at file scope and
 * is not assigned anywhere in main() before this check; it is presumably set
 * elsewhere -- TODO confirm when this branch can be taken.
 */
if(stonith_watchdog_timeout_ms > 0) {
int rc;
xmlNode *xml;
stonith_key_value_t *params = NULL;
// Register a "watchdog" device whose host list is just this node
params = stonith_key_value_add(params, PCMK_STONITH_HOST_LIST,
stonith_our_uname);
xml = create_device_registration_xml("watchdog", st_namespace_internal,
STONITH_WATCHDOG_AGENT, params,
NULL);
stonith_key_value_freeall(params, 1, 1);
rc = stonith_device_register(xml, NULL, FALSE);
free_xml(xml);
if (rc != pcmk_ok) {
crm_crit("Cannot register watchdog pseudo fence agent");
crm_exit(CRM_EX_FATAL);
}
}
pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
/* Create the mainloop and run it... */
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker fencer successfully started and accepting connections");
g_main_loop_run(mainloop);
// Reached only after the main loop has returned
stonith_cleanup();
pe_free_working_set(fenced_data_set);
crm_exit(CRM_EX_OK);
}
diff --git a/doc/sphinx/Pacemaker_Explained/fencing.rst b/doc/sphinx/Pacemaker_Explained/fencing.rst
index df928b5dbc..9ed12b39a4 100644
--- a/doc/sphinx/Pacemaker_Explained/fencing.rst
+++ b/doc/sphinx/Pacemaker_Explained/fencing.rst
@@ -1,1170 +1,1170 @@
.. index::
single: fencing
single: STONITH
.. _fencing:
Fencing
-------
What Is Fencing?
################
*Fencing* is the ability to make a node unable to run resources, even when that
node is unresponsive to cluster commands.
Fencing is also known as *STONITH*, an acronym for "Shoot The Other Node In The
Head", since the most common fencing method is cutting power to the node.
Another method is "fabric fencing", cutting the node's access to some
capability required to run resources (such as network access or a shared disk).
.. index::
single: fencing; why necessary
Why Is Fencing Necessary?
#########################
Fencing protects your data from being corrupted by malfunctioning nodes or
unintentional concurrent access to shared resources.
Fencing protects against the "split brain" failure scenario, where cluster
nodes have lost the ability to reliably communicate with each other but are
still able to run resources. If the cluster just assumed that uncommunicative
nodes were down, then multiple instances of a resource could be started on
different nodes.
The effect of split brain depends on the resource type. For example, an IP
address brought up on two hosts on a network will cause packets to randomly be
sent to one or the other host, rendering the IP useless. For a database or
clustered file system, the effect could be much more severe, causing data
corruption or divergence.
Fencing is also used when a resource cannot otherwise be stopped. If a
resource fails to stop on a node, it cannot be started on a different node
without risking the same type of conflict as split-brain. Fencing the
original node ensures the resource can be safely started elsewhere.
Users may also configure the ``on-fail`` property of :ref:`operation` or the
``loss-policy`` property of
:ref:`ticket constraints <ticket-constraints>` to ``fence``, in which
case the cluster will fence the resource's node if the operation fails or the
ticket is lost.
.. index::
single: fencing; device
Fence Devices
#############
A *fence device* or *fencing device* is a special type of resource that
provides the means to fence a node.
Examples of fencing devices include intelligent power switches and IPMI devices
that accept SNMP commands to cut power to a node, and iSCSI controllers that
allow SCSI reservations to be used to cut a node's access to a shared disk.
Since fencing devices will be used to recover from loss of networking
connectivity to other nodes, it is essential that they do not rely on the same
network as the cluster itself, otherwise that network becomes a single point of
failure.
Since loss of a node due to power outage is indistinguishable from loss of
network connectivity to that node, it is also essential that at least one fence
device for a node does not share power with that node. For example, an on-board
IPMI controller that shares power with its host should not be used as the sole
fencing device for that host.
Since fencing is used to isolate malfunctioning nodes, no fence device should
rely on its target functioning properly. This includes, for example, devices
that ssh into a node and issue a shutdown command (such devices might be
suitable for testing, but never for production).
.. index::
single: fencing; agent
Fence Agents
############
A *fence agent* or *fencing agent* is a ``stonith``-class resource agent.
The fence agent standard provides commands (such as ``off`` and ``reboot``)
that the cluster can use to fence nodes. As with other resource agent classes,
this allows a layer of abstraction so that Pacemaker doesn't need any knowledge
about specific fencing technologies -- that knowledge is isolated in the agent.
When a Fence Device Can Be Used
###############################
Fencing devices do not actually "run" like most services. Typically, they just
provide an interface for sending commands to an external device.
Additionally, fencing may be initiated by Pacemaker, by other cluster-aware
software such as DRBD or DLM, or manually by an administrator, at any point in
the cluster life cycle, including before any resources have been started.
To accommodate this, Pacemaker does not require the fence device resource to be
"started" in order to be used. Whether a fence device is started or not
determines whether a node runs any recurring monitor for the device, and gives
the node a slight preference for being chosen to execute fencing using that
device.
By default, any node can execute any fencing device. If a fence device is
disabled by setting its ``target-role`` to ``Stopped``, then no node can use
that device. If a location constraint with a negative score prevents a specific
node from "running" a fence device, then that node will never be chosen to
execute fencing using the device. A node may fence itself, but the cluster will
choose that only if no other nodes can do the fencing.
A common configuration scenario is to have one fence device per target node.
In such a case, users often configure anti-location constraints so that
the target node does not monitor its own device.
Limitations of Fencing Resources
################################
Fencing resources have certain limitations that other resource classes don't:
* They may have only one set of meta-attributes and one set of instance
attributes.
* If :ref:`rules` are used to determine fencing resource options, these
might be evaluated only when first read, meaning that later changes to the
rules will have no effect. Therefore, it is better to avoid confusion and not
use rules at all with fencing resources.
These limitations could be revisited if there is sufficient user demand.
.. index::
single: fencing; special instance attributes
.. _fencing-attributes:
Special Options for Fencing Resources
#####################################
The table below lists special instance attributes that may be set for any
fencing resource (*not* meta-attributes, even though they are interpreted by
Pacemaker rather than the fence agent). These are also listed in the man page
for ``pacemaker-fenced``.
.. Not_Yet_Implemented:
+----------------------+---------+--------------------+----------------------------------------+
| priority | integer | 0 | .. index:: |
| | | | single: priority |
| | | | |
| | | | The priority of the fence device. |
| | | | Devices are tried in order of highest |
| | | | priority to lowest. |
+----------------------+---------+--------------------+----------------------------------------+
.. table:: **Additional Properties of Fencing Resources**
+----------------------+---------+--------------------+----------------------------------------+
| Field | Type | Default | Description |
+======================+=========+====================+========================================+
| stonith-timeout | time | | .. index:: |
| | | | single: stonith-timeout |
| | | | |
| | | | Older versions used this to override |
| | | | the default period to wait for a fence |
| | | | action (reboot, on, or off) to |
| | | | complete for this device. It has been |
| | | | replaced by the |
| | | | ``pcmk_reboot_timeout`` and |
| | | | ``pcmk_off_timeout`` properties. |
+----------------------+---------+--------------------+----------------------------------------+
| provides | string | | .. index:: |
| | | | single: provides |
| | | | |
| | | | Any special capability provided by the |
| | | | fence device. Currently, only one such |
| | | | capability is meaningful: |
| | | | :ref:`unfencing <unfencing>`. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_host_map | string | | .. index:: |
| | | | single: pcmk_host_map |
| | | | |
| | | | A mapping of host names to port |
| | | | numbers for devices that do not |
| | | | support host names. |
| | | | |
| | | | Example: ``node1:1;node2:2,3`` tells |
| | | | the cluster to use port 1 for |
| | | | ``node1`` and ports 2 and 3 for |
| | | | ``node2``. If ``pcmk_host_check`` is |
| | | | explicitly set to ``static-list``, |
| | | | either this or ``pcmk_host_list`` must |
| | | | be set. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_host_list | string | | .. index:: |
| | | | single: pcmk_host_list |
| | | | |
| | | | A list of machines controlled by this |
| | | | device. If ``pcmk_host_check`` is |
| | | | explicitly set to ``static-list``, |
| | | | either this or ``pcmk_host_map`` must |
| | | | be set. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_host_check | string | The default is | .. index:: |
| | | ``static-list`` if | single: pcmk_host_check |
| | | either | |
| | | ``pcmk_host_list`` | How to determine which machines are |
| | | or | controlled by the device. Allowed |
| | | ``pcmk_host_map`` | values: |
| | | is configured. If | |
| | | neither of those | * ``dynamic-list:`` query the device |
| | | are configured, | via the agent's ``list`` action |
| | | the default is | * ``static-list:`` check the |
| | | ``dynamic-list`` | ``pcmk_host_list`` or |
| | | if the fence | ``pcmk_host_map`` attribute |
| | | device supports | * ``status:`` query the device via the |
| | | the list action, | ``status`` action |
| | | or ``status`` if | * ``none:`` assume the device can |
| | | the fence device | fence any node |
| | | supports the | |
| | | status action but | |
| | | not the list | |
| | | action. If none of | |
| | | those conditions | |
| | | apply, the default | |
| | | is ``none``. | |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_delay_max | time | 0s | .. index:: |
| | | | single: pcmk_delay_max |
| | | | |
- | | | | Enable a random delay of up to the |
+ | | | | Enable a delay of no more than the |
| | | | time specified before executing |
- | | | | fencing actions. This is sometimes |
+ | | | | fencing actions. Pacemaker derives the |
+ | | | | overall delay by taking the value of |
+ | | | | ``pcmk_delay_base`` and adding a random|
+ | | | | delay value such that the sum is kept |
+ | | | | below this maximum. This is sometimes |
| | | | used in two-node clusters to ensure |
| | | | that the nodes don't fence each other |
- | | | | at the same time. The overall delay |
- | | | | introduced by pacemaker is derived |
- | | | | from this random delay value adding a |
- | | | | static delay so that the sum is kept |
- | | | | below the maximum delay. |
+ | | | | at the same time. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_delay_base | time | 0s | .. index:: |
| | | | single: pcmk_delay_base |
| | | | |
| | | | Enable a static delay before executing |
| | | | fencing actions. This can be used, for |
| | | | example, in two-node clusters to |
| | | | ensure that the nodes don't fence each |
| | | | other, by having separate fencing |
| | | | resources with different values. The |
| | | | node that is fenced with the shorter |
| | | | delay will lose a fencing race. The |
| | | | overall delay introduced by pacemaker |
| | | | is derived from this value plus a |
| | | | random delay such that the sum is kept |
| | | | below the maximum delay. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_action_limit | integer | 1 | .. index:: |
| | | | single: pcmk_action_limit |
| | | | |
| | | | The maximum number of actions that can |
| | | | be performed in parallel on this |
| | | | device, if the cluster option |
| | | | ``concurrent-fencing`` is ``true``. A |
| | | | value of -1 means unlimited. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_host_argument | string | ``port`` otherwise | .. index:: |
| | | ``plug`` if | single: pcmk_host_argument |
| | | supported | |
| | | according to the | *Advanced use only.* Which parameter |
| | | metadata of the | should be supplied to the fence agent |
| | | fence agent | to identify the node to be fenced. |
| | | | Some devices support neither the |
| | | | standard ``plug`` nor the deprecated |
| | | | ``port`` parameter, or may provide |
| | | | additional ones. Use this to specify |
| | | | an alternate, device-specific |
| | | | parameter. A value of ``none`` tells |
| | | | the cluster not to supply any |
| | | | additional parameters. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_reboot_action | string | reboot | .. index:: |
| | | | single: pcmk_reboot_action |
| | | | |
| | | | *Advanced use only.* The command to |
| | | | send to the resource agent in order to |
| | | | reboot a node. Some devices do not |
| | | | support the standard commands or may |
| | | | provide additional ones. Use this to |
| | | | specify an alternate, device-specific |
| | | | command. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_reboot_timeout | time | 60s | .. index:: |
| | | | single: pcmk_reboot_timeout |
| | | | |
| | | | *Advanced use only.* Specify an |
| | | | alternate timeout to use for |
| | | | ``reboot`` actions instead of the |
| | | | value of ``stonith-timeout``. Some |
| | | | devices need much more or less time to |
| | | | complete than normal. Use this to |
| | | | specify an alternate, device-specific |
| | | | timeout. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_reboot_retries | integer | 2 | .. index:: |
| | | | single: pcmk_reboot_retries |
| | | | |
| | | | *Advanced use only.* The maximum |
| | | | number of times to retry the |
| | | | ``reboot`` command within the timeout |
| | | | period. Some devices do not support |
| | | | multiple connections, and operations |
| | | | may fail if the device is busy with |
| | | | another task, so Pacemaker will |
| | | | automatically retry the operation, if |
| | | | there is time remaining. Use this |
| | | | option to alter the number of times |
| | | | Pacemaker retries before giving up. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_off_action | string | off | .. index:: |
| | | | single: pcmk_off_action |
| | | | |
| | | | *Advanced use only.* The command to |
| | | | send to the resource agent in order to |
| | | | shut down a node. Some devices do not |
| | | | support the standard commands or may |
| | | | provide additional ones. Use this to |
| | | | specify an alternate, device-specific |
| | | | command. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_off_timeout | time | 60s | .. index:: |
| | | | single: pcmk_off_timeout |
| | | | |
| | | | *Advanced use only.* Specify an |
| | | | alternate timeout to use for |
| | | | ``off`` actions instead of the |
| | | | value of ``stonith-timeout``. Some |
| | | | devices need much more or less time to |
| | | | complete than normal. Use this to |
| | | | specify an alternate, device-specific |
| | | | timeout. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_off_retries | integer | 2 | .. index:: |
| | | | single: pcmk_off_retries |
| | | | |
| | | | *Advanced use only.* The maximum |
| | | | number of times to retry the |
| | | | ``off`` command within the timeout |
| | | | period. Some devices do not support |
| | | | multiple connections, and operations |
| | | | may fail if the device is busy with |
| | | | another task, so Pacemaker will |
| | | | automatically retry the operation, if |
| | | | there is time remaining. Use this |
| | | | option to alter the number of times |
| | | | Pacemaker retries before giving up. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_list_action | string | list | .. index:: |
| | | | single: pcmk_list_action |
| | | | |
| | | | *Advanced use only.* The command to |
| | | | send to the resource agent in order to |
| | | | list nodes. Some devices do not |
| | | | support the standard commands or may |
| | | | provide additional ones. Use this to |
| | | | specify an alternate, device-specific |
| | | | command. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_list_timeout | time | 60s | .. index:: |
| | | | single: pcmk_list_timeout |
| | | | |
| | | | *Advanced use only.* Specify an |
| | | | alternate timeout to use for |
| | | | ``list`` actions instead of the |
| | | | value of ``stonith-timeout``. Some |
| | | | devices need much more or less time to |
| | | | complete than normal. Use this to |
| | | | specify an alternate, device-specific |
| | | | timeout. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_list_retries | integer | 2 | .. index:: |
| | | | single: pcmk_list_retries |
| | | | |
| | | | *Advanced use only.* The maximum |
| | | | number of times to retry the |
| | | | ``list`` command within the timeout |
| | | | period. Some devices do not support |
| | | | multiple connections, and operations |
| | | | may fail if the device is busy with |
| | | | another task, so Pacemaker will |
| | | | automatically retry the operation, if |
| | | | there is time remaining. Use this |
| | | | option to alter the number of times |
| | | | Pacemaker retries before giving up. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_monitor_action | string | monitor | .. index:: |
| | | | single: pcmk_monitor_action |
| | | | |
| | | | *Advanced use only.* The command to |
| | | | send to the resource agent in order to |
| | | | report extended status. Some devices do|
| | | | not support the standard commands or |
| | | | may provide additional ones. Use this |
| | | | to specify an alternate, |
| | | | device-specific command. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_monitor_timeout | time | 60s | .. index:: |
| | | | single: pcmk_monitor_timeout |
| | | | |
| | | | *Advanced use only.* Specify an |
| | | | alternate timeout to use for |
| | | | ``monitor`` actions instead of the |
| | | | value of ``stonith-timeout``. Some |
| | | | devices need much more or less time to |
| | | | complete than normal. Use this to |
| | | | specify an alternate, device-specific |
| | | | timeout. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_monitor_retries | integer | 2 | .. index:: |
| | | | single: pcmk_monitor_retries |
| | | | |
| | | | *Advanced use only.* The maximum |
| | | | number of times to retry the |
| | | | ``monitor`` command within the timeout |
| | | | period. Some devices do not support |
| | | | multiple connections, and operations |
| | | | may fail if the device is busy with |
| | | | another task, so Pacemaker will |
| | | | automatically retry the operation, if |
| | | | there is time remaining. Use this |
| | | | option to alter the number of times |
| | | | Pacemaker retries before giving up. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_status_action | string | status | .. index:: |
| | | | single: pcmk_status_action |
| | | | |
| | | | *Advanced use only.* The command to |
| | | | send to the resource agent in order to |
| | | | report status. Some devices do |
| | | | not support the standard commands or |
| | | | may provide additional ones. Use this |
| | | | to specify an alternate, |
| | | | device-specific command. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_status_timeout | time | 60s | .. index:: |
| | | | single: pcmk_status_timeout |
| | | | |
| | | | *Advanced use only.* Specify an |
| | | | alternate timeout to use for |
| | | | ``status`` actions instead of the |
| | | | value of ``stonith-timeout``. Some |
| | | | devices need much more or less time to |
| | | | complete than normal. Use this to |
| | | | specify an alternate, device-specific |
| | | | timeout. |
+----------------------+---------+--------------------+----------------------------------------+
| pcmk_status_retries | integer | 2 | .. index:: |
| | | | single: pcmk_status_retries |
| | | | |
| | | | *Advanced use only.* The maximum |
| | | | number of times to retry the |
| | | | ``status`` command within the timeout |
| | | | period. Some devices do not support |
| | | | multiple connections, and operations |
| | | | may fail if the device is busy with |
| | | | another task, so Pacemaker will |
| | | | automatically retry the operation, if |
| | | | there is time remaining. Use this |
| | | | option to alter the number of times |
| | | | Pacemaker retries before giving up. |
+----------------------+---------+--------------------+----------------------------------------+
.. index::
single: unfencing
single: fencing; unfencing
.. _unfencing:
Unfencing
#########
With fabric fencing (such as cutting network or shared disk access rather than
power), it is expected that the cluster will fence the node, and then a system
administrator must manually investigate what went wrong, correct any issues
found, then reboot (or restart the cluster services on) the node.
Once the node reboots and rejoins the cluster, some fabric fencing devices
require an explicit command to restore the node's access. This capability is
called *unfencing* and is typically implemented as the fence agent's ``on``
command.
If any cluster resource has ``requires`` set to ``unfencing``, then that
resource will not be probed or started on a node until that node has been
unfenced.
Fence Devices Dependent on Other Resources
##########################################
In some cases, a fence device may require some other cluster resource (such as
an IP address) to be active in order to function properly.
This is obviously undesirable in general: fencing may be required when the
depended-on resource is not active, or fencing may be required because the node
running the depended-on resource is no longer responding.
However, this may be acceptable under certain conditions:
* The dependent fence device should not be able to target any node that is
allowed to run the depended-on resource.
* The depended-on resource should not be disabled during production operation.
* The ``concurrent-fencing`` cluster property should be set to ``true``.
Otherwise, if both the node running the depended-on resource and some node
targeted by the dependent fence device need to be fenced, the fencing of the
node running the depended-on resource might be ordered first, making the
second fencing impossible and blocking further recovery. With concurrent
fencing, the dependent fence device might fail at first due to the
depended-on resource being unavailable, but it will be retried and eventually
succeed once the resource is brought back up.
Even under those conditions, there is one unlikely problem scenario. The DC
always schedules fencing of itself after any other fencing needed, to avoid
unnecessary repeated DC elections. If the dependent fence device targets the
DC, and both the DC and a different node running the depended-on resource need
to be fenced, the DC fencing will always fail and block further recovery. Note,
however, that losing a DC node entirely causes some other node to become DC and
schedule the fencing, so this is only a risk when a stop or other operation
with ``on-fail`` set to ``fence`` fails on the DC.
.. index::
single: fencing; configuration
Configuring Fencing
###################
Higher-level tools can provide simpler interfaces to this process, but using
Pacemaker command-line tools, this is how you could configure a fence device.
#. Find the correct driver:
.. code-block:: none
# stonith_admin --list-installed
.. note::
You may have to install packages to make fence agents available on your
host. Searching your available packages for ``fence-`` is usually
helpful. Ensure the packages providing the fence agents you require are
installed on every cluster node.
#. Find the required parameters associated with the device
(replacing ``$AGENT_NAME`` with the name obtained from the previous step):
.. code-block:: none
# stonith_admin --metadata --agent $AGENT_NAME
#. Create a file called ``stonith.xml`` containing a primitive resource
with a class of ``stonith``, a type equal to the agent name obtained earlier,
and a parameter for each of the values returned in the previous step.
#. If the device does not know how to fence nodes based on their uname,
you may also need to set the special ``pcmk_host_map`` parameter. See
:ref:`fencing-attributes` for details.
#. If the device does not support the ``list`` command, you may also need
to set the special ``pcmk_host_list`` and/or ``pcmk_host_check``
parameters. See :ref:`fencing-attributes` for details.
#. If the device does not expect the victim to be specified with the
``port`` parameter, you may also need to set the special
``pcmk_host_argument`` parameter. See :ref:`fencing-attributes` for details.
#. Upload it into the CIB using cibadmin:
.. code-block:: none
# cibadmin --create --scope resources --xml-file stonith.xml
#. Set ``stonith-enabled`` to true:
.. code-block:: none
# crm_attribute --type crm_config --name stonith-enabled --update true
#. Once the stonith resource is running, you can test it by executing the
following, replacing ``$NODE_NAME`` with the name of the node to fence
(although you might want to stop the cluster on that machine first):
.. code-block:: none
# stonith_admin --reboot $NODE_NAME
Example Fencing Configuration
_____________________________
For this example, we assume we have a cluster node, ``pcmk-1``, whose IPMI
controller is reachable at the IP address 192.0.2.1. The IPMI controller uses
the username ``testuser`` and the password ``abc123``.
#. Looking at what's installed, we may see a variety of available agents:
.. code-block:: none
# stonith_admin --list-installed
.. code-block:: none
(... some output omitted ...)
fence_idrac
fence_ilo3
fence_ilo4
fence_ilo5
fence_imm
fence_ipmilan
(... some output omitted ...)
Perhaps after reading some man pages and doing some Internet searches,
we might decide ``fence_ipmilan`` is our best choice.
#. Next, we would check what parameters ``fence_ipmilan`` provides:
.. code-block:: none
# stonith_admin --metadata -a fence_ipmilan
.. code-block:: xml
<resource-agent name="fence_ipmilan" shortdesc="Fence agent for IPMI">
<symlink name="fence_ilo3" shortdesc="Fence agent for HP iLO3"/>
<symlink name="fence_ilo4" shortdesc="Fence agent for HP iLO4"/>
<symlink name="fence_ilo5" shortdesc="Fence agent for HP iLO5"/>
<symlink name="fence_imm" shortdesc="Fence agent for IBM Integrated Management Module"/>
<symlink name="fence_idrac" shortdesc="Fence agent for Dell iDRAC"/>
<longdesc>fence_ipmilan is an I/O Fencing agentwhich can be used with machines controlled by IPMI.This agent calls support software ipmitool (http://ipmitool.sf.net/). WARNING! This fence agent might report success before the node is powered off. You should use -m/method onoff if your fence device works correctly with that option.</longdesc>
<vendor-url/>
<parameters>
<parameter name="action" unique="0" required="0">
<getopt mixed="-o, --action=[action]"/>
<content type="string" default="reboot"/>
<shortdesc lang="en">Fencing action</shortdesc>
</parameter>
<parameter name="auth" unique="0" required="0">
<getopt mixed="-A, --auth=[auth]"/>
<content type="select">
<option value="md5"/>
<option value="password"/>
<option value="none"/>
</content>
<shortdesc lang="en">IPMI Lan Auth type.</shortdesc>
</parameter>
<parameter name="cipher" unique="0" required="0">
<getopt mixed="-C, --cipher=[cipher]"/>
<content type="string"/>
<shortdesc lang="en">Ciphersuite to use (same as ipmitool -C parameter)</shortdesc>
</parameter>
<parameter name="hexadecimal_kg" unique="0" required="0">
<getopt mixed="--hexadecimal-kg=[key]"/>
<content type="string"/>
<shortdesc lang="en">Hexadecimal-encoded Kg key for IPMIv2 authentication</shortdesc>
</parameter>
<parameter name="ip" unique="0" required="0" obsoletes="ipaddr">
<getopt mixed="-a, --ip=[ip]"/>
<content type="string"/>
<shortdesc lang="en">IP address or hostname of fencing device</shortdesc>
</parameter>
<parameter name="ipaddr" unique="0" required="0" deprecated="1">
<getopt mixed="-a, --ip=[ip]"/>
<content type="string"/>
<shortdesc lang="en">IP address or hostname of fencing device</shortdesc>
</parameter>
<parameter name="ipport" unique="0" required="0">
<getopt mixed="-u, --ipport=[port]"/>
<content type="integer" default="623"/>
<shortdesc lang="en">TCP/UDP port to use for connection with device</shortdesc>
</parameter>
<parameter name="lanplus" unique="0" required="0">
<getopt mixed="-P, --lanplus"/>
<content type="boolean" default="0"/>
<shortdesc lang="en">Use Lanplus to improve security of connection</shortdesc>
</parameter>
<parameter name="login" unique="0" required="0" deprecated="1">
<getopt mixed="-l, --username=[name]"/>
<content type="string"/>
<shortdesc lang="en">Login name</shortdesc>
</parameter>
<parameter name="method" unique="0" required="0">
<getopt mixed="-m, --method=[method]"/>
<content type="select" default="onoff">
<option value="onoff"/>
<option value="cycle"/>
</content>
<shortdesc lang="en">Method to fence</shortdesc>
</parameter>
<parameter name="passwd" unique="0" required="0" deprecated="1">
<getopt mixed="-p, --password=[password]"/>
<content type="string"/>
<shortdesc lang="en">Login password or passphrase</shortdesc>
</parameter>
<parameter name="passwd_script" unique="0" required="0" deprecated="1">
<getopt mixed="-S, --password-script=[script]"/>
<content type="string"/>
<shortdesc lang="en">Script to run to retrieve password</shortdesc>
</parameter>
<parameter name="password" unique="0" required="0" obsoletes="passwd">
<getopt mixed="-p, --password=[password]"/>
<content type="string"/>
<shortdesc lang="en">Login password or passphrase</shortdesc>
</parameter>
<parameter name="password_script" unique="0" required="0" obsoletes="passwd_script">
<getopt mixed="-S, --password-script=[script]"/>
<content type="string"/>
<shortdesc lang="en">Script to run to retrieve password</shortdesc>
</parameter>
<parameter name="plug" unique="0" required="0" obsoletes="port">
<getopt mixed="-n, --plug=[ip]"/>
<content type="string"/>
<shortdesc lang="en">IP address or hostname of fencing device (together with --port-as-ip)</shortdesc>
</parameter>
<parameter name="port" unique="0" required="0" deprecated="1">
<getopt mixed="-n, --plug=[ip]"/>
<content type="string"/>
<shortdesc lang="en">IP address or hostname of fencing device (together with --port-as-ip)</shortdesc>
</parameter>
<parameter name="privlvl" unique="0" required="0">
<getopt mixed="-L, --privlvl=[level]"/>
<content type="select" default="administrator">
<option value="callback"/>
<option value="user"/>
<option value="operator"/>
<option value="administrator"/>
</content>
<shortdesc lang="en">Privilege level on IPMI device</shortdesc>
</parameter>
<parameter name="target" unique="0" required="0">
<getopt mixed="--target=[targetaddress]"/>
<content type="string"/>
<shortdesc lang="en">Bridge IPMI requests to the remote target address</shortdesc>
</parameter>
<parameter name="username" unique="0" required="0" obsoletes="login">
<getopt mixed="-l, --username=[name]"/>
<content type="string"/>
<shortdesc lang="en">Login name</shortdesc>
</parameter>
<parameter name="quiet" unique="0" required="0">
<getopt mixed="-q, --quiet"/>
<content type="boolean"/>
<shortdesc lang="en">Disable logging to stderr. Does not affect --verbose or --debug-file or logging to syslog.</shortdesc>
</parameter>
<parameter name="verbose" unique="0" required="0">
<getopt mixed="-v, --verbose"/>
<content type="boolean"/>
<shortdesc lang="en">Verbose mode</shortdesc>
</parameter>
<parameter name="debug" unique="0" required="0" deprecated="1">
<getopt mixed="-D, --debug-file=[debugfile]"/>
<content type="string"/>
<shortdesc lang="en">Write debug information to given file</shortdesc>
</parameter>
<parameter name="debug_file" unique="0" required="0" obsoletes="debug">
<getopt mixed="-D, --debug-file=[debugfile]"/>
<content type="string"/>
<shortdesc lang="en">Write debug information to given file</shortdesc>
</parameter>
<parameter name="version" unique="0" required="0">
<getopt mixed="-V, --version"/>
<content type="boolean"/>
<shortdesc lang="en">Display version information and exit</shortdesc>
</parameter>
<parameter name="help" unique="0" required="0">
<getopt mixed="-h, --help"/>
<content type="boolean"/>
<shortdesc lang="en">Display help and exit</shortdesc>
</parameter>
<parameter name="delay" unique="0" required="0">
<getopt mixed="--delay=[seconds]"/>
<content type="second" default="0"/>
<shortdesc lang="en">Wait X seconds before fencing is started</shortdesc>
</parameter>
<parameter name="ipmitool_path" unique="0" required="0">
<getopt mixed="--ipmitool-path=[path]"/>
<content type="string" default="/usr/bin/ipmitool"/>
<shortdesc lang="en">Path to ipmitool binary</shortdesc>
</parameter>
<parameter name="login_timeout" unique="0" required="0">
<getopt mixed="--login-timeout=[seconds]"/>
<content type="second" default="5"/>
<shortdesc lang="en">Wait X seconds for cmd prompt after login</shortdesc>
</parameter>
<parameter name="port_as_ip" unique="0" required="0">
<getopt mixed="--port-as-ip"/>
<content type="boolean"/>
<shortdesc lang="en">Make "port/plug" to be an alias to IP address</shortdesc>
</parameter>
<parameter name="power_timeout" unique="0" required="0">
<getopt mixed="--power-timeout=[seconds]"/>
<content type="second" default="20"/>
<shortdesc lang="en">Test X seconds for status change after ON/OFF</shortdesc>
</parameter>
<parameter name="power_wait" unique="0" required="0">
<getopt mixed="--power-wait=[seconds]"/>
<content type="second" default="2"/>
<shortdesc lang="en">Wait X seconds after issuing ON/OFF</shortdesc>
</parameter>
<parameter name="shell_timeout" unique="0" required="0">
<getopt mixed="--shell-timeout=[seconds]"/>
<content type="second" default="3"/>
<shortdesc lang="en">Wait X seconds for cmd prompt after issuing command</shortdesc>
</parameter>
<parameter name="retry_on" unique="0" required="0">
<getopt mixed="--retry-on=[attempts]"/>
<content type="integer" default="1"/>
<shortdesc lang="en">Count of attempts to retry power on</shortdesc>
</parameter>
<parameter name="sudo" unique="0" required="0" deprecated="1">
<getopt mixed="--use-sudo"/>
<content type="boolean"/>
<shortdesc lang="en">Use sudo (without password) when calling 3rd party software</shortdesc>
</parameter>
<parameter name="use_sudo" unique="0" required="0" obsoletes="sudo">
<getopt mixed="--use-sudo"/>
<content type="boolean"/>
<shortdesc lang="en">Use sudo (without password) when calling 3rd party software</shortdesc>
</parameter>
<parameter name="sudo_path" unique="0" required="0">
<getopt mixed="--sudo-path=[path]"/>
<content type="string" default="/usr/bin/sudo"/>
<shortdesc lang="en">Path to sudo binary</shortdesc>
</parameter>
</parameters>
<actions>
<action name="on" automatic="0"/>
<action name="off"/>
<action name="reboot"/>
<action name="status"/>
<action name="monitor"/>
<action name="metadata"/>
<action name="manpage"/>
<action name="validate-all"/>
<action name="diag"/>
<action name="stop" timeout="20s"/>
<action name="start" timeout="20s"/>
</actions>
</resource-agent>
Once we've decided what parameter values we think we need, it is a good idea
to run the fence agent's status action manually, to verify that our values
work correctly:
.. code-block:: none
# fence_ipmilan --lanplus -a 192.0.2.1 -l testuser -p abc123 -o status
Chassis Power is on
#. Based on that, we might create a fencing resource configuration like this in
``stonith.xml`` (or any file name, just use the same name with ``cibadmin``
later):
.. code-block:: xml
<primitive id="Fencing-pcmk-1" class="stonith" type="fence_ipmilan" >
<instance_attributes id="Fencing-params" >
<nvpair id="Fencing-lanplus" name="lanplus" value="1" />
<nvpair id="Fencing-ip" name="ip" value="192.0.2.1" />
<nvpair id="Fencing-password" name="password" value="abc123" />
<nvpair id="Fencing-username" name="username" value="testuser" />
</instance_attributes>
<operations >
<op id="Fencing-monitor-10m" interval="10m" name="monitor" timeout="300s" />
</operations>
</primitive>
.. note::
Even though the man page shows that the ``action`` parameter is
supported, we do not provide that in the resource configuration.
Pacemaker will supply an appropriate action whenever the fence device
must be used.
#. In this case, we don't need to configure ``pcmk_host_map`` because
``fence_ipmilan`` ignores the target node name and instead uses its
``ip`` parameter to know how to contact the IPMI controller.
#. We do need to let Pacemaker know which cluster node can be fenced by this
device, since ``fence_ipmilan`` doesn't support the ``list`` action. Add
a line like this to the agent's instance attributes:
.. code-block:: xml
<nvpair id="Fencing-pcmk_host_list" name="pcmk_host_list" value="pcmk-1" />
#. We don't need to configure ``pcmk_host_argument`` since ``ip`` is all the
fence agent needs (it ignores the target name).
#. Make the configuration active:
.. code-block:: none
# cibadmin --create --scope resources --xml-file stonith.xml
#. Set ``stonith-enabled`` to true (this only has to be done once):
.. code-block:: none
# crm_attribute --type crm_config --name stonith-enabled --update true
#. Since our cluster is still in testing, we can reboot ``pcmk-1`` without
bothering anyone, so we'll test our fencing configuration by running this
from one of the other cluster nodes:
.. code-block:: none
# stonith_admin --reboot pcmk-1
Then we will verify that the node did, in fact, reboot.
We can repeat that process to create a separate fencing resource for each node.
With some other fence device types, a single fencing resource is able to be
used for all nodes. In fact, we could do that with ``fence_ipmilan``, using the
``port_as_ip`` parameter along with ``pcmk_host_map``. Either approach is
fine.
.. index::
single: fencing; topology
single: fencing-topology
single: fencing-level
Fencing Topologies
##################
Pacemaker supports fencing nodes with multiple devices through a feature called
*fencing topologies*. Fencing topologies may be used to provide alternative
devices in case one fails, or to require multiple devices to all be executed
successfully in order to consider the node successfully fenced, or even a
combination of the two.
Create the individual devices as you normally would, then define one or more
``fencing-level`` entries in the ``fencing-topology`` section of the
configuration.
* Each fencing level is attempted in order of ascending ``index``. Allowed
values are 1 through 9.
* If a device fails, processing terminates for the current level. No further
devices in that level are exercised, and the next level is attempted instead.
* If the operation succeeds for all the listed devices in a level, the level is
deemed to have passed.
* The operation is finished when a level has passed (success), or all levels
have been attempted (failed).
* If the operation failed, the next step is determined by the scheduler and/or
the controller.
Some possible uses of topologies include:
* Try on-board IPMI, then an intelligent power switch if that fails
* Try fabric fencing of both disk and network, then fall back to power fencing
if either fails
* Wait up to a certain time for a kernel dump to complete, then cut power to
the node
.. table:: **Attributes of a fencing-level Element**
+------------------+-----------------------------------------------------------------------------------------+
| Attribute | Description |
+==================+=========================================================================================+
| id | .. index:: |
| | pair: fencing-level; id |
| | |
| | A unique name for this element (required) |
+------------------+-----------------------------------------------------------------------------------------+
| target | .. index:: |
| | pair: fencing-level; target |
| | |
| | The name of a single node to which this level applies |
+------------------+-----------------------------------------------------------------------------------------+
| target-pattern | .. index:: |
| | pair: fencing-level; target-pattern |
| | |
| | An extended regular expression (as defined in `POSIX |
| | <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04>`_) |
| | matching the names of nodes to which this level applies |
+------------------+-----------------------------------------------------------------------------------------+
| target-attribute | .. index:: |
| | pair: fencing-level; target-attribute |
| | |
| | The name of a node attribute that is set (to ``target-value``) for nodes to which this |
| | level applies |
+------------------+-----------------------------------------------------------------------------------------+
| target-value | .. index:: |
| | pair: fencing-level; target-value |
| | |
| | The node attribute value (of ``target-attribute``) that is set for nodes to which this |
| | level applies |
+------------------+-----------------------------------------------------------------------------------------+
| index | .. index:: |
| | pair: fencing-level; index |
| | |
| | The order in which to attempt the levels. Levels are attempted in ascending order |
| | *until one succeeds*. Valid values are 1 through 9. |
+------------------+-----------------------------------------------------------------------------------------+
| devices | .. index:: |
| | pair: fencing-level; devices |
| | |
| | A comma-separated list of devices that must all be tried for this level |
+------------------+-----------------------------------------------------------------------------------------+
.. note:: **Fencing topology with different devices for different nodes**
.. code-block:: xml
<cib crm_feature_set="3.6.0" validate-with="pacemaker-3.5" admin_epoch="1" epoch="0" num_updates="0">
<configuration>
...
<fencing-topology>
<!-- For pcmk-1, try poison-pill and fall back to power -->
<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill"/>
<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power"/>
<!-- For pcmk-2, try disk and network, and fall back to power -->
<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,network"/>
<fencing-level id="f-p2.2" target="pcmk-2" index="2" devices="power"/>
</fencing-topology>
...
</configuration>
<status/>
</cib>
Example Dual-Layer, Dual-Device Fencing Topologies
__________________________________________________
The following example illustrates an advanced use of ``fencing-topology`` in a
cluster with the following properties:
* 2 nodes (prod-mysql1 and prod-mysql2)
* the nodes have IPMI controllers reachable at 192.0.2.1 and 192.0.2.2
* the nodes each have two independent Power Supply Units (PSUs) connected to
two independent Power Distribution Units (PDUs) reachable at 198.51.100.1
(port 10 and port 11) and 203.0.113.1 (port 10 and port 11)
* fencing via the IPMI controller uses the ``fence_ipmilan`` agent (1 fence device
per controller, with each device targeting a separate node)
* fencing via the PDUs uses the ``fence_apc_snmp`` agent (1 fence device per
PDU, with both devices targeting both nodes)
* a random delay is used to lessen the chance of a "death match"
* fencing topology is set to try IPMI fencing first then dual PDU fencing if
that fails
In a node failure scenario, Pacemaker will first select ``fence_ipmilan`` to
try to kill the faulty node. Using the fencing topology, if that method fails,
it will then move on to selecting ``fence_apc_snmp`` twice (once for the first
PDU, then again for the second PDU).
The fence action is considered successful only if both PDUs report the required
status. If any of them fails, fencing loops back to the first fencing method,
``fence_ipmilan``, and so on, until the node is fenced or the fencing action is
cancelled.
.. note:: **First fencing method: single IPMI device per target**
Each cluster node has its own dedicated IPMI controller that can be contacted
for fencing using the following primitives:
.. code-block:: xml
<primitive class="stonith" id="fence_prod-mysql1_ipmi" type="fence_ipmilan">
<instance_attributes id="fence_prod-mysql1_ipmi-instance_attributes">
<nvpair id="fence_prod-mysql1_ipmi-instance_attributes-ipaddr" name="ipaddr" value="192.0.2.1"/>
<nvpair id="fence_prod-mysql1_ipmi-instance_attributes-login" name="login" value="fencing"/>
<nvpair id="fence_prod-mysql1_ipmi-instance_attributes-passwd" name="passwd" value="finishme"/>
<nvpair id="fence_prod-mysql1_ipmi-instance_attributes-lanplus" name="lanplus" value="true"/>
<nvpair id="fence_prod-mysql1_ipmi-instance_attributes-pcmk_host_list" name="pcmk_host_list" value="prod-mysql1"/>
<nvpair id="fence_prod-mysql1_ipmi-instance_attributes-pcmk_delay_max" name="pcmk_delay_max" value="8s"/>
</instance_attributes>
</primitive>
<primitive class="stonith" id="fence_prod-mysql2_ipmi" type="fence_ipmilan">
<instance_attributes id="fence_prod-mysql2_ipmi-instance_attributes">
<nvpair id="fence_prod-mysql2_ipmi-instance_attributes-ipaddr" name="ipaddr" value="192.0.2.2"/>
<nvpair id="fence_prod-mysql2_ipmi-instance_attributes-login" name="login" value="fencing"/>
<nvpair id="fence_prod-mysql2_ipmi-instance_attributes-passwd" name="passwd" value="finishme"/>
<nvpair id="fence_prod-mysql2_ipmi-instance_attributes-lanplus" name="lanplus" value="true"/>
<nvpair id="fence_prod-mysql2_ipmi-instance_attributes-pcmk_host_list" name="pcmk_host_list" value="prod-mysql2"/>
<nvpair id="fence_prod-mysql2_ipmi-instance_attributes-pcmk_delay_max" name="pcmk_delay_max" value="8s"/>
</instance_attributes>
</primitive>
.. note:: **Second fencing method: dual PDU devices**
Each cluster node also has 2 distinct power supplies controlled by 2
distinct PDUs:
* Node 1: PDU 1 port 10 and PDU 2 port 10
* Node 2: PDU 1 port 11 and PDU 2 port 11
The matching fencing agents are configured as follows:
.. code-block:: xml
<primitive class="stonith" id="fence_apc1" type="fence_apc_snmp">
<instance_attributes id="fence_apc1-instance_attributes">
<nvpair id="fence_apc1-instance_attributes-ipaddr" name="ipaddr" value="198.51.100.1"/>
<nvpair id="fence_apc1-instance_attributes-login" name="login" value="fencing"/>
<nvpair id="fence_apc1-instance_attributes-passwd" name="passwd" value="fencing"/>
<nvpair id="fence_apc1-instance_attributes-pcmk_host_map"
name="pcmk_host_map" value="prod-mysql1:10;prod-mysql2:11"/>
<nvpair id="fence_apc1-instance_attributes-pcmk_delay_max" name="pcmk_delay_max" value="8s"/>
</instance_attributes>
</primitive>
<primitive class="stonith" id="fence_apc2" type="fence_apc_snmp">
<instance_attributes id="fence_apc2-instance_attributes">
<nvpair id="fence_apc2-instance_attributes-ipaddr" name="ipaddr" value="203.0.113.1"/>
<nvpair id="fence_apc2-instance_attributes-login" name="login" value="fencing"/>
<nvpair id="fence_apc2-instance_attributes-passwd" name="passwd" value="fencing"/>
<nvpair id="fence_apc2-instance_attributes-pcmk_host_map"
name="pcmk_host_map" value="prod-mysql1:10;prod-mysql2:11"/>
<nvpair id="fence_apc2-instance_attributes-pcmk_delay_max" name="pcmk_delay_max" value="8s"/>
</instance_attributes>
</primitive>
.. note:: **Fencing topology**
Now that all the fencing resources are defined, it's time to create the
right topology. We want to first fence using IPMI and if that does not work,
fence both PDUs to effectively and surely kill the node.
.. code-block:: xml
<fencing-topology>
<fencing-level id="level-1-1" target="prod-mysql1" index="1" devices="fence_prod-mysql1_ipmi" />
<fencing-level id="level-1-2" target="prod-mysql1" index="2" devices="fence_apc1,fence_apc2" />
<fencing-level id="level-2-1" target="prod-mysql2" index="1" devices="fence_prod-mysql2_ipmi" />
<fencing-level id="level-2-2" target="prod-mysql2" index="2" devices="fence_apc1,fence_apc2" />
</fencing-topology>
In ``fencing-topology``, the lowest ``index`` value for a target determines
its first fencing method.
Remapping Reboots
#################
When the cluster needs to reboot a node, whether because ``stonith-action`` is
``reboot`` or because a reboot was requested externally (such as by
``stonith_admin --reboot``), it will remap that to other commands in two cases:
* If the chosen fencing device does not support the ``reboot`` command, the
cluster will ask it to perform ``off`` instead.
* If a fencing topology level with multiple devices must be executed, the
cluster will ask all the devices to perform ``off``, then ask the devices to
perform ``on``.
To understand the second case, consider the example of a node with redundant
power supplies connected to intelligent power switches. Rebooting one switch
and then the other would have no effect on the node. Turning both switches off,
and then on, actually reboots the node.
In such a case, the fencing operation will be treated as successful as long as
the ``off`` commands succeed, because then it is safe for the cluster to
recover any resources that were on the node. Timeouts and errors in the ``on``
phase will be logged but ignored.
When a reboot operation is remapped, any action-specific timeout for the
remapped action will be used (for example, ``pcmk_off_timeout`` will be used
when executing the ``off`` command, not ``pcmk_reboot_timeout``).

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 4:34 PM (17 h, 23 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018913
Default Alt Text
(126 KB)

Event Timeline