Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/fencing/main.c b/fencing/main.c
index 2694452917..9773085f99 100644
--- a/fencing/main.c
+++ b/fencing/main.c
@@ -1,1404 +1,1408 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <allocate.h>
#include <internal.h>
#include <standalone_config.h>
/* Name of the local node: stamped onto client requests as F_STONITH_CLIENTNODE,
 * compared against each device's allowed nodes, and freed by stonith_cleanup() */
char *stonith_our_uname = NULL;
/* NOTE(review): presumably the local node's cluster UUID; it is not referenced
 * in this part of the file - confirm where it is set and freed */
char *stonith_our_uuid = NULL;
/* Last value seen for the 'stonith-watchdog-timeout' CIB option, in milliseconds
 * (maintained by update_cib_cache_cb()) */
long stonith_watchdog_timeout_ms = 0;
/* Main loop handle; stonith_shutdown() quits it when it is running */
GMainLoop *mainloop = NULL;
/* Set by the 's' command-line option, cleared by 'c' */
gboolean stand_alone = FALSE;
/* Set by the 'c' command-line option */
gboolean no_cib_connect = FALSE;
/* Set by stonith_shutdown(); new IPC clients are refused while it is TRUE */
gboolean stonith_shutdown_flag = FALSE;
/* IPC server handle; destroyed by stonith_cleanup() */
qb_ipcs_service_t *ipcs = NULL;
/* Local copy of the CIB, kept current by update_cib_cache_cb() */
xmlNode *local_cib = NULL;
/* CIB connection object created in setup_cib() */
static cib_t *cib_api = NULL;
/* Library handle filled in by find_library_function() in setup_cib() */
static void *cib_library = NULL;
/* Forward declarations: both are used by callbacks defined before them */
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
/*
 * IPC 'connection_accept' handler (see ipc_callbacks).
 *
 * Returns 0 to accept the connection, -EPERM when the daemon is shutting
 * down, and -EIO when no client object could be created for it.
 */
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    int32_t verdict = 0;

    if (stonith_shutdown_flag) {
        crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
        verdict = -EPERM;

    } else if (crm_client_new(c, uid, gid) == NULL) {
        verdict = -EIO;
    }

    return verdict;
}
/* IPC 'connection_created' handler (see ipc_callbacks): only traces the event */
static void
st_ipc_created(qb_ipcs_connection_t * c)
{
    crm_trace("Connection created for %p", c);
}
/*
 * IPC 'msg_process' handler (see ipc_callbacks): decode one client request,
 * tag it with the client's identity and hand it to stonith_command().
 *
 * Every path returns 0.
 * NOTE(review): the meaning libqb attaches to this return value is not
 * visible here - confirm against the libqb documentation.
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    int call_options = 0;
    xmlNode *request = NULL;
    crm_client_t *c = crm_client_get(qbc);

    if (c == NULL) {
        crm_info("Invalid client: %p", qbc);
        return 0;
    }

    request = crm_ipcs_recv(c, data, size, &id, &flags);
    if (request == NULL) {
        /* Nothing usable arrived: tell the client so and stop here */
        crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__);
        return 0;
    }

    if (c->name == NULL) {
        /* First request from this client: derive "<name>.<pid>" for logging */
        const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);

        c->name = g_strdup_printf("%s.%u", value ? value : "unknown", c->pid);
    }

    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
    crm_trace("Flags %u/%u for command %u from %s", flags, call_options, id, crm_client_name(c));

    if (is_set(call_options, st_opt_sync_call)) {
        CRM_ASSERT(flags & crm_ipc_client_response);
        CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
        c->request_id = id;     /* Reply only to the last one */
    }

    /* Record who sent the request and which node received it */
    crm_xml_add(request, F_STONITH_CLIENTID, c->id);
    crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
    crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);

    crm_log_xml_trace(request, "Client[inbound]");
    stonith_command(c, id, flags, request, NULL);

    free_xml(request);
    return 0;
}
/*
 * IPC 'connection_closed' handler (see ipc_callbacks): destroy the client
 * object attached to the connection, if there still is one.
 */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
    crm_client_t *client = crm_client_get(c);

    if (client != NULL) {
        crm_trace("Connection %p closed", c);
        crm_client_destroy(client);
    }

    /* 0 means: yes, go ahead and destroy the connection */
    return 0;
}
/*
 * IPC 'connection_destroyed' handler (see ipc_callbacks): trace the event and
 * reuse the close handler so the client object is released either way.
 */
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p destroyed", c);
    st_ipc_closed(c);
}
/*
 * Handle a message received from a cluster peer.
 *
 * "poke" operations (broadcast by st_peer_update_callback()) are dropped;
 * everything else is passed to stonith_command() with the sender's name.
 */
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
    const char *remote_peer = crm_element_value(msg, F_ORIG);
    const char *op = crm_element_value(msg, F_STONITH_OPERATION);

    if (!crm_str_eq(op, "poke", TRUE)) {
        crm_log_xml_trace(msg, "Peer[inbound]");
        stonith_command(NULL, 0, 0, msg, remote_peer);
    }
}
#if SUPPORT_HEARTBEAT
/* Heartbeat message handler: convert the message to XML, process it, free the XML */
static void
stonith_peer_hb_callback(HA_Message * msg, void *private_data)
{
    xmlNode *converted = convert_ha_message(NULL, msg, __FUNCTION__);

    stonith_peer_callback(converted, private_data);
    free_xml(converted);
}
/*
 * Called when the Heartbeat connection goes away: log whether that was
 * expected (shutdown already in progress) and shut the daemon down.
 */
static void
stonith_peer_hb_destroy(gpointer user_data)
{
    if (!stonith_shutdown_flag) {
        crm_err("Heartbeat connection lost!  Exiting.");
    } else {
        crm_info("Heartbeat disconnection complete... exiting");
    }

    stonith_shutdown(0);
}
#endif
#if SUPPORT_COROSYNC
/*
 * CPG message handler: unpack the raw message and, for cluster-class
 * messages, parse it as XML, record the sender in F_ORIG and process it.
 * Messages of any other class are discarded.
 */
static void
stonith_peer_ais_callback(cpg_handle_t handle,
                          const struct cpg_name *groupName,
                          uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    uint32_t kind = 0;
    xmlNode *xml = NULL;
    const char *from = NULL;
    char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);

    if (data == NULL) {
        return;
    }

    if (kind == crm_class_cluster) {
        xml = string2xml(data);
        if (xml == NULL) {
            crm_err("Invalid XML: '%.120s'", data);
            goto release_data;
        }
        crm_xml_add(xml, F_ORIG, from);
        stonith_peer_callback(xml, NULL);
    }

    free_xml(xml);

  release_data:
    free(data);
}
/* Called when the Corosync connection is lost: log the error and shut down */
static void
stonith_peer_cs_destroy(gpointer user_data)
{
    crm_err("Corosync connection terminated");
    stonith_shutdown(0);
}
#endif
/*
 * Send a reply or an event to the local client that originated a request.
 *
 * notify_src: message to deliver
 * client_id:  id used to look the client up
 * sync_reply: when set, answer the client's pending synchronous request
 *             (its recorded request id is consumed); otherwise the message
 *             goes out as a server event
 * from_peer:  only affects the trace text
 *
 * When no client matches client_id this only traces. A failed send to an
 * existing client is logged as a warning.
 */
void
do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
    /* send callback to originating child */
    crm_client_t *client_obj = NULL;
    const char *peer_note = from_peer ? "(originator of delegated request)" : "";
    int local_rc = pcmk_ok;
    int rid = 0;

    crm_trace("Sending response");
    client_obj = crm_client_get_by_id(client_id);
    crm_trace("Sending callback to request originator");

    if (client_obj == NULL) {
        crm_trace("No client to sent the response to.  F_STONITH_CLIENTID not set.");
        return;
    }

    if (sync_reply) {
        CRM_LOG_ASSERT(client_obj->request_id);

        rid = client_obj->request_id;
        client_obj->request_id = 0;

        crm_trace("Sending response %d to %s %s", rid, client_obj->name, peer_note);
        local_rc = crm_ipcs_send(client_obj, rid, notify_src, crm_ipc_flags_none);

    } else {
        crm_trace("Sending an event to %s %s", client_obj->name, peer_note);
        local_rc = crm_ipcs_send(client_obj, rid, notify_src, crm_ipc_server_event);
    }

    if (local_rc < pcmk_ok) {
        crm_warn("%sSync reply to %s failed: %s",
                 sync_reply ? "" : "A-", client_obj->name, pcmk_strerror(local_rc));
    }
}
/*
 * Map a notification type name to its option bit.
 *
 * Returns 0x01 for T_STONITH_NOTIFY_FENCE, 0x04 for STONITH_OP_DEVICE_ADD,
 * 0x10 for STONITH_OP_DEVICE_DEL and 0 for anything else.
 */
long long
get_stonith_flag(const char *name)
{
    const struct {
        const char *type;
        long long bit;
    } known[] = {
        { T_STONITH_NOTIFY_FENCE, 0x01 },
        { STONITH_OP_DEVICE_ADD, 0x04 },
        { STONITH_OP_DEVICE_DEL, 0x10 },
    };
    size_t slot = 0;

    for (slot = 0; slot < sizeof(known) / sizeof(known[0]); slot++) {
        if (safe_str_eq(name, known[slot].type)) {
            return known[slot].bit;
        }
    }
    return 0;
}
/*
 * Hash-table iterator used by do_stonith_notify().
 *
 * key:       unused
 * value:     the client (crm_client_t *)
 * user_data: the notification message (xmlNode *)
 *
 * The message is sent only to clients that have a channel and whose options
 * include the flag matching the message's F_SUBTYPE.
 */
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
    xmlNode *update_msg = user_data;
    crm_client_t *client = value;
    const char *type = NULL;
    int rc = 0;

    CRM_CHECK(client != NULL, return);
    CRM_CHECK(update_msg != NULL, return);

    type = crm_element_value(update_msg, F_SUBTYPE);
    CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);

    if (client->ipcs == NULL) {
        crm_trace("Skipping client with NULL channel");
        return;
    }

    if ((client->options & get_stonith_flag(type)) == 0) {
        /* This client did not ask for this kind of notification */
        return;
    }

    rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error);
    if (rc > 0) {
        crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client),
                  client->id);
    } else {
        crm_warn("%s notification of client %s.%.6s failed: %s (%d)",
                 type, crm_client_name(client), client->id, pcmk_strerror(rc), rc);
    }
}
/*
 * Tell a client the timeout that applies to one of its calls.
 *
 * Does nothing when timeout is 0, when call_id or client_id is NULL, or when
 * no client matches client_id. Otherwise a T_STONITH_TIMEOUT_VALUE message
 * carrying the call id and timeout is sent to the client as a server event.
 */
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
    crm_client_t *client = NULL;
    xmlNode *notify_data = NULL;

    if (timeout == 0 || call_id == NULL || client_id == NULL) {
        return;
    }

    client = crm_client_get_by_id(client_id);
    if (client == NULL) {
        return;
    }

    notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
    crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);

    crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);

    /* The client is known to exist at this point */
    crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event);

    free_xml(notify_data);
}
/*
 * Build a notification and offer it to every connected client.
 *
 * options: unused
 * type:    notification type, stored as both F_SUBTYPE and F_STONITH_OPERATION
 * result:  result code, stored as F_STONITH_RC
 * data:    optional payload attached as F_STONITH_CALLDATA
 *
 * stonith_notify_client() decides per client whether the message is sent.
 */
void
do_stonith_notify(int options, const char *type, int result, xmlNode * data)
{
    /* TODO: Standardize the contents of data */
    xmlNode *notification = create_xml_node(NULL, "notify");

    CRM_CHECK(type != NULL,;);

    crm_xml_add(notification, F_TYPE, T_STONITH_NOTIFY);
    crm_xml_add(notification, F_SUBTYPE, type);
    crm_xml_add(notification, F_STONITH_OPERATION, type);
    crm_xml_add_int(notification, F_STONITH_RC, result);

    if (data != NULL) {
        add_message_xml(notification, F_STONITH_CALLDATA, data);
    }

    crm_trace("Notifying clients");
    g_hash_table_foreach(client_connections, stonith_notify_client, notification);
    free_xml(notification);
    crm_trace("Notify complete");
}
/*
 * Split a comma-separated device list into a stonith_key_value_t list.
 *
 * devices: the list, e.g. "disk,network"; may be NULL
 *
 * Returns one entry per comma-delimited segment (empty segments produce empty
 * entries), each added with a NULL key and the segment as its value, or NULL
 * when devices is NULL. The caller owns the returned list.
 *
 * Allocation failure trips CRM_ASSERT instead of being written through, and
 * lengths are kept in size_t so the result of strlen() is not narrowed.
 */
static stonith_key_value_t *
parse_device_list(const char *devices)
{
    size_t lpc = 0;
    size_t max = 0;
    size_t last = 0;
    stonith_key_value_t *output = NULL;

    if (devices == NULL) {
        return output;
    }

    max = strlen(devices);
    for (lpc = 0; lpc <= max; lpc++) {
        if (devices[lpc] == ',' || devices[lpc] == 0) {
            size_t len = lpc - last;
            char *line = calloc(1, len + 1);

            CRM_ASSERT(line != NULL);

            /* calloc() already supplied the terminating NUL */
            memcpy(line, devices + last, len);
            output = stonith_key_value_add(output, NULL, line);
            free(line);

            last = lpc + 1;
        }
    }

    return output;
}
/*
 * Remove one fencing level for a target and notify clients of the outcome.
 *
 * node:  the target whose level is removed
 * level: index of the level
 */
static void
topology_remove_helper(const char *node, int level)
{
    int rc;
    char *desc = NULL;
    xmlNode *request = create_xml_node(NULL, F_STONITH_LEVEL);
    xmlNode *notification = create_xml_node(NULL, STONITH_OP_LEVEL_DEL);

    /* Describe the level to remove */
    crm_xml_add(request, F_STONITH_ORIGIN, __FUNCTION__);
    crm_xml_add_int(request, XML_ATTR_ID, level);
    crm_xml_add(request, F_STONITH_TARGET, node);

    rc = stonith_level_remove(request, &desc);

    /* Report the result along with the current topology size */
    crm_xml_add(notification, F_STONITH_DEVICE, desc);
    crm_xml_add_int(notification, F_STONITH_ACTIVE, g_hash_table_size(topology));
    do_stonith_notify(0, STONITH_OP_LEVEL_DEL, rc, notification);

    free_xml(notification);
    free_xml(request);
    free(desc);
}
/*
 * Register one fencing level for a target and notify clients of the outcome.
 *
 * node:        the target the level applies to
 * level:       index of the level
 * device_list: devices that make up the level
 */
static void
topology_register_helper(const char *node, int level, stonith_key_value_t * device_list)
{
    int rc;
    char *desc = NULL;
    xmlNode *notification = create_xml_node(NULL, STONITH_OP_LEVEL_ADD);
    xmlNode *request = create_level_registration_xml(node, level, device_list);

    rc = stonith_level_register(request, &desc);

    /* Report the result along with the current topology size */
    crm_xml_add(notification, F_STONITH_DEVICE, desc);
    crm_xml_add_int(notification, F_STONITH_ACTIVE, g_hash_table_size(topology));
    do_stonith_notify(0, STONITH_OP_LEVEL_ADD, rc, notification);

    free_xml(notification);
    free_xml(request);
    free(desc);
}
/*
 * Unregister every fencing device found in an XPath result set.
 *
 * Results whose class attribute is not "stonith" (or that are missing
 * altogether) are skipped; the rest are removed by their id.
 */
static void
remove_cib_device(xmlXPathObjectPtr xpathObj)
{
    int total = numXpathResults(xpathObj);
    int lpc = 0;

    while (lpc < total) {
        const char *standard = NULL;
        xmlNode *match = getXpathResult(xpathObj, lpc);

        lpc++;

        CRM_LOG_ASSERT(match != NULL);
        if (match != NULL) {
            standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
        }

        if (safe_str_neq(standard, "stonith")) {
            continue;
        }

        stonith_device_remove(crm_element_value(match, XML_ATTR_ID), TRUE);
    }
}
/*
 * Apply one fencing-level element to the topology.
 *
 * match:  the fencing-level XML element; a NULL value is logged and ignored
 * remove: when true, the level currently registered for the same target and
 *         index is removed before the new definition is registered
 */
static void
handle_topology_change(xmlNode *match, bool remove)
{
    int index = 0;
    const char *target = NULL;
    const char *dev_list = NULL;
    stonith_key_value_t *devices = NULL;

    CRM_LOG_ASSERT(match != NULL);
    if (match == NULL) {
        return;
    }

    crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
    target = crm_element_value(match, XML_ATTR_STONITH_TARGET);
    dev_list = crm_element_value(match, XML_ATTR_STONITH_DEVICES);
    devices = parse_device_list(dev_list);

    crm_trace("Updating %s[%d] (%s) to %s", target, index, ID(match), dev_list);

    if (remove) {
        topology_remove_helper(target, index);
    }
    topology_register_helper(target, index, devices);

    stonith_key_value_freeall(devices, 1, 1);
}
/*
 * Process the removed fencing levels in an XPath result set.
 *
 * Only elements carrying the diff marker attribute are treated as deletions.
 * Those need a target and a positive index; otherwise an error is logged.
 */
static void
remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
    int total = numXpathResults(xpathObj);
    int lpc = 0;

    for (lpc = 0; lpc < total; lpc++) {
        int index = 0;
        const char *target = NULL;
        xmlNode *match = getXpathResult(xpathObj, lpc);

        CRM_LOG_ASSERT(match != NULL);
        if (match == NULL || crm_element_value(match, XML_DIFF_MARKER) == NULL) {
            /* Not a deletion: modifications are dealt with during the 'addition' stage */
            continue;
        }

        /* Deletion */
        target = crm_element_value(match, XML_ATTR_STONITH_TARGET);
        crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);

        if (target == NULL) {
            crm_err("Invalid fencing target in element %s", ID(match));

        } else if (index <= 0) {
            crm_err("Invalid level for %s in element %s", target, ID(match));

        } else {
            topology_remove_helper(target, index);
        }
    }
}
/*
 * Register every fencing level in an XPath result set, replacing any level
 * already registered for the same target and index.
 *
 * force: unused
 */
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force)
{
    int total = numXpathResults(xpathObj);
    int lpc = 0;

    while (lpc < total) {
        handle_topology_change(getXpathResult(xpathObj, lpc), TRUE);
        lpc++;
    }
}
/* Fencing
<diff crm_feature_set="3.0.6">
<diff-removed>
<fencing-topology>
<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
<fencing-level devices="disk,network" id="f-p2.1"/>
</fencing-topology>
</diff-removed>
<diff-added>
<fencing-topology>
<fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
<fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
</fencing-topology>
</diff-added>
</diff>
*/
/*
 * Rebuild the whole fencing topology from the local CIB copy.
 *
 * An existing topology table is destroyed and recreated empty, then every
 * fencing-level element found in local_cib is registered.
 *
 * msg: unused
 */
static void
fencing_topology_init(xmlNode * msg)
{
    xmlXPathObjectPtr levels = NULL;
    const char *xpath = "//" XML_TAG_FENCING_LEVEL;

    crm_trace("Full topology refresh");

    if (topology != NULL) {
        g_hash_table_destroy(topology);
        topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry);
    }

    /* Grab everything */
    levels = xpath_search(local_cib, xpath);
    register_fencing_topology(levels, TRUE);
    freeXpathObject(levels);
}
/*
 * Name under which a resource is registered as a fencing device: its clone
 * name when one is set, otherwise its id. The argument and the whole
 * expansion are parenthesized so the macro is safe inside larger expressions.
 */
#define rsc_name(x) ((x)->clone_name ? (x)->clone_name : (x)->id)

/*
 * Bring the registration of one CIB resource in line with the configuration.
 *
 * rsc:      the resource to examine
 * data_set: working set the resource belongs to
 *
 * Resources with children are handled by recursing into the children (for
 * clone and master resources only the first child is processed). Resources
 * whose class is not "stonith" are ignored.
 *
 * A stonith resource is registered as a device when the local node is among
 * its allowed nodes with a non-negative weight and, for a group member, the
 * group's weight on the local node is not negative either. Otherwise, or
 * when its target-role is Stopped, any device registered under the same name
 * is removed.
 */
static void
cib_device_update(resource_t *rsc, pe_working_set_t *data_set)
{
    node_t *node = NULL;
    const char *value = NULL;
    const char *rclass = NULL;
    node_t *parent = NULL;
    gboolean remove = TRUE;

    /* TODO: Mark each installed device and remove if untouched when this process finishes */
    if (rsc->children) {
        GListPtr gIter = NULL;

        for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
            cib_device_update(gIter->data, data_set);
            if (rsc->variant == pe_clone || rsc->variant == pe_master) {
                crm_trace("Only processing one copy of the clone %s", rsc->id);
                break;
            }
        }
        return;
    }

    rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
    if (safe_str_neq(rclass, "stonith")) {
        return;
    }

    value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
    if (value && strcmp(RSC_STOPPED, value) == 0) {
        crm_info("Device %s has been disabled", rsc->id);
        goto update_done;

    } else if (stonith_our_uname) {
        GHashTableIter iter;

        /* Look for the local node among the resource's allowed nodes;
         * node is left NULL when it is not there */
        g_hash_table_iter_init(&iter, rsc->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
            if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
                break;
            }
            node = NULL;
        }
    }

    if (rsc->parent && rsc->parent->variant == pe_group && stonith_our_uname) {
        GHashTableIter iter;

        /* Same lookup for the enclosing group */
        g_hash_table_iter_init(&iter, rsc->parent->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&parent)) {
            if (parent && strcmp(parent->details->uname, stonith_our_uname) == 0) {
                break;
            }
            parent = NULL;
        }
    }

    if (node == NULL) {
        GHashTableIter iter;

        crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
        g_hash_table_iter_init(&iter, rsc->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
            crm_trace("Available: %s = %d", node->details->uname, node->weight);
        }

        goto update_done;

    } else if (node->weight < 0 || (parent && parent->weight < 0)) {
        /* Report whichever score caused the device to be disabled */
        char *score = score2char((node->weight < 0) ? node->weight : parent->weight);

        crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
        free(score);

        goto update_done;

    } else {
        xmlNode *data;
        GHashTableIter gIter;
        stonith_key_value_t *params = NULL;

        const char *name = NULL;
        const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
        const char *provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
        const char *rsc_provides = NULL;

        crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
        get_rsc_attributes(rsc->parameters, rsc, node, data_set);
        get_meta_attributes(rsc->meta, rsc, node, data_set);

        rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);

        /* Copy the resource parameters into the registration request */
        g_hash_table_iter_init(&gIter, rsc->parameters);
        while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
            if (!name || !value) {
                continue;
            }
            params = stonith_key_value_add(params, name, value);
            crm_trace(" %s=%s", name, value);
        }

        remove = FALSE;
        data = create_device_registration_xml(rsc_name(rsc), provider, agent, params, rsc_provides);
        stonith_device_register(data, NULL, TRUE);

        stonith_key_value_freeall(params, 1, 1);
        free_xml(data);
    }

  update_done:

    /* Anything that was not (re-)registered above must not stay registered */
    if (remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
        stonith_device_remove(rsc_name(rsc), TRUE);
    }
}
extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now);
extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set);

/*
 * Re-evaluate every resource in the local CIB copy and update the registered
 * fencing devices to match (see cib_device_update()).
 *
 * A temporary working set is built around local_cib. The CIB is only
 * borrowed, so it is detached from the working set before that is cleaned up.
 */
static void
cib_devices_update(void)
{
    GListPtr item = NULL;
    pe_working_set_t data_set;
    const char *admin_epoch = crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN);
    const char *epoch = crm_element_value(local_cib, XML_ATTR_GENERATION);
    const char *num_updates = crm_element_value(local_cib, XML_ATTR_NUMUPDATES);

    crm_info("Updating devices to version %s.%s.%s", admin_epoch, epoch, num_updates);

    set_working_set_defaults(&data_set);
    data_set.input = local_cib;
    data_set.now = crm_time_new(NULL);
    data_set.flags |= pe_flag_quick_location;
    data_set.localhost = stonith_our_uname;

    cluster_status(&data_set);
    do_calculations(&data_set, NULL, NULL);

    item = data_set.resources;
    while (item != NULL) {
        cib_device_update(item->data, &data_set);
        item = item->next;
    }

    data_set.input = NULL;      /* Wasn't a copy */
    cleanup_alloc_calculations(&data_set);
}
/*
 * Update the registered devices from a format-2 CIB patchset.
 *
 * event: unused
 * msg:   notification carrying the patchset as F_CIB_UPDATE_RESULT
 *
 * For each change in the patchset:
 *  - changes without an operation or path, and "move" operations, are ignored
 *  - a "delete" whose path mentions a primitive removes the device named by
 *    the primitive's id; when the id cannot be extracted from the path the
 *    change is logged and ignored
 *  - any other change under the resources section, or one that mentions a
 *    location constraint, triggers a full recalculation of the device list
 *    and ends the scan
 */
static void
update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
{
    xmlNode *change = NULL;
    char *reason = NULL;
    bool needs_update = FALSE;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
        const char *shortpath = NULL;

        if (op == NULL || xpath == NULL || strcmp(op, "move") == 0) {
            /* xpath is handed to strstr() below and must not be NULL */
            continue;

        } else if (safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) {
            char *rsc_id = NULL;
            char *search = NULL;
            char *mutable = strdup(xpath);

            CRM_ASSERT(mutable != NULL);

            /* Isolate the id between "primitive[@id='" and the closing quote.
             * Either marker can be absent, e.g. when "primitive" only occurs
             * inside some other element's id. */
            rsc_id = strstr(mutable, "primitive[@id='");
            if (rsc_id != NULL) {
                rsc_id += strlen("primitive[@id='");
                search = strchr(rsc_id, '\'');
            }

            if (search != NULL) {
                search[0] = 0;
                stonith_device_remove(rsc_id, TRUE);
            } else {
                crm_warn("Ignoring malformed CIB update (resource deletion): %s", xpath);
            }
            free(mutable);

        } else if (strstr(xpath, "/" XML_CIB_TAG_RESOURCES)) {
            shortpath = strrchr(xpath, '/');
            CRM_ASSERT(shortpath);
            reason = g_strdup_printf("%s %s", op, shortpath + 1);
            needs_update = TRUE;
            break;

        } else if (strstr(xpath, XML_CONS_TAG_RSC_LOCATION)) {
            shortpath = strrchr(xpath, '/');
            CRM_ASSERT(shortpath);
            reason = g_strdup_printf("%s %s", op, shortpath + 1);
            needs_update = TRUE;
            break;
        }
    }

    if (needs_update) {
        crm_info("Updating device list from the cib: %s", reason);
        cib_devices_update();
    }

    /* reason comes from g_strdup_printf(), so it is released with g_free() */
    g_free(reason);
}
/*
 * Update the registered devices from a format-1 CIB diff.
 *
 * event: unused
 * msg:   notification containing the diff
 *
 * Removed resources of class "stonith" are unregistered right away. A
 * location constraint anywhere in the update, or an added/modified stonith
 * resource, triggers a full recalculation of the device list.
 */
static void
update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
{
    const char *reason = "none";
    gboolean needs_update = FALSE;
    xmlXPathObjectPtr found = NULL;
    int total = 0;
    int idx = 0;

    /* process new constraints */
    found = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
    total = numXpathResults(found);
    if (total > 0) {
        /* Safest and simplest to always recompute */
        needs_update = TRUE;
        reason = "new location constraint";

        for (idx = 0; idx < total; idx++) {
            xmlNode *match = getXpathResult(found, idx);

            crm_log_xml_trace(match, "new constraint");
        }
    }
    freeXpathObject(found);

    /* process deletions */
    found = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
    if (numXpathResults(found) > 0) {
        remove_cib_device(found);
    }
    freeXpathObject(found);

    /* process additions */
    found = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
    total = numXpathResults(found);
    for (idx = 0; idx < total; idx++) {
        xmlNode *match = getXpathResult(found, idx);
        const char *rsc_id = crm_element_value(match, XML_ATTR_ID);
        const char *standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);

        if (safe_str_neq(standard, "stonith")) {
            continue;
        }

        crm_trace("Fencing resource %s was added or modified", rsc_id);
        reason = "new resource";
        needs_update = TRUE;
    }
    freeXpathObject(found);

    if (needs_update) {
        crm_info("Updating device list from the cib: %s", reason);
        cib_devices_update();
    }
}
/*
 * Dispatch a CIB update to the device handler matching the patchset's
 * "format" attribute (1 is assumed when the attribute is absent). Unknown
 * formats are logged and otherwise ignored.
 */
static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
{
    int format = 1;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if (format == 1) {
        update_cib_stonith_devices_v1(event, msg);

    } else if (format == 2) {
        update_cib_stonith_devices_v2(event, msg);

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/*
 * Apply the fencing-topology part of a CIB update.
 *
 * event: unused
 * msg:   notification carrying the patchset as F_CIB_UPDATE_RESULT
 *
 * Format 1: removed levels are unregistered, then added or changed levels
 * are (re-)registered.
 *
 * Format 2: each change is examined in turn. Creation or deletion of the
 * topology section itself, or of anything that is not an individual level,
 * rebuilds the whole topology from the local CIB. Changes to individual
 * levels are applied directly, except deletions, which also force a rebuild
 * because the change does not carry enough detail to remove a single entry.
 * Changes lacking an operation or a path are skipped.
 */
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
    int format = 1;
    const char *xpath;
    xmlXPathObjectPtr xpathObj = NULL;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if (format == 1) {
        /* Process deletions (only) */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);

        remove_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

        /* Process additions and changes */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);

        register_fencing_topology(xpathObj, FALSE);
        freeXpathObject(xpathObj);

    } else if (format == 2) {
        xmlNode *change = NULL;

        for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
            const char *op = crm_element_value(change, XML_DIFF_OP);
            /* Named differently from the outer 'xpath' to avoid shadowing it */
            const char *path = crm_element_value(change, XML_DIFF_PATH);
            xmlNode *f_topology = get_message_xml(change, XML_TAG_FENCING_TOPOLOGY);

            if (op == NULL || path == NULL) {
                /* path is handed to strstr() below and must not be NULL */
                continue;

            } else if (strstr(path, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION) && f_topology != NULL) {
                if (strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
                    crm_info("Re-initializing fencing topology after top-level %s operation", op);
                    fencing_topology_init(NULL);
                }
                return;

            } else if (strstr(path, "/" XML_TAG_FENCING_TOPOLOGY "/") == NULL) {
                /* Not part of the fencing topology */
                continue;

            } else if (strstr(path, "/" XML_TAG_FENCING_LEVEL "/") == NULL) {
                if (strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
                    crm_info("Re-initializing fencing topology after top-level %s operation", op);
                    fencing_topology_init(NULL);
                }
                return;
            }

            crm_trace("Handling %s operation for %s", op, path);
            if (strcmp(op, "move") == 0) {
                continue;

            } else if (strcmp(op, "create") == 0) {
                handle_topology_change(change->children, FALSE);

            } else if (strcmp(op, "modify") == 0) {
                xmlNode *match = first_named_child(change, XML_DIFF_RESULT);

                if (match) {
                    handle_topology_change(match->children, TRUE);
                }

            } else if (strcmp(op, "delete") == 0) {
                /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
                crm_info("Re-initializing fencing topology after %s operation", op);
                fencing_topology_init(NULL);
                return;
            }
        }

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/* Set to TRUE by init_cib_cache_cb() once the initial CIB query has been
 * answered; update_cib_cache_cb() ignores notifications until then */
static bool have_cib_devices = FALSE;
/*
 * CIB change notification callback (registered for T_CIB_DIFF_NOTIFY in setup_cib()).
 *
 * event: notification name, used in log messages
 * msg:   the notification; its F_CIB_UPDATE_RESULT patchset is applied to local_cib
 *
 * Keeps local_cib current, tracks the watchdog timeout option and then either
 * ignores the update (stonith disabled), rebuilds the topology and device
 * lists from scratch (stonith just enabled, or the CIB had to be re-read), or
 * applies the update incrementally.
 */
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
int rc = pcmk_ok;
xmlNode *stonith_enabled_xml = NULL;
xmlNode *stonith_watchdog_xml = NULL;
const char *stonith_enabled_s = NULL;
/* Remembers across calls whether stonith was enabled the last time we looked;
 * FALSE forces the full refresh branch at the end of this function */
static gboolean stonith_enabled_saved = TRUE;
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access to the device definitions and location constraints */
if (local_cib != NULL) {
/* NOTE(review): this rc shadows the function-level rc declared above */
int rc = pcmk_ok;
xmlNode *patchset = NULL;
/* Ignore notifications about updates that did not succeed */
crm_element_value_int(msg, F_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_log_patchset(LOG_TRACE, "Config update", patchset);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
break;
/* Any other result drops the local copy so that it is re-read in full below;
 * the two arms differ only in log level and wording */
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
break;
default:
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
crm_trace("Re-requesting the full cib");
rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
if(rc != pcmk_ok) {
crm_err("Couldnt retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
return;
}
CRM_ASSERT(local_cib != NULL);
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
}
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE);
if (stonith_enabled_xml) {
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
}
if(daemon_option_enabled(crm_system_name, "watchdog")) {
const char *value = NULL;
long timeout_ms = 0;
/* NOTE(review): value was just initialised to NULL, so this check is always true */
if(value == NULL) {
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
if (stonith_watchdog_xml) {
value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
}
}
/* With no value configured the timeout stays 0 */
if(value) {
timeout_ms = crm_get_msec(value);
}
if(timeout_ms != stonith_watchdog_timeout_ms) {
crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
stonith_watchdog_timeout_ms = timeout_ms;
}
}
/* A missing stonith-enabled option is not treated as disabled */
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
crm_trace("Ignoring cib updates while stonith is disabled");
stonith_enabled_saved = FALSE;
return;
} else if (stonith_enabled_saved == FALSE) {
crm_info("Updating stonith device and topology lists now that stonith is enabled");
stonith_enabled_saved = TRUE;
fencing_topology_init(NULL);
cib_devices_update();
} else {
/* Normal case: apply just this update */
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
}
}
/*
 * Callback for the initial CIB query issued by setup_cib().
 *
 * Stores a copy of the query output as local_cib, marks the cache as
 * populated so that update_cib_cache_cb() starts processing notifications,
 * and builds the initial topology and device lists.
 */
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    crm_info("Updating device list from the cib: init");

    have_cib_devices = TRUE;
    local_cib = copy_xml(output);

    fencing_topology_init(msg);
    cib_devices_update();
}
/*
 * Begin shutting the daemon down.
 *
 * With a running main loop the loop is simply asked to quit; otherwise the
 * daemon cleans up and exits immediately.
 *
 * nsig: unused
 */
static void
stonith_shutdown(int nsig)
{
    stonith_shutdown_flag = TRUE;
    crm_info("Terminating with  %d clients", crm_hash_table_size(client_connections));

    if (mainloop == NULL || !g_main_is_running(mainloop)) {
        stonith_cleanup();
        crm_exit(pcmk_ok);

    } else {
        g_main_quit(mainloop);
    }
}
/*
 * Called when the CIB connection is closed.
 *
 * During shutdown this is expected and only logged. Otherwise the daemon
 * signs off from the CIB and shuts itself down.
 */
static void
cib_connection_destroy(gpointer user_data)
{
    if (stonith_shutdown_flag) {
        crm_info("Connection to the CIB closed.");
        return;
    }

    crm_notice("Connection to the CIB terminated. Shutting down.");
    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }
    stonith_shutdown(0);
}
/*
 * Release what the daemon holds before exit: sign off from the CIB, destroy
 * the IPC server, tear down the peer and client caches, and free the local
 * node name and the local CIB copy.
 */
static void
stonith_cleanup(void)
{
    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }

    if (ipcs != NULL) {
        qb_ipcs_destroy(ipcs);
    }

    crm_peer_destroy();
    crm_client_cleanup();

    free(stonith_our_uname);
    free_xml(local_cib);
}
/* *INDENT-OFF* */
/*
 * Command-line options accepted by the daemon. The last field of each entry
 * is the flag value that main() switches on; "logfile" is the only entry
 * whose second field is set, and main() passes its argument to
 * crm_add_logfile().
 */
static struct crm_option long_options[] = {
    {"stand-alone",         0, 0, 's'},
    {"stand-alone-w-cpg",   0, 0, 'c'},
    {"logfile",             1, 0, 'l'},
    {"verbose",             0, 0, 'V'},
    {"version",             0, 0, '$'},
    {"help",                0, 0, '?'},

    {0, 0, 0, 0}
};
/* *INDENT-ON* */
/*
 * Connect to the CIB and start tracking it.
 *
 * The cib_new() constructor is looked up in CIB_LIBRARY at run time. Sign-on
 * is attempted up to five times while it fails with -ENOTCONN, sleeping
 * 0, 1, 2, ... seconds before each attempt. On success a diff notification
 * callback is installed, the initial CIB query is issued (answered through
 * init_cib_cache_cb()) and a disconnect handler is registered. Failures are
 * logged and leave the daemon without CIB tracking.
 */
static void
setup_cib(void)
{
    static cib_t *(*cib_new_fn) (void) = NULL;
    int attempts = 0;
    int rc = pcmk_ok;

    if (cib_new_fn == NULL) {
        cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE);
    }

    if (cib_new_fn != NULL) {
        cib_api = (*cib_new_fn) ();
    }

    if (cib_api == NULL) {
        crm_err("No connection to the CIB");
        return;
    }

    for (;;) {
        sleep(attempts);
        rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_CRMD, cib_command);
        if (rc != -ENOTCONN) {
            break;
        }
        attempts++;
        if (attempts >= 5) {
            break;
        }
    }

    if (rc != pcmk_ok) {
        crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc);
        return;
    }

    if (cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb) != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        return;
    }

    /* The query's return value is the call id the callback is registered for */
    rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
    cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
                                     init_cib_cache_cb);
    cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
    crm_notice("Watching for stonith topology changes");
}
/* IPC server callbacks: one handler per stage of a client connection's life */
struct qb_ipcs_service_handlers ipc_callbacks = {
    .connection_accept    = st_ipc_accept,      /* may refuse the connection */
    .connection_created   = st_ipc_created,     /* trace only */
    .msg_process          = st_ipc_dispatch,    /* incoming client request */
    .connection_closed    = st_ipc_closed,      /* releases the client object */
    .connection_destroyed = st_ipc_destroy,     /* traces, then runs the close handler */
};
/*
 * Peer status callback: broadcast a "poke" message to the cluster.
 *
 * This is a hack until we can send to a nodeid and/or we fix node name
 * lookups. The receiving side drops these messages in
 * stonith_peer_callback().
 *
 * type and data are unused; node is only used for the debug message.
 */
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    xmlNode *poke = create_xml_node(NULL, "stonith_command");

    crm_xml_add(poke, F_XML_TAGNAME, "stonith_command");
    crm_xml_add(poke, F_TYPE, T_STONITH_NG);
    crm_xml_add(poke, F_STONITH_OPERATION, "poke");

    crm_debug("Broadcasting our uname because of node %u", node->id);
    send_cluster_message(NULL, crm_msg_stonith_ng, poke, FALSE);

    free_xml(poke);
}
int
main(int argc, char **argv)
{
int flag;
int rc = 0;
int lpc = 0;
int argerr = 0;
int option_index = 0;
crm_cluster_t cluster;
const char *actions[] = { "reboot", "off", "list", "monitor", "status" };
crm_log_preinit("stonith-ng", argc, argv);
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1) {
break;
}
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
+ case 'l':
+ crm_add_logfile(optarg);
+ break;
case 's':
stand_alone = TRUE;
break;
case 'c':
stand_alone = FALSE;
no_cib_connect = TRUE;
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
default:
++argerr;
break;
}
}
if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
printf("<?xml version=\"1.0\"?><!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n");
printf("<resource-agent name=\"stonithd\">\n");
printf(" <version>1.0</version>\n");
printf
(" <longdesc lang=\"en\">This is a fake resource that details the instance attributes handled by stonithd.</longdesc>\n");
printf(" <shortdesc lang=\"en\">Options available for all stonith resources</shortdesc>\n");
printf(" <parameters>\n");
printf(" <parameter name=\"stonith-timeout\" unique=\"0\">\n");
printf
(" <shortdesc lang=\"en\">How long to wait for the STONITH action to complete per a stonith device.</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Overrides the stonith-timeout cluster property</longdesc>\n");
printf(" <content type=\"time\" default=\"60s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"priority\" unique=\"0\">\n");
printf
(" <shortdesc lang=\"en\">The priority of the stonith resource. Devices are tried in order of highest priority to lowest.</shortdesc>\n");
printf(" <content type=\"integer\" default=\"0\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTARG);
printf
(" <shortdesc lang=\"en\">Advanced use only: An alternate parameter to supply instead of 'port'</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
"A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
" </longdesc>\n");
printf(" <content type=\"string\" default=\"port\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTMAP);
printf
(" <shortdesc lang=\"en\">A mapping of host names to ports numbers for devices that do not support host names.</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n");
printf(" <content type=\"string\" default=\"\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTLIST);
printf
(" <shortdesc lang=\"en\">A list of machines controlled by this device (Optional unless %s=static-list).</shortdesc>\n",
STONITH_ATTR_HOSTCHECK);
printf(" <content type=\"string\" default=\"\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTCHECK);
printf
(" <shortdesc lang=\"en\">How to determine which machines are controlled by the device.</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)</longdesc>\n",
STONITH_ATTR_HOSTLIST);
printf(" <content type=\"string\" default=\"dynamic-list\"/>\n");
printf(" </parameter>\n");
for (lpc = 0; lpc < DIMOF(actions); lpc++) {
printf(" <parameter name=\"pcmk_%s_action\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: An alternate command to run instead of '%s'</shortdesc>\n",
actions[lpc]);
printf
(" <longdesc lang=\"en\">Some devices do not support the standard commands or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, command that implements the '%s' action.</longdesc>\n",
actions[lpc]);
printf(" <content type=\"string\" default=\"%s\"/>\n", actions[lpc]);
printf(" </parameter>\n");
printf(" <parameter name=\"pcmk_%s_timeout\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout</shortdesc>\n",
actions[lpc]);
printf
(" <longdesc lang=\"en\">Some devices need much more/less time to complete than normal.\n"
"Use this to specify an alternate, device-specific, timeout for '%s' actions.</longdesc>\n",
actions[lpc]);
printf(" <content type=\"time\" default=\"60s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"pcmk_%s_retries\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: The maximum number of times to retry the '%s' command within the timeout period</shortdesc>\n",
actions[lpc]);
printf(" <longdesc lang=\"en\">Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
"</longdesc>\n", actions[lpc]);
printf(" <content type=\"integer\" default=\"2\"/>\n");
printf(" </parameter>\n");
}
printf(" </parameters>\n");
printf("</resource-agent>\n");
return 0;
}
if (optind != argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
mainloop_add_signal(SIGTERM, stonith_shutdown);
crm_peer_init();
if (stand_alone == FALSE) {
#if SUPPORT_HEARTBEAT
cluster.hb_conn = NULL;
cluster.hb_dispatch = stonith_peer_hb_callback;
cluster.destroy = stonith_peer_hb_destroy;
#endif
if (is_openais_cluster()) {
#if SUPPORT_COROSYNC
cluster.destroy = stonith_peer_cs_destroy;
cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
}
if (crm_cluster_connect(&cluster) == FALSE) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(DAEMON_RESPAWN_STOP);
}
stonith_our_uname = cluster.uname;
stonith_our_uuid = cluster.uuid;
if (no_cib_connect == FALSE) {
setup_cib();
}
} else {
stonith_our_uname = strdup("localhost");
}
crm_set_status_callback(&st_peer_update_callback);
device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device);
topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry);
if(daemon_option_enabled(crm_system_name, "watchdog")) {
xmlNode *xml;
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);
xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL);
stonith_device_register(xml, NULL, FALSE);
stonith_key_value_freeall(params, 1, 1);
free_xml(xml);
}
stonith_ipc_server_init(&ipcs, &ipc_callbacks);
#if SUPPORT_STONITH_CONFIG
if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) {
standalone_cfg_commit();
}
#endif
/* Create the mainloop and run it... */
mainloop = g_main_new(FALSE);
crm_info("Starting %s mainloop", crm_system_name);
g_main_run(mainloop);
stonith_cleanup();
#if SUPPORT_HEARTBEAT
if (cluster.hb_conn) {
cluster.hb_conn->llc_ops->delete(cluster.hb_conn);
}
#endif
crm_info("Done");
return crm_exit(rc);
}
diff --git a/fencing/regression.py.in b/fencing/regression.py.in
index c4cb2d8e0a..fe6d418d73 100644
--- a/fencing/regression.py.in
+++ b/fencing/regression.py.in
@@ -1,1071 +1,1081 @@
#!/usr/bin/python
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
import os
import sys
import subprocess
import shlex
import time
def output_from_command(command):
    """Run `command` (a shell-style string) and return its stdout as a list of lines.

    The string is tokenised with shlex.split() and executed without a shell.
    stderr is captured and discarded.  The list is the raw output split on
    "\n", so output ending in a newline yields a trailing empty string.
    """
    test = subprocess.Popen(shlex.split(command),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() both drains the pipes and waits for the child.  Calling
    # wait() first can deadlock once the child fills a pipe buffer.
    return test.communicate()[0].split("\n")
class Test:
    """A single regression scenario.

    A Test starts a private stonithd, runs a list of commands against it,
    checks each command's exit code (and optionally its output), then checks
    the daemon's log for patterns that must and must not appear.
    """

    def __init__(self, name, description, verbose = 0, with_cpg = 0):
        """Create an empty scenario.

        name / description -- identification used in reports
        verbose            -- non-zero to echo commands and daemon output
        with_cpg           -- non-zero to start stonithd with "-c" instead of "-s"
        """
        self.name = name
        self.description = description
        self.cmds = []
        self.verbose = verbose
        self.result_txt = ""
        self.cmd_tool_output = ""
        self.result_exitcode = 0

        self.stonith_options = "-s"
        self.enable_corosync = 0
        if with_cpg:
            self.stonith_options = "-c"
            self.enable_corosync = 1

        self.stonith_process = None
        self.stonith_output = ""
        self.stonith_patterns = []
        self.negative_stonith_patterns = []

        self.executed = 0
        # NOTE(review): the result is not used anywhere in the visible code
        rsc_classes = output_from_command("crm_resource --list-standards")

    def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None):
        """Append one command description to the scenario."""
        self.cmds.append(
            {
                "cmd" : cmd,
                "kill" : kill,
                "args" : args,
                "expected_exitcode" : exitcode,
                "stdout_match" : stdout_match,
                "stdout_negative_match" : stdout_negative_match,
                "no_wait" : no_wait,
            }
        )

    def stop_pacemaker(self):
        """Forcefully kill any running pacemakerd."""
        cmd = shlex.split("killall -9 -q pacemakerd")
        test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        test.wait()

    def start_environment(self):
        """Kill pacemakerd and stonithd, then start a fresh stonithd logging to a file."""
        ### make sure we are in full control here ###
        self.stop_pacemaker()

        cmd = shlex.split("killall -9 -q stonithd")
        test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        test.wait()

        if self.verbose:
            self.stonith_options = self.stonith_options + " -V"
            print("Starting stonithd with %s" % self.stonith_options)

        # never let a previous run's log leak into this test's pattern matching
        if os.path.exists("/tmp/stonith-regression.log"):
            os.remove('/tmp/stonith-regression.log')

        self.stonith_process = subprocess.Popen(
            shlex.split("@CRM_DAEMON_DIR@/stonithd %s -l /tmp/stonith-regression.log" % self.stonith_options))

        time.sleep(1)

    def clean_environment(self):
        """Stop stonithd, load its log into self.stonith_output and delete the log."""
        if self.stonith_process:
            self.stonith_process.terminate()
            self.stonith_process.wait()

        self.stonith_output = ""
        self.stonith_process = None

        # The log may be missing if the daemon never started; treat that as
        # empty output instead of raising.  The handle is always closed.
        if os.path.exists('/tmp/stonith-regression.log'):
            with open('/tmp/stonith-regression.log', 'r') as f:
                self.stonith_output = f.read()

        if self.verbose:
            print("Daemon Output Start")
            print(self.stonith_output)
            print("Daemon Output End")

        if os.path.exists('/tmp/stonith-regression.log'):
            os.remove('/tmp/stonith-regression.log')

    def add_stonith_log_pattern(self, pattern):
        """Require `pattern` to appear in the daemon log (one log line per added pattern)."""
        self.stonith_patterns.append(pattern)

    def add_stonith_negative_log_pattern(self, pattern):
        """Require that `pattern` never appears in the daemon log."""
        self.negative_stonith_patterns.append(pattern)

    def add_cmd(self, cmd, args):
        """Add a command expected to exit 0."""
        self.__new_cmd(cmd, args, 0, "")

    def add_cmd_no_wait(self, cmd, args):
        """Add a command that is started but not waited for."""
        self.__new_cmd(cmd, args, 0, "", 1)

    def add_cmd_check_stdout(self, cmd, args, match, no_match = ""):
        """Add a command expected to exit 0 whose output must contain `match` and not `no_match`."""
        self.__new_cmd(cmd, args, 0, match, 0, no_match)

    def add_expected_fail_cmd(self, cmd, args, exitcode = 255):
        """Add a command expected to exit with `exitcode`."""
        self.__new_cmd(cmd, args, exitcode, "")

    def get_exitcode(self):
        """Return 0 if the scenario passed (or has not failed yet), -1 otherwise."""
        return self.result_exitcode

    def print_result(self, filler):
        """Print the result text prefixed with `filler`."""
        print("%s%s" % (filler, self.result_txt))

    def run_cmd(self, args):
        """Execute one command description and return its (possibly overridden) exit code.

        Returns 0 immediately for no_wait commands.  Returns -2 when a stdout
        match/negative-match check fails, otherwise the process' return code.
        """
        cmd = shlex.split(args['args'])
        cmd.insert(0, args['cmd'])
        if self.verbose:
            print("\n\nRunning: " + " ".join(cmd))
        test = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)

        if args['kill']:
            if self.verbose:
                print("Also running: " + args['kill'])
            subprocess.Popen(shlex.split(args['kill']))

        if args['no_wait'] != 0:
            return 0

        # communicate() waits for the child while draining both pipes; a bare
        # wait() beforehand could block forever on a full pipe.
        output_res = test.communicate()
        output = output_res[0] + output_res[1]
        rc = test.returncode

        if self.verbose:
            print(output)

        if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0:
            rc = -2
            print("STDOUT string '%s' was not found in cmd output: %s" % (args['stdout_match'], output))

        if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0:
            rc = -2
            print("STDOUT string '%s' was found in cmd output: %s" % (args['stdout_negative_match'], output))

        return rc

    def count_negative_matches(self, outline):
        """Return 1 if `outline` contains any forbidden pattern, else 0."""
        count = 0
        for line in self.negative_stonith_patterns:
            if outline.count(line):
                count = 1
                if self.verbose:
                    print("This pattern should not have matched = '%s'" % (line))
        return count

    def match_stonith_patterns(self):
        """Check the daemon log against the expected and forbidden patterns.

        Each log line can satisfy at most one expected pattern (the first one
        in list order that it contains), so a pattern added N times needs N
        matching lines.  On failure result_txt / result_exitcode are updated.
        """
        # Only skip when there is nothing at all to verify.  Tests that define
        # just negative patterns must still be checked.
        if len(self.stonith_patterns) == 0 and len(self.negative_stonith_patterns) == 0:
            return

        pats = list(self.stonith_patterns)
        total_patterns = len(pats)
        negative_matches = 0

        for line in self.stonith_output.split("\n"):
            negative_matches = negative_matches + self.count_negative_matches(line)
            for cur, pat in enumerate(pats):
                if line.count(pat):
                    del pats[cur]
                    break

        if len(pats) > 0 or negative_matches:
            if self.verbose:
                for p in pats:
                    print("Pattern Not Matched = '%s'" % p)

            self.result_txt = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." % (self.name, len(pats), total_patterns, negative_matches)
            self.result_exitcode = -1

    def run(self):
        """Run the whole scenario and return the exit code of the last command executed."""
        res = 0
        i = 1
        self.start_environment()

        if self.verbose:
            print("\n--- START TEST - %s" % self.name)

        self.result_txt = "SUCCESS - '%s'" % (self.name)
        self.result_exitcode = 0
        for cmd in self.cmds:
            res = self.run_cmd(cmd)
            if res != cmd['expected_exitcode']:
                print("Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']))
                self.result_txt = "FAILURE - '%s' failed at step %d. Command: %s %s" % (self.name, i, cmd['cmd'], cmd['args'])
                self.result_exitcode = -1
                break
            else:
                if self.verbose:
                    print("Step %d SUCCESS" % (i))
            i = i + 1
        self.clean_environment()

        # log patterns are only meaningful when every command behaved as expected
        if self.result_exitcode == 0:
            self.match_stonith_patterns()

        print(self.result_txt)
        if self.verbose:
            print("--- END TEST - %s\n" % self.name)

        self.executed = 1
        return res
class Tests:
def __init__(self, verbose = 0):
self.tests = []
self.verbose = verbose
self.autogen_corosync_cfg = 0
if not os.path.exists("/etc/corosync/corosync.conf"):
self.autogen_corosync_cfg = 1
def new_test(self, name, description, with_cpg = 0):
    """Create a Test that inherits this collection's verbosity, remember it and return it."""
    created = Test(name, description, self.verbose, with_cpg)
    self.tests.append(created)
    return created
def print_list(self):
print "\n==== %d TESTS FOUND ====" % (len(self.tests))
print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")
print "%35s - %s" % ("--------------------", "--------------------")
for test in self.tests:
print "%35s - %s" % (test.name, test.description)
print "==== END OF LIST ====\n"
def start_corosync(self):
    """Launch the corosync command, wait for it to return, then pause 10 seconds."""
    if self.verbose:
        print("Starting corosync")

    launcher = subprocess.Popen("corosync", stdout=subprocess.PIPE)
    launcher.wait()
    time.sleep(10)
def stop_corosync(self):
    """Forcefully kill every running corosync process and wait for killall to finish."""
    killer = subprocess.Popen(shlex.split("killall -9 -q corosync"), stdout=subprocess.PIPE)
    killer.wait()
def run_single(self, name):
for test in self.tests:
if test.name == name:
test.run()
break;
def run_tests_matching(self, pattern):
for test in self.tests:
if test.name.count(pattern) != 0:
test.run()
def run_cpg_only(self):
for test in self.tests:
if test.enable_corosync:
test.run()
def run_no_cpg(self):
for test in self.tests:
if not test.enable_corosync:
test.run()
def run_tests(self):
for test in self.tests:
test.run()
def exit(self):
for test in self.tests:
if test.executed == 0:
continue
if test.get_exitcode() != 0:
sys.exit(-1)
sys.exit(0)
def print_results(self):
failures = 0;
success = 0;
print "\n\n======= FINAL RESULTS =========="
print "\n--- FAILURE RESULTS:"
for test in self.tests:
if test.executed == 0:
continue
if test.get_exitcode() != 0:
failures = failures + 1
test.print_result(" ")
else:
success = success + 1
if failures == 0:
print " None"
print "\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)
def build_api_sanity_tests(self):
verbose_arg = ""
if self.verbose:
verbose_arg = "-V"
test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.")
test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg))
test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1)
test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg))
def build_custom_timeout_tests(self):
# Registers two cpg scenarios checking the "remote op timeout set to N" log
# line when devices carry their own pcmk_off_timeout: one without and one
# with fencing levels (topology).
# custom timeout without topology
test = self.new_test("cpg_custom_timeout_1",
"Verify per device timeouts work as expected without using topology.", 1)
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4\"")
# fence node3 with a 2 second request timeout (-t 2)
test.add_cmd("stonith_admin", "-F node3 -t 2")
# timeout is 2+1+4 = 7
test.add_stonith_log_pattern("remote op timeout set to 7")
# custom timeout _WITH_ topology
test = self.new_test("cpg_custom_timeout_2",
"Verify per device timeouts work as expected _WITH_ topology.", 1)
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4000\"")
# one device per fencing level for node3: false1, then true1, then false2
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1")
test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2")
test.add_cmd("stonith_admin", "-F node3 -t 2")
# timeout is 2+1+4000 = 4003
test.add_stonith_log_pattern("remote op timeout set to 4003")
def build_fence_merge_tests(self):
# Registers four cpg scenarios about merging of overlapping fencing
# requests: a single merge, multiple merges, multiple merges with fencing
# levels, and a check that requests for different nodes are NOT merged.
# Overlap is produced by starting requests with add_cmd_no_wait() and then
# issuing one more request that is waited for.
### Simple test that overlapping fencing operations get merged
test = self.new_test("cpg_custom_merge_single",
"Verify overlapping identical fencing operations are merged, no fencing levels used.", 1)
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd("stonith_admin", "-F node3 -t 10")
### one merger will happen
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
### the pattern below signifies that both the original and duplicate operation completed
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
### Test that multiple mergers occur
test = self.new_test("cpg_custom_merge_multiple",
"Verify multiple overlapping identical fencing operations are merged", 1)
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd("stonith_admin", "-F node3 -t 10")
### 4 mergers should occur
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
### the five patterns below signify that all five requests (the four started without waiting plus the final one) completed
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
### Test that multiple mergers occur with topologies used
test = self.new_test("cpg_custom_merge_with_topology",
"Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1)
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
### level 1 holds both failing devices, level 2 the passing one
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10")
test.add_cmd("stonith_admin", "-F node3 -t 10")
### 4 mergers should occur
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client")
### the five patterns below signify that all five requests (the four started without waiting plus the final one) completed
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
test.add_stonith_log_pattern("Operation off of node3 by")
### Requests targeting different nodes (node2 vs node3) must not be merged
test = self.new_test("cpg_custom_no_merge",
"Verify differing fencing operations are not merged", 1)
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"")
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1")
test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10")
test.add_cmd("stonith_admin", "-F node3 -t 10")
### this scenario has only a negative pattern: the merge message must never be logged
test.add_stonith_negative_log_pattern("Merging stonith action off for node node3 originating from client")
def build_standalone_tests(self):
test_types = [
{
"prefix" : "standalone" ,
"use_cpg" : 0,
},
{
"prefix" : "cpg" ,
"use_cpg" : 1,
},
]
# test what happens when all devices timeout
for test_type in test_types:
test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"],
"Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
if test_type["use_cpg"] == 1:
test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 194)
test.add_stonith_log_pattern("remote op timeout set to 6")
else:
test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 55)
test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ")
test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ")
test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ")
# test what happens when multiple devices can fence a node, but the first device fails.
for test_type in test_types:
test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"],
"Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-F node3 -t 2")
if test_type["use_cpg"] == 1:
test.add_stonith_log_pattern("remote op timeout set to 6")
# simple topology test for one device
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue
test = self.new_test("%s_topology_simple" % test_type["prefix"],
"Verify all fencing devices at a level are used.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v true")
test.add_cmd("stonith_admin", "-F node3 -t 2")
test.add_stonith_log_pattern("remote op timeout set to 2")
test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0")
# add topology, delete topology, verify fencing still works
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue
test = self.new_test("%s_topology_add_remove" % test_type["prefix"],
"Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v true")
test.add_cmd("stonith_admin", "-d node3 -i 1")
test.add_cmd("stonith_admin", "-F node3 -t 2")
test.add_stonith_log_pattern("remote op timeout set to 2")
test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0")
# test what happens when the first fencing level has multiple devices.
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue
test = self.new_test("%s_topology_device_fails" % test_type["prefix"],
"Verify if one device in a level fails, the other is tried.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true")
test.add_cmd("stonith_admin", "-F node3 -t 20")
test.add_stonith_log_pattern("remote op timeout set to 40")
test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201")
test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0")
# test what happens when the first fencing level fails.
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue
test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"],
"Verify if one level fails, the next leve is tried.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2")
test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3")
test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4")
test.add_cmd("stonith_admin", "-F node3 -t 2")
test.add_stonith_log_pattern("remote op timeout set to 12")
test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201")
test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201")
test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0")
test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0")
# test what happens when the first fencing level had devices that no one has registered
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue
test = self.new_test("%s_topology_missing_devices" % test_type["prefix"],
"Verify topology can continue with missing devices.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2")
test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3")
test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4")
test.add_cmd("stonith_admin", "-F node3 -t 2")
# Test what happens if multiple fencing levels are defined, and then the first one is removed.
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue
test = self.new_test("%s_topology_level_removal" % test_type["prefix"],
"Verify level removal works.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2")
test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3")
test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4")
# Now remove level 2, verify none of the devices in level two are hit.
test.add_cmd("stonith_admin", "-d node3 -i 2")
test.add_cmd("stonith_admin", "-F node3 -t 20")
test.add_stonith_log_pattern("remote op timeout set to 8")
test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201")
test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: ")
test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0")
test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0")
# test the stonith builds the correct list of devices that can fence a node.
for test_type in test_types:
test = self.new_test("%s_list_devices" % test_type["prefix"],
"Verify list of devices that can fence a node is correct", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1")
test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1")
# simple test of device monitor
for test_type in test_types:
test = self.new_test("%s_monitor" % test_type["prefix"],
"Verify device is reachable", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-Q true1")
test.add_cmd("stonith_admin", "-Q false1")
test.add_expected_fail_cmd("stonith_admin", "-Q true2", 237)
# Verify monitor occurs for duration of timeout period on failure
for test_type in test_types:
test = self.new_test("%s_monitor_timeout" % test_type["prefix"],
"Verify monitor uses duration of timeout period given.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"")
test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 195)
test.add_stonith_log_pattern("Attempt 2 to execute")
# Verify monitor occurs for duration of timeout period on failure, but stops at max retries
for test_type in test_types:
test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"],
"Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"")
test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15",195)
test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times")
# simple register test
for test_type in test_types:
test = self.new_test("%s_register" % test_type["prefix"],
"Verify devices can be registered and un-registered", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-Q true1")
test.add_cmd("stonith_admin", "-D true1")
test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237)
# simple reboot test
for test_type in test_types:
test = self.new_test("%s_reboot" % test_type["prefix"],
"Verify devices can be rebooted", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-B node3 -t 2")
test.add_cmd("stonith_admin", "-D true1")
test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237)
# test fencing history.
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue
test = self.new_test("%s_fence_history" % test_type["prefix"],
"Verify last fencing operation is returned.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
test.add_cmd("stonith_admin", "-F node3 -t 2 -V")
test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "")
# simple test of dynamic list query
for test_type in test_types:
test = self.new_test("%s_dynamic_list_query" % test_type["prefix"],
"Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list")
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list")
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list")
test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found")
# fence using dynamic list query
for test_type in test_types:
test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"],
"Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list")
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list")
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list")
test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V");
# simple test of query using status action
for test_type in test_types:
test = self.new_test("%s_status_query" % test_type["prefix"],
"Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found")
# test what happens when no reboot action is advertised
for test_type in test_types:
test = self.new_test("%s_no_reboot_support" % test_type["prefix"],
"Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-B node1 -t 5 -V");
test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'")
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
# make sure reboot is used when reboot action is advertised
for test_type in test_types:
test = self.new_test("%s_with_reboot_support" % test_type["prefix"],
"Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"])
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "-B node1 -t 5 -V");
test.add_stonith_negative_log_pattern("does not advertise support for 'reboot', performing 'off'")
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
# Register three tests that look for (or for the absence of) the
# "adding nodeid" log message when the local host is fenced.
#
# The third argument to new_test() is 1 for the two "cpg_" tests and 0 for the
# "standalone_" test; elsewhere in this file that position carries
# test_type["use_cpg"], so it presumably selects corosync/cpg mode -- verify.
#
# NOTE(review): indentation is not visible in this view, so it cannot be told
# whether only the "our_uname = our_uname[0]" assignment or the whole remainder
# of the method is guarded by "if our_uname:" -- confirm against the real file.
def build_nodeid_tests(self):
# Local host name as reported by "uname -n"; the helper's result is indexed
# with [0] when truthy, so it presumably returns a list of output lines.
our_uname = output_from_command("uname -n")
if our_uname:
our_uname = our_uname[0]
### verify nodeid is supplied when nodeid is in the metadata parameters
test = self.new_test("cpg_supply_nodeid",
"Verify nodeid is given when fence agent has nodeid as parameter", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname))
test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname))
### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters
# NOTE(review): the two commands below are identical to those of
# cpg_supply_nodeid (same agent, same options), yet this test expects the
# log message to be absent -- confirm the intended agent for one of them.
test = self.new_test("cpg_do_not_supply_nodeid",
"Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname))
test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname))
### verify nodeid use doesn't explode standalone mode
# Same commands again, registered with 0 as the third new_test() argument;
# the "adding nodeid" message must not appear.
test = self.new_test("standalone_do_not_supply_nodeid",
"Verify nodeid in metadata parameter list doesn't kill standalone mode", 0)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname))
test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname))
# Register the unfencing ("stonith_admin -U") tests.
#
# Two groups are created, all with 1 as the third new_test() argument:
#   cpg_unfence_required_1..4  - devices registered with the
#                                fence_dummy_automatic_unfence agent
#   cpg_unfence_on_target_1..4 - devices registered with the plain fence_dummy
#                                agent, expecting the
#                                "(on) to be executed on the target node" log
#
# NOTE(review): indentation is not visible in this view, so the extent of the
# "if our_uname:" body cannot be determined here -- confirm against the real
# file before restructuring.
def build_unfence_tests(self):
# Local host name from "uname -n"; first element of the helper's result.
our_uname = output_from_command("uname -n")
if our_uname:
our_uname = our_uname[0]
### verify unfencing using automatic unfencing
test = self.new_test("cpg_unfence_required_1",
"Verify require unfencing on all devices when automatic=true in agent's metadata", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
# both devices should be executed
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)");
### verify unfencing using automatic unfencing fails if any of the required agents fail
# true2 is registered with mode=fail, and the -U command is expected to
# exit with status 143.
test = self.new_test("cpg_unfence_required_2",
"Verify require unfencing on all devices when automatic=true in agent's metadata", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=fail\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), 143)
### verify unfencing using automatic devices with topology
# true1 is placed at level 1 and true2 at level 2; both must be logged as OK.
test = self.new_test("cpg_unfence_required_3",
"Verify require unfencing on all devices even when required devices are at different topology levels", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname))
test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)");
### verify all automatic devices run even when topology levels also contain failing devices
# Levels 1-3 each contain a mode=fail fence_dummy device (false1..false4);
# the four automatic-unfence devices true1..true4 must still all log OK.
test = self.new_test("cpg_unfence_required_4",
"Verify all required devices are executed even with topology levels fail.", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true4 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R false4 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname))
test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)");
test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)");
test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)");
test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)");
### verify unfencing using on_target device
test = self.new_test("cpg_unfence_on_target_1",
"Verify unfencing with on_target = true", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname))
test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
test.add_stonith_log_pattern("(on) to be executed on the target node")
### verify failure of unfencing using on_target device
# The unfence target is the made-up host node_fake_1234; the command is
# expected to exit with status 237.
test = self.new_test("cpg_unfence_on_target_2",
"Verify failure unfencing with on_target = true", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname))
test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 237)
test.add_stonith_log_pattern("(on) to be executed on the target node")
### verify unfencing using on_target device with topology
test = self.new_test("cpg_unfence_on_target_3",
"Verify unfencing with on_target = true using topology", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname))
test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname))
test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname))
test.add_stonith_log_pattern("(on) to be executed on the target node")
### verify unfencing using on_target device with topology fails when victim node doesn't exist
# Topology levels are registered for the made-up host node_fake, and the
# -U command against it is expected to exit with status 237.
test = self.new_test("cpg_unfence_on_target_4",
"Verify unfencing failure with on_target = true using topology", 1)
test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname))
test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname))
test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1")
test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2")
test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", 237)
test.add_stonith_log_pattern("(on) to be executed on the target node")
# Prepare the host for a test run: optionally write a corosync.conf, restart
# corosync, and install the dummy fence agents under /usr/sbin.
#
# use_corosync: truthy when corosync should be configured and (re)started.
#
# Everything is done through os.system(); none of the exit statuses are checked.
#
# NOTE(review): the generated files are written with "cat <<-END >>file", and
# ">>" appends. If a file already exists (e.g. a previous run was interrupted
# before cleanup_environment) the new content is added after the old content.
# NOTE(review): the multi-line string literals below are runtime data; their
# original internal whitespace is not visible in this view.
def setup_environment(self, use_corosync):
# Only generate a corosync.conf when auto-generation is enabled on this
# object and corosync is actually wanted.
if self.autogen_corosync_cfg and use_corosync:
corosync_conf = ("""
totem {
version: 2
crypto_cipher: none
crypto_hash: none
nodeid: 101
secauth: off
interface {
ttl: 1
ringnumber: 0
mcastport: 6666
mcastaddr: 226.94.1.1
bindnetaddr: 127.0.0.1
}
}
logging {
debug: off
fileline: off
to_syslog: no
to_stderr: no
syslog_facility: daemon
timestamp: on
to_logfile: yes
logfile: /var/log/corosync.log
logfile_priority: info
}
""")
os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf))
if use_corosync:
### make sure we are in control ###
self.stop_corosync()
self.start_corosync()
# Fence agent script that calls sys.exit(-1) on both of its exit paths;
# installed below as fence_dummy_monitor_fail.
monitor_fail_agent = ("""#!/usr/bin/python
import sys
def main():
for line in sys.stdin.readlines():
if line.count("monitor") > 0:
sys.exit(-1);
sys.exit(-1)
if __name__ == "__main__":
main()
""")
# Fence agent script that prints "fake_port_1" and exits 0 for a "list"
# request, exits 0 for "off", and otherwise exits -1; installed below as
# fence_dummy_list.
dynamic_list_agent = ("""#!/usr/bin/python
import sys
def main():
for line in sys.stdin.readlines():
if line.count("list") > 0:
print "fake_port_1"
sys.exit(0)
if line.count("off") > 0:
sys.exit(0)
sys.exit(-1)
if __name__ == "__main__":
main()
""")
# Write both generated agents and make them executable.
os.system("cat <<-END >>/usr/sbin/fence_dummy_list\n%s\nEND" % (dynamic_list_agent))
os.system("chmod 711 /usr/sbin/fence_dummy_list")
os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor_fail\n%s\nEND" % (monitor_fail_agent))
os.system("chmod 711 /usr/sbin/fence_dummy_monitor_fail")
# Install the stock dummy agent shipped with the CTS tests.
os.system("cp /usr/share/pacemaker/tests/cts/fence_dummy /usr/sbin/fence_dummy")
# modifies dummy agent to do require unfencing
# (sed rewrites every "on_target=" in the stock agent to "automatic=")
os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/on_target=/automatic=/g' > /usr/sbin/fence_dummy_automatic_unfence");
os.system("chmod 711 /usr/sbin/fence_dummy_automatic_unfence")
# modifies dummy agent to not advertise reboot
# (sed blanks out any line containing an <action ... name ... reboot ...> element)
os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/^.*<action.*name.*reboot.*>.*//g' > /usr/sbin/fence_dummy_no_reboot");
os.system("chmod 711 /usr/sbin/fence_dummy_no_reboot")
# Undo setup_environment: stop corosync, optionally dump and delete its log,
# and remove the generated corosync.conf and the installed dummy fence agents.
#
# use_corosync: truthy when corosync was started for this run.
#
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the conditionals below cannot be confirmed here.
def cleanup_environment(self, use_corosync):
if use_corosync:
self.stop_corosync()
# In verbose mode, echo the corosync log to stdout before deleting it.
if self.verbose and os.path.exists('/var/log/corosync.log'):
# The "-"/"+" lines below are the patch hunk carried by this diff: the
# heading printed before the log dump changes from "Daemon output" to
# "Corosync output".
- print "Daemon output"
+ print "Corosync output"
# NOTE(review): f is never explicitly closed before the file is removed.
f = open('/var/log/corosync.log', 'r')
for line in f.readlines():
print line.strip()
os.remove('/var/log/corosync.log')
# Only remove corosync.conf when this object generated it.
if self.autogen_corosync_cfg:
os.system("rm -f /etc/corosync/corosync.conf")
# Remove every agent that setup_environment installed under /usr/sbin.
os.system("rm -f /usr/sbin/fence_dummy_monitor_fail")
os.system("rm -f /usr/sbin/fence_dummy_list")
os.system("rm -f /usr/sbin/fence_dummy")
os.system("rm -f /usr/sbin/fence_dummy_automatic_unfence")
os.system("rm -f /usr/sbin/fence_dummy_no_reboot")
class TestOptions:
    """Command-line options for the regression test driver.

    Parsed values live in the ``options`` dict, keyed by the long option
    name without its leading dashes.  Flags are stored as 0/1 and string
    options default to the empty string.
    """

    def __init__(self):
        self.options = {}
        self.options['list-tests'] = 0
        self.options['run-all'] = 1
        self.options['run-only'] = ""
        self.options['run-only-pattern'] = ""
        self.options['verbose'] = 0
        self.options['invalid-arg'] = ""
        self.options['cpg-only'] = 0
        self.options['no-cpg'] = 0
        self.options['show-usage'] = 0

    def _take_value(self, args, i, key):
        """Store the argument following args[i] under ``key``.

        Returns 1 when a value was consumed (the caller must skip it) and 0
        when args[i] was the last argument.  In the latter case the option is
        recorded in 'invalid-arg' and 'show-usage' is set, instead of raising
        IndexError as an unguarded args[i+1] lookup would.
        """
        if i + 1 < len(args):
            self.options[key] = args[i + 1]
            return 1
        self.options['invalid-arg'] = args[i]
        self.options['show-usage'] = 1
        return 0

    def build_options(self, argv):
        """Fill ``self.options`` from ``argv`` (argv[0] is the program name).

        Unrecognized arguments are ignored.  ``-r``/``--run-only`` and
        ``-p``/``--run-only-pattern`` consume the argument that follows them.
        """
        args = argv[1:]
        skip = 0
        for i in range(0, len(args)):
            if skip:
                # This argument was the value of the previous option.
                skip = 0
                continue
            elif args[i] == "-h" or args[i] == "--help":
                self.options['show-usage'] = 1
            elif args[i] == "-l" or args[i] == "--list-tests":
                self.options['list-tests'] = 1
            elif args[i] == "-V" or args[i] == "--verbose":
                self.options['verbose'] = 1
            elif args[i] == "-n" or args[i] == "--no-cpg":
                self.options['no-cpg'] = 1
            elif args[i] == "-c" or args[i] == "--cpg-only":
                self.options['cpg-only'] = 1
            elif args[i] == "-r" or args[i] == "--run-only":
                skip = self._take_value(args, i, 'run-only')
            elif args[i] == "-p" or args[i] == "--run-only-pattern":
                skip = self._take_value(args, i, 'run-only-pattern')

    def show_usage(self):
        """Print the usage text to stdout."""
        # A single parenthesised argument prints identically whether print is
        # a statement (Python 2) or a function.
        print("usage: " + sys.argv[0] + " [options]")
        print("If no options are provided, all tests will run")
        print("Options:")
        print("\t [--help | -h] Show usage")
        print("\t [--list-tests | -l] Print out all registered tests.")
        print("\t [--cpg-only | -c] Only run tests that require corosync.")
        print("\t [--no-cpg | -n] Only run tests that do not require corosync")
        print("\t [--run-only | -r 'testname'] Run a specific test")
        print("\t [--verbose | -V] Verbose output")
        print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value")
        print("\n\tExample: Run only the test 'start_stop'")
        print("\t\t python ./regression.py --run-only start_stop")
        print("\n\tExample: Run only the tests with the string 'systemd' present in them")
        print("\t\t python ./regression.py --run-only-pattern systemd")
def main(argv):
    """Parse argv, register every test group, then run the requested selection.

    Listing tests or showing usage exits with status 0 before anything is
    set up.  Otherwise the environment is prepared, exactly one run mode is
    executed, results are printed, and the environment is cleaned up again.
    """
    parser = TestOptions()
    parser.build_options(argv)
    opts = parser.options

    use_corosync = 1

    # All test groups are registered up front, whichever mode is chosen later.
    suite = Tests(opts['verbose'])
    suite.build_standalone_tests()
    suite.build_custom_timeout_tests()
    suite.build_api_sanity_tests()
    suite.build_fence_merge_tests()
    suite.build_unfence_tests()
    suite.build_nodeid_tests()

    if opts['list-tests']:
        suite.print_list()
        sys.exit(0)
    elif opts['show-usage']:
        parser.show_usage()
        sys.exit(0)

    print("Starting ...")

    if opts['no-cpg']:
        use_corosync = 0

    suite.setup_environment(use_corosync)

    # Run-mode precedence: pattern, single test, no-cpg, cpg-only, everything.
    if opts['run-only-pattern'] != "":
        suite.run_tests_matching(opts['run-only-pattern'])
    elif opts['run-only'] != "":
        suite.run_single(opts['run-only'])
    elif opts['no-cpg']:
        suite.run_no_cpg()
    elif opts['cpg-only']:
        suite.run_cpg_only()
    else:
        suite.run_tests()
    # Every mode reports its results the same way.
    suite.print_results()

    suite.cleanup_environment(use_corosync)
    suite.exit()
# Run the regression driver only when executed as a script, not on import.
if __name__ == "__main__":
    main(sys.argv)

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jan 25, 11:51 AM (1 d, 20 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322442
Default Alt Text
(92 KB)

Event Timeline