diff --git a/fencing/main.c b/fencing/main.c index 2694452917..9773085f99 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,1404 +1,1408 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; char *stonith_our_uuid = NULL; long stonith_watchdog_timeout_ms = 0; GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean no_cib_connect = FALSE; gboolean stonith_shutdown_flag = FALSE; qb_ipcs_service_t *ipcs = NULL; xmlNode *local_cib = NULL; static cib_t *cib_api = NULL; static void *cib_library = NULL; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void st_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection created for %p", c); } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; crm_client_t *c = crm_client_get(qbc); if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = crm_ipcs_recv(c, data, size, &id, &flags); if (request == NULL) { crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = g_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %u/%u for command %u from %s", flags, call_options, id, crm_client_name(c)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "Client[inbound]"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); crm_client_destroy(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (crm_str_eq(op, "poke", TRUE)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_HEARTBEAT static void stonith_peer_hb_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } static void stonith_peer_hb_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } stonith_shutdown(0); } #endif #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_err("Corosync connection terminated"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ crm_client_t *client_obj = NULL; int local_rc = pcmk_ok; crm_trace("Sending response"); client_obj = crm_client_get_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = -1; crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer ? "(originator of delegated request)" : ""); } local_rc = crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event); } if (local_rc < pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ? client_obj->name : "", pcmk_strerror(local_rc)); } } long long get_stonith_flag(const char *name) { if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) { return 0x01; } else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (client->options & get_stonith_flag(type)) { int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error); if (rc <= 0) { crm_warn("%s notification of client %s.%.6s failed: %s (%d)", type, crm_client_name(client), client->id, pcmk_strerror(rc), rc); } else { crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client), client->id); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { crm_client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = crm_client_get_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_connections, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = NULL; line = calloc(1, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", devices + last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL); xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_DEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add(data, F_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(0, STONITH_OP_LEVEL_DEL, rc, notify_data); free_xml(notify_data); free_xml(data); free(desc); } static void topology_register_helper(const char *node, int level, stonith_key_value_t * device_list) { int rc; char *desc = NULL; xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_ADD); xmlNode *data = create_level_registration_xml(node, level, device_list); rc = stonith_level_register(data, &desc); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(0, STONITH_OP_LEVEL_ADD, rc, notify_data); free_xml(notify_data); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (safe_str_neq(standard, "stonith")) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { CRM_LOG_ASSERT(match != NULL); if(match) { int index = 0; const char *target; const char *dev_list; stonith_key_value_t *devices = NULL; crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); target = crm_element_value(match, XML_ATTR_STONITH_TARGET); dev_list = crm_element_value(match, XML_ATTR_STONITH_DEVICES); devices = parse_device_list(dev_list); crm_trace("Updating %s[%d] (%s) to %s", target, index, ID(match), dev_list); if(remove) { topology_remove_helper(target, index); } topology_register_helper(target, index, devices); stonith_key_value_freeall(devices, 1, 1); } } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; const char *target = crm_element_value(match, XML_ATTR_STONITH_TARGET); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void fencing_topology_init(xmlNode * msg) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); if(topology) { g_hash_table_destroy(topology); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); } /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj, TRUE); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) { node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; node_t *parent = NULL; gboolean remove = TRUE; /* TODO: Mark each installed device and remove if untouched when this process finishes */ if(rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(rsc->variant == pe_clone || rsc->variant == pe_master) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(safe_str_neq(rclass, "stonith")) { return; } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if(value && strcmp(RSC_STOPPED, value) == 0) { crm_info("Device %s has been disabled", rsc->id); goto update_done; } else if(stonith_our_uname) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } if (rsc->parent && rsc->parent->variant == pe_group && stonith_our_uname) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->parent->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&parent)) { if(parent && strcmp(parent->details->uname, stonith_our_uname) == 0) { break; } parent = NULL; } } if(node == NULL) { GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } goto update_done; } else if(node->weight < 0 || (parent && parent->weight < 0)) { char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); goto update_done; } else { xmlNode *data; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); get_rsc_attributes(rsc->parameters, rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES); g_hash_table_iter_init(&gIter, rsc->parameters); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } remove = FALSE; data = create_device_registration_xml(rsc_name(rsc), provider, agent, params, rsc_provides); stonith_device_register(data, NULL, TRUE); stonith_key_value_freeall(params, 1, 1); free_xml(data); } update_done: if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) { stonith_device_remove(rsc_name(rsc), TRUE); } } extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now); extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); static void cib_devices_update(void) { GListPtr gIter = NULL; pe_working_set_t data_set; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); set_working_set_defaults(&data_set); data_set.input = local_cib; data_set.now = crm_time_new(NULL); data_set.flags |= pe_flag_quick_location; data_set.localhost = stonith_our_uname; cluster_status(&data_set); do_calculations(&data_set, NULL, NULL); for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, &data_set); } data_set.input = NULL; /* Wasn't a copy */ cleanup_alloc_calculations(&data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if(op == NULL || strcmp(op, "move") == 0) { continue; } else if(safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'") + strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); search[0] = 0; stonith_device_remove(rsc_id, TRUE); free(mutable); } else if(strstr(xpath, "/"XML_CIB_TAG_RESOURCES)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = g_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } else if(strstr(xpath, XML_CONS_TAG_RSC_LOCATION)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = g_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (safe_str_neq(standard, "stonith")) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj, FALSE); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *f_topology = get_message_xml(change, XML_TAG_FENCING_TOPOLOGY); if(op == NULL) { continue; } else if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION) && f_topology != NULL) { if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation", op); fencing_topology_init(NULL); } return; } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY "/") == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL "/") == NULL) { if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation", op); fencing_topology_init(NULL); } return; } crm_trace("Handling %s operation for %s", op, xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation", op); fencing_topology_init(NULL); return; } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; xmlNode *stonith_watchdog_xml = NULL; const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access to the device definitions and location constraints */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting the full cib"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldnt retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if(daemon_option_enabled(crm_system_name, "watchdog")) { const char *value = NULL; long timeout_ms = 0; if(value == NULL) { stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } } if(value) { timeout_ms = crm_get_msec(value); } if(timeout_ms != stonith_watchdog_timeout_ms) { crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); stonith_watchdog_timeout_ms = timeout_ms; } } if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { crm_trace("Ignoring cib updates while stonith is disabled"); stonith_enabled_saved = FALSE; return; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating stonith device and topology lists now that stonith is enabled"); stonith_enabled_saved = TRUE; fencing_topology_init(NULL); cib_devices_update(); } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from the cib: init"); have_cib_devices = TRUE; local_cib = copy_xml(output); fencing_topology_init(msg); cib_devices_update(); } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { stonith_cleanup(); crm_exit(pcmk_ok); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB closed."); return; } else { crm_notice("Connection to the CIB terminated. Shutting down."); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); crm_client_cleanup(); free(stonith_our_uname); free_xml(local_cib); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"stand-alone-w-cpg", 0, 0, 'c'}, + {"logfile", 1, 0, 'l'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { int rc, retries = 0; static cib_t *(*cib_new_fn) (void) = NULL; if (cib_new_fn == NULL) { cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE); } if (cib_new_fn != NULL) { cib_api = (*cib_new_fn) (); } if (cib_api == NULL) { crm_err("No connection to the CIB"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_CRMD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_notice("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = st_ipc_created, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } int main(int argc, char **argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t cluster; const char *actions[] = { "reboot", "off", "list", "monitor", "status" }; crm_log_preinit("stonith-ng", argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; + case 'l': + crm_add_logfile(optarg); + break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf (" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf (" How long to wait for the STONITH action to complete per a stonith device.\n"); printf (" Overrides the stonith-timeout cluster property\n"); printf(" \n"); printf(" \n"); printf(" \n"); printf (" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf (" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf (" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf (" How to determine which machines are controlled by the device.\n"); printf (" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); if (stand_alone == FALSE) { #if SUPPORT_HEARTBEAT cluster.hb_conn = NULL; cluster.hb_dispatch = stonith_peer_hb_callback; cluster.destroy = stonith_peer_hb_destroy; #endif if (is_openais_cluster()) { #if SUPPORT_COROSYNC cluster.destroy = stonith_peer_cs_destroy; cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } if (crm_cluster_connect(&cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(DAEMON_RESPAWN_STOP); } stonith_our_uname = cluster.uname; stonith_our_uuid = cluster.uuid; if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } crm_set_status_callback(&st_peer_update_callback); device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); if(daemon_option_enabled(crm_system_name, "watchdog")) { xmlNode *xml; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname); xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL); stonith_device_register(xml, NULL, FALSE); stonith_key_value_freeall(params, 1, 1); free_xml(xml); } stonith_ipc_server_init(&ipcs, &ipc_callbacks); #if SUPPORT_STONITH_CONFIG if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { standalone_cfg_commit(); } #endif /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); stonith_cleanup(); #if SUPPORT_HEARTBEAT if (cluster.hb_conn) { cluster.hb_conn->llc_ops->delete(cluster.hb_conn); } #endif crm_info("Done"); return crm_exit(rc); } diff --git a/fencing/regression.py.in b/fencing/regression.py.in index c4cb2d8e0a..fe6d418d73 100644 --- a/fencing/regression.py.in +++ b/fencing/regression.py.in @@ -1,1071 +1,1081 @@ #!/usr/bin/python # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import os import sys import subprocess import shlex import time def output_from_command(command): test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) test.wait() return test.communicate()[0].split("\n") class Test: def __init__(self, name, description, verbose = 0, with_cpg = 0): self.name = name self.description = description self.cmds = [] self.verbose = verbose self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = 0; self.stonith_options = "-s" self.enable_corosync = 0 if with_cpg: self.stonith_options = "-c" self.enable_corosync = 1 self.stonith_process = None self.stonith_output = "" self.stonith_patterns = [] self.negative_stonith_patterns = [] self.executed = 0 rsc_classes = output_from_command("crm_resource --list-standards") def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, } ) def stop_pacemaker(self): cmd = shlex.split("killall -9 -q pacemakerd") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() def start_environment(self): ### make sure we are in full control here ### self.stop_pacemaker() cmd = shlex.split("killall -9 -q stonithd") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() if self.verbose: + self.stonith_options = self.stonith_options + " -V" print "Starting stonithd with %s" % self.stonith_options + if os.path.exists("/tmp/stonith-regression.log"): + os.remove('/tmp/stonith-regression.log') + self.stonith_process = subprocess.Popen( - shlex.split("@CRM_DAEMON_DIR@/stonithd %s -V" % self.stonith_options), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + shlex.split("@CRM_DAEMON_DIR@/stonithd %s -l /tmp/stonith-regression.log" % self.stonith_options)) time.sleep(1) def clean_environment(self): if self.stonith_process: self.stonith_process.terminate() + self.stonith_process.wait() - self.stonith_output = self.stonith_process.communicate()[1] + self.stonith_output = "" self.stonith_process = None + f = open('/tmp/stonith-regression.log', 'r') + for line in f.readlines(): + self.stonith_output = self.stonith_output + line + if self.verbose: + print "Daemon Output Start" print self.stonith_output + print "Daemon Output End" + os.remove('/tmp/stonith-regression.log') def add_stonith_log_pattern(self, pattern): self.stonith_patterns.append(pattern) def add_stonith_negative_log_pattern(self, pattern): self.negative_stonith_patterns.append(pattern) def add_cmd(self, cmd, args): self.__new_cmd(cmd, args, 0, "") def add_cmd_no_wait(self, cmd, args): self.__new_cmd(cmd, args, 0, "", 1) def add_cmd_check_stdout(self, cmd, args, match, no_match = ""): self.__new_cmd(cmd, args, 0, match, 0, no_match) def add_expected_fail_cmd(self, cmd, args, exitcode = 255): self.__new_cmd(cmd, args, exitcode, "") def get_exitcode(self): return self.result_exitcode def print_result(self, filler): print "%s%s" % (filler, self.result_txt) def run_cmd(self, args): cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print "\n\nRunning: "+" ".join(cmd) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print "Also running: "+args['kill'] subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return 0 output_res = test.communicate() output = output_res[0] + output_res[1] if self.verbose: print output if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: test.returncode = -2 print "STDOUT string '%s' was not found in cmd output: %s" % (args['stdout_match'], output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: test.returncode = -2 print "STDOUT string '%s' was found in cmd output: %s" % (args['stdout_negative_match'], output) return test.returncode; def count_negative_matches(self, outline): count = 0 for line in self.negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print "This pattern should not have matched = '%s" % (line) return count def match_stonith_patterns(self): negative_matches = 0 cur = 0 pats = self.stonith_patterns total_patterns = len(self.stonith_patterns) if len(self.stonith_patterns) == 0: return for line in self.stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 for p in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: for p in pats: print "Pattern Not Matched = '%s'" % p self.result_txt = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." % (self.name, len(pats), total_patterns, negative_matches) self.result_exitcode = -1 def run(self): res = 0 i = 1 self.start_environment() if self.verbose: print "\n--- START TEST - %s" % self.name self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = 0 for cmd in self.cmds: res = self.run_cmd(cmd) if res != cmd['expected_exitcode']: print "Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']) self.result_txt = "FAILURE - '%s' failed at step %d. Command: %s %s" % (self.name, i, cmd['cmd'], cmd['args']) self.result_exitcode = -1 break else: if self.verbose: print "Step %d SUCCESS" % (i) i = i + 1 self.clean_environment() if self.result_exitcode == 0: self.match_stonith_patterns() print self.result_txt if self.verbose: print "--- END TEST - %s\n" % self.name self.executed = 1 return res class Tests: def __init__(self, verbose = 0): self.tests = [] self.verbose = verbose self.autogen_corosync_cfg = 0 if not os.path.exists("/etc/corosync/corosync.conf"): self.autogen_corosync_cfg = 1 def new_test(self, name, description, with_cpg = 0): test = Test(name, description, self.verbose, with_cpg) self.tests.append(test) return test def print_list(self): print "\n==== %d TESTS FOUND ====" % (len(self.tests)) print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION") print "%35s - %s" % ("--------------------", "--------------------") for test in self.tests: print "%35s - %s" % (test.name, test.description) print "==== END OF LIST ====\n" def start_corosync(self): if self.verbose: print "Starting corosync" test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) def stop_corosync(self): cmd = shlex.split("killall -9 -q corosync") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() def run_single(self, name): for test in self.tests: if test.name == name: test.run() break; def run_tests_matching(self, pattern): for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): for test in self.tests: if test.enable_corosync: test.run() def run_no_cpg(self): for test in self.tests: if not test.enable_corosync: test.run() def run_tests(self): for test in self.tests: test.run() def exit(self): for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: sys.exit(-1) sys.exit(0) def print_results(self): failures = 0; success = 0; print "\n\n======= FINAL RESULTS ==========" print "\n--- FAILURE RESULTS:" for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print " None" print "\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures) def build_api_sanity_tests(self): verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg)) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg)) def build_custom_timeout_tests(self): # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", "Verify per device timeouts work as expected without using topology.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4\"") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4 = 7 test.add_stonith_log_pattern("remote op timeout set to 7") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4000\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4000 = 4003 test.add_stonith_log_pattern("remote op timeout set to 4003") def build_fence_merge_tests(self): ### Simple test that overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test = self.new_test("cpg_custom_no_merge", "Verify differing fencing operations are not merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") test.add_stonith_negative_log_pattern("Merging stonith action off for node node3 originating from client") def build_standalone_tests(self): test_types = [ { "prefix" : "standalone" , "use_cpg" : 0, }, { "prefix" : "cpg" , "use_cpg" : 1, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") if test_type["use_cpg"] == 1: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 194) test.add_stonith_log_pattern("remote op timeout set to 6") else: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 55) test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ") # test what happens when multiple devices can fence a node, but the first device fails. for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2") if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("remote op timeout set to 6") # simple topology test for one device for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], "Verify all fencing devices at a level are used.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("remote op timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # add topology, delete topology, verify fencing still works for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_add_remove" % test_type["prefix"], "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-d node3 -i 1") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("remote op timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("remote op timeout set to 40") test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level fails. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("remote op timeout set to 12") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], "Verify topology can continue with missing devices.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") # Test what happens if multiple fencing levels are defined, and then the first one is removed. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], "Verify level removal works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "-d node3 -i 2") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("remote op timeout set to 8") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test the stonith builds the correct list of devices that can fence a node. for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-Q false1") test.add_expected_fail_cmd("stonith_admin", "-Q true2", 237) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 195) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15",195) test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], "Verify devices can be registered and un-registered", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], "Verify devices can be rebooted", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-B node3 -t 2") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) # test fencing history. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_fence_history" % test_type["prefix"], "Verify last fencing operation is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2 -V") test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_list") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_list") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_list") test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V"); # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # test what happens when no reboot action is advertised for test_type in test_types: test = self.new_test("%s_no_reboot_support" % test_type["prefix"], "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); # make sure reboot is used when reboot action is advertised for test_type in test_types: test = self.new_test("%s_with_reboot_support" % test_type["prefix"], "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); test.add_stonith_negative_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); def build_nodeid_tests(self): our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) def build_unfence_tests(self): our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] ### verify unfencing using automatic unfencing test = self.new_test("cpg_unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) # both devices should be executed test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("cpg_unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=fail\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), 143) ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_3", "Verify require unfencing on all devices even when required devices are at different topology levels", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_4", "Verify all required devices are executed even with topology levels fail.", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true4 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R false4 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)"); test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)"); ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", "Verify unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", "Verify failure unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 237) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", "Verify unfencing with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology fails when victim node doesn't exist test = self.new_test("cpg_unfence_on_target_4", "Verify unfencing failure with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2") test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", 237) test.add_stonith_log_pattern("(on) to be executed on the target node") def setup_environment(self, use_corosync): if self.autogen_corosync_cfg and use_corosync: corosync_conf = (""" totem { version: 2 crypto_cipher: none crypto_hash: none nodeid: 101 secauth: off interface { ttl: 1 ringnumber: 0 mcastport: 6666 mcastaddr: 226.94.1.1 bindnetaddr: 127.0.0.1 } } logging { debug: off fileline: off to_syslog: no to_stderr: no syslog_facility: daemon timestamp: on to_logfile: yes logfile: /var/log/corosync.log logfile_priority: info } """) os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf)) if use_corosync: ### make sure we are in control ### self.stop_corosync() self.start_corosync() monitor_fail_agent = ("""#!/usr/bin/python import sys def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: sys.exit(-1); sys.exit(-1) if __name__ == "__main__": main() """) dynamic_list_agent = ("""#!/usr/bin/python import sys def main(): for line in sys.stdin.readlines(): if line.count("list") > 0: print "fake_port_1" sys.exit(0) if line.count("off") > 0: sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) os.system("cat <<-END >>/usr/sbin/fence_dummy_list\n%s\nEND" % (dynamic_list_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_list") os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor_fail\n%s\nEND" % (monitor_fail_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_monitor_fail") os.system("cp /usr/share/pacemaker/tests/cts/fence_dummy /usr/sbin/fence_dummy") # modifies dummy agent to do require unfencing os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/on_target=/automatic=/g' > /usr/sbin/fence_dummy_automatic_unfence"); os.system("chmod 711 /usr/sbin/fence_dummy_automatic_unfence") # modifies dummy agent to not advertise reboot os.system("cat /usr/share/pacemaker/tests/cts/fence_dummy | sed 's/^.*.*//g' > /usr/sbin/fence_dummy_no_reboot"); os.system("chmod 711 /usr/sbin/fence_dummy_no_reboot") def cleanup_environment(self, use_corosync): if use_corosync: self.stop_corosync() if self.verbose and os.path.exists('/var/log/corosync.log'): - print "Daemon output" + print "Corosync output" f = open('/var/log/corosync.log', 'r') for line in f.readlines(): print line.strip() os.remove('/var/log/corosync.log') if self.autogen_corosync_cfg: os.system("rm -f /etc/corosync/corosync.conf") os.system("rm -f /usr/sbin/fence_dummy_monitor_fail") os.system("rm -f /usr/sbin/fence_dummy_list") os.system("rm -f /usr/sbin/fence_dummy") os.system("rm -f /usr/sbin/fence_dummy_automatic_unfence") os.system("rm -f /usr/sbin/fence_dummy_no_reboot") class TestOptions: def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['cpg-only'] = 0 self.options['no-cpg'] = 0 self.options['show-usage'] = 0 def build_options(self, argv): args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-n" or args[i] == "--no-cpg": self.options['no-cpg'] = 1 elif args[i] == "-c" or args[i] == "--cpg-only": self.options['cpg-only'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): print "usage: " + sys.argv[0] + " [options]" print "If no options are provided, all tests will run" print "Options:" print "\t [--help | -h] Show usage" print "\t [--list-tests | -l] Print out all registered tests." print "\t [--cpg-only | -c] Only run tests that require corosync." print "\t [--no-cpg | -n] Only run tests that do not require corosync" print "\t [--run-only | -r 'testname'] Run a specific test" print "\t [--verbose | -V] Verbose output" print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" print "\n\tExample: Run only the test 'start_top'" print "\t\t python ./regression.py --run-only start_stop" print "\n\tExample: Run only the tests with the string 'systemd' present in them" print "\t\t python ./regression.py --run-only-pattern systemd" def main(argv): o = TestOptions() o.build_options(argv) use_corosync = 1 tests = Tests(o.options['verbose']) tests.build_standalone_tests() tests.build_custom_timeout_tests() tests.build_api_sanity_tests() tests.build_fence_merge_tests() tests.build_unfence_tests() tests.build_nodeid_tests() if o.options['list-tests']: tests.print_list() sys.exit(0) elif o.options['show-usage']: o.show_usage() sys.exit(0) print "Starting ..." if o.options['no-cpg']: use_corosync = 0 tests.setup_environment(use_corosync) if o.options['run-only-pattern'] != "": tests.run_tests_matching(o.options['run-only-pattern']) tests.print_results() elif o.options['run-only'] != "": tests.run_single(o.options['run-only']) tests.print_results() elif o.options['no-cpg']: tests.run_no_cpg() tests.print_results() elif o.options['cpg-only']: tests.run_cpg_only() tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(use_corosync) tests.exit() if __name__=="__main__": main(sys.argv)