diff --git a/crm/admin/cibadmin.c b/crm/admin/cibadmin.c index c78a6c00f9..420cbe4278 100644 --- a/crm/admin/cibadmin.c +++ b/crm/admin/cibadmin.c @@ -1,568 +1,570 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GETOPT_H # include #endif #include /* someone complaining about _ha_msg_mod not being found */ int exit_code = cib_ok; int message_timer_id = -1; int message_timeout_ms = 30*1000; GMainLoop *mainloop = NULL; const char *crm_system_name = "cibadmin"; IPC_Channel *crmd_channel = NULL; const char *host = NULL; void usage(const char *cmd, int exit_status); enum cib_errors do_init(void); int do_work(crm_data_t *input, int command_options, crm_data_t **output); gboolean admin_msg_callback(IPC_Channel * source_data, void *private_data); gboolean admin_message_timeout(gpointer data); void cib_connection_destroy(gpointer user_data); void cibadmin_op_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); int command_options = 0; const char *cib_action = NULL; typedef struct str_list_s { int num_items; char *value; struct str_list_s *next; } str_list_t; char *this_msg_reference = NULL; char *obj_type = NULL; char *status = NULL; char *migrate_from = NULL; char *migrate_res = NULL; char *subtype = NULL; char *reset = NULL; int request_id = 0; int operation_status = 0; cib_t *the_cib = NULL; #define OPTARGS "V?o:QDUCEX:t:Srwlsh:MmBfbdRx:pP" int main(int argc, char **argv) { int argerr = 0; int flag; char *admin_input_xml = NULL; char *admin_input_file = NULL; gboolean admin_input_stdin = FALSE; crm_data_t *output = NULL; crm_data_t *input = NULL; #ifdef HAVE_GETOPT_H int option_index = 0; static struct option long_options[] = { /* Top-level Options */ {CIB_OP_ERASE, 0, 0, 'E'}, {CIB_OP_QUERY, 0, 0, 'Q'}, {CIB_OP_CREATE, 0, 0, 'C'}, {CIB_OP_REPLACE, 0, 0, 'R'}, {CIB_OP_UPDATE, 0, 0, 'U'}, {CIB_OP_MODIFY, 0, 0, 'M'}, {"patch", 0, 0, 'P'}, {CIB_OP_DELETE, 0, 0, 'D'}, {CIB_OP_DELETE_ALT, 0, 0, 'd'}, {CIB_OP_BUMP, 0, 0, 'B'}, {CIB_OP_SYNC, 0, 0, 'S'}, {CIB_OP_SLAVE, 0, 0, 'r'}, {CIB_OP_MASTER, 0, 0, 'w'}, {CIB_OP_ISMASTER,0, 0, 'm'}, {"force-quorum",0, 0, 'f'}, {"local", 0, 0, 'l'}, {"sync-call", 0, 0, 's'}, {"no-bcast", 0, 0, 'b'}, {"host", 0, 0, 'h'}, {F_CRM_DATA, 1, 0, 'X'}, {"xml-file", 1, 0, 'x'}, {"xml-pipe", 0, 0, 'p'}, {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"reference", 1, 0, 0}, {"timeout", 1, 0, 't'}, /* common options */ {"obj_type", 1, 0, 'o'}, {0, 0, 0, 0} }; #endif cl_log_set_entity("cibadmin"); cl_log_set_facility(LOG_USER); set_crm_log_level(LOG_CRIT-1); if(argc < 2) { usage(crm_system_name, LSB_EXIT_EINVAL); } while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch(flag) { #ifdef HAVE_GETOPT_H case 0: printf("option %s", long_options[option_index].name); if (optarg) printf(" with arg %s", optarg); printf("\n"); if (safe_str_eq("reference", long_options[option_index].name)) { this_msg_reference = crm_strdup(optarg); } else { printf("Long option (--%s) is not (yet?) properly supported\n", long_options[option_index].name); ++argerr; } break; #endif case 't': message_timeout_ms = atoi(optarg); if(message_timeout_ms < 1) { message_timeout_ms = 30*1000; } break; case 'E': cib_action = CIB_OP_ERASE; break; case 'Q': cib_action = CIB_OP_QUERY; break; case 'P': cib_action = CIB_OP_APPLY_DIFF; break; case 'S': cib_action = CIB_OP_SYNC; break; case 'U': case 'M': cib_action = CIB_OP_MODIFY; break; case 'R': cib_action = CIB_OP_REPLACE; break; case 'C': cib_action = CIB_OP_CREATE; break; case 'D': cib_action = CIB_OP_DELETE; break; case 'd': cib_action = CIB_OP_DELETE_ALT; break; case 'm': cib_action = CIB_OP_ISMASTER; command_options |= cib_scope_local; break; case 'B': cib_action = CIB_OP_BUMP; break; case 'r': cib_action = CIB_OP_SLAVE; break; case 'w': cib_action = CIB_OP_MASTER; command_options |= cib_scope_local; break; case 'V': command_options = command_options | cib_verbose; cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case '?': usage(crm_system_name, LSB_EXIT_OK); break; case 'o': crm_debug_2("Option %c => %s", flag, optarg); obj_type = crm_strdup(optarg); break; case 'X': crm_debug_2("Option %c => %s", flag, optarg); admin_input_xml = crm_strdup(optarg); break; case 'x': crm_debug_2("Option %c => %s", flag, optarg); admin_input_file = crm_strdup(optarg); break; case 'p': admin_input_stdin = TRUE; break; case 'h': host = crm_strdup(optarg); break; case 'l': command_options |= cib_scope_local; break; case 'b': command_options |= cib_inhibit_bcast; command_options |= cib_scope_local; break; case 's': command_options |= cib_sync_call; break; case 'f': command_options |= cib_quorum_override; break; default: printf("Argument code 0%o (%c)" " is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } if(cib_action == NULL) { usage(crm_system_name, cib_operation); } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } - + if(admin_input_file != NULL) { FILE *xml_strm = fopen(admin_input_file, "r"); input = file2xml(xml_strm, FALSE); if(input == NULL) { fprintf(stderr, "Couldn't parse input file: %s\n", admin_input_file); return 1; } fclose(xml_strm); } else if(admin_input_xml != NULL) { input = string2xml(admin_input_xml); if(input == NULL) { fprintf(stderr, "Couldn't parse input string: %s\n", admin_input_xml); return 1; } } else if(admin_input_stdin) { input = stdin2xml(); if(input == NULL) { fprintf(stderr, "Couldn't parse input from STDIN.\n"); return 1; } } if(input != NULL) { crm_log_xml_debug(input, "[admin input]"); } exit_code = do_init(); if(exit_code != cib_ok) { crm_err("Init failed, could not perform requested operations"); fprintf(stderr, "Init failed, could not perform requested operations\n"); return -exit_code; } + cl_log_args(argc, argv); + exit_code = do_work(input, command_options, &output); if (exit_code > 0) { /* wait for the reply by creating a mainloop and running it until * the callbacks are invoked... */ request_id = exit_code; add_cib_op_callback( request_id, FALSE, NULL, cibadmin_op_callback); mainloop = g_main_new(FALSE); crm_debug("Setting operation timeout to %dms for call %d", message_timeout_ms, request_id); message_timer_id = Gmain_timeout_add( message_timeout_ms, admin_message_timeout, NULL); crm_debug_3("%s waiting for reply from the local CIB", crm_system_name); crm_info("Starting mainloop"); g_main_run(mainloop); } else if(exit_code < 0) { crm_err("Call failed: %s", cib_error2string(exit_code)); fprintf(stderr, "Call failed: %s\n", cib_error2string(exit_code)); operation_status = exit_code; } if(output != NULL) { char *buffer = dump_xml_formatted(output); fprintf(stdout, "%s", crm_str(buffer)); crm_free(buffer); } crm_debug_3("%s exiting normally", crm_system_name); return -exit_code; } int do_work(crm_data_t *input, int call_options, crm_data_t **output) { /* construct the request */ if (strcasecmp(CIB_OP_SYNC, cib_action) == 0) { crm_debug_4("Performing %s op...", cib_action); return the_cib->cmds->sync_from( the_cib, host, obj_type, call_options); } else if (strcasecmp(CIB_OP_SLAVE, cib_action) == 0 && (call_options ^ cib_scope_local) ) { crm_debug_4("Performing %s op on all nodes...", cib_action); return the_cib->cmds->set_slave_all(the_cib, call_options); } else if (strcasecmp(CIB_OP_MASTER, cib_action) == 0) { crm_debug_4("Performing %s op on all nodes...", cib_action); return the_cib->cmds->set_master(the_cib, call_options); } else if(cib_action != NULL) { crm_debug_4("Passing \"%s\" to variant_op...", cib_action); if(strcasecmp(CIB_OP_APPLY_DIFF, cib_action) != 0 && input != NULL && do_id_check(input, NULL, TRUE, FALSE)) { crm_err("ID Check failed."); return cib_id_check; } return the_cib->cmds->variant_op( the_cib, cib_action, host, obj_type, input, output, call_options); } else { crm_err("You must specify an operation"); } return cib_operation; } enum cib_errors do_init(void) { enum cib_errors rc = cib_ok; the_cib = cib_new(); rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); if(rc != cib_ok) { crm_err("Signon to CIB failed: %s", cib_error2string(rc)); fprintf(stderr, "Signon to CIB failed: %s\n", cib_error2string(rc)); } return rc; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status != 0 ? stderr : stdout; fprintf(stream, "usage: %s [%s] command\n" "\twhere necessary, XML data will be obtained using -X," " -x, or -p options\n", cmd, OPTARGS); fprintf(stream, "Options\n"); fprintf(stream, "\t--%s (-%c) \tobject type being operated on\n", "obj_type", 'o'); fprintf(stream, "\t\tValid values are: nodes, resources, constraints, crm_config, status\n"); fprintf(stream, "\t--%s (-%c)\tturn on debug info." " additional instance increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\tthis help message\n", "help", '?'); fprintf(stream, "\nCommands\n"); fprintf(stream, "\t--%s (-%c)\tErase the contents of the whole CIB\n", CIB_OP_ERASE, 'E'); fprintf(stream, "\t--%s (-%c)\t\n", CIB_OP_QUERY, 'Q'); fprintf(stream, "\t--%s (-%c)\t\n", CIB_OP_CREATE, 'C'); fprintf(stream, "\t--%s (-%c)\tRecursivly replace an object in the CIB\n", CIB_OP_REPLACE,'R'); fprintf(stream, "\t--%s (-%c)\tRecursivly update an object in the CIB\n", CIB_OP_UPDATE, 'U'); fprintf(stream, "\t--%s (-%c)\tFind the object somewhere in the CIB's XML tree and update is as --"CIB_OP_UPDATE" would\n", CIB_OP_MODIFY, 'M'); fprintf(stream, "\t--%s (-%c)\t\n", CIB_OP_DELETE, 'D'); fprintf(stream, "\t\t\tDelete the first object matching the supplied criteria\n"); fprintf(stream, "\t\t\tEg. \n"); fprintf(stream, "\t\t\tThe tagname and all attributes must match in order for the element to be deleted\n"); fprintf(stream, "\t--%s (-%c)\t\n", CIB_OP_DELETE_ALT, 'd'); fprintf(stream, "\t\t\tDelete the object at specified fully qualified location\n"); fprintf(stream, "\t\t\tEg. ...\n"); fprintf(stream, "\t\t\tRequires -o\n"); fprintf(stream, "\t--%s (-%c)\t\n", CIB_OP_BUMP, 'B'); fprintf(stream, "\t--%s (-%c)\t\n", CIB_OP_ISMASTER,'m'); fprintf(stream, "\t--%s (-%c)\t\n", CIB_OP_SYNC, 'S'); fprintf(stream, "\nXML data\n"); fprintf(stream, "\t--%s (-%c) \t\tRetrieve XML from the supplied string\n", F_CRM_DATA, 'X'); fprintf(stream, "\t--%s (-%c) \tRetrieve XML from the named file\n", "xml-file", 'x'); fprintf(stream, "\t--%s (-%c)\t\t\tRetrieve XML from STDIN\n", "xml-pipe", 'p'); fprintf(stream, "\nAdvanced Options\n"); fprintf(stream, "\t--%s (-%c)\tsend command to specified host." " Applies to %s and %s commands only\n", "host", 'h', CIB_OP_QUERY, CIB_OP_SYNC); fprintf(stream, "\t--%s (-%c)\tcommand takes effect locally" " on the specified host\n", "local", 'l'); fprintf(stream, "\t--%s (-%c)\tcommand will not be broadcast even if" " it altered the CIB\n", "no-bcast", 'b'); fprintf(stream, "\t--%s (-%c)\twait for call to complete before" " returning\n", "sync-call", 's'); fflush(stream); exit(exit_status); } gboolean admin_message_timeout(gpointer data) { if(safe_str_eq(cib_action, CIB_OP_SLAVE)) { exit_code = cib_ok; fprintf(stdout, "CIB service(s) are in slave mode.\n"); } else { exit_code = cib_reply_failed; fprintf(stderr, "No messages received in %d seconds.. aborting\n", (int)message_timeout_ms/1000); crm_err("No messages received in %d seconds", (int)message_timeout_ms/1000); } g_main_quit(mainloop); return FALSE; } void cib_connection_destroy(gpointer user_data) { crm_err("Connection to the CIB terminated... exiting"); g_main_quit(mainloop); return; } void cibadmin_op_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { char *admin_input_xml = NULL; crm_info("our callback was invoked"); crm_log_message(LOG_MSG, msg); exit_code = rc; if(output != NULL) { admin_input_xml = dump_xml_formatted(output); } if(safe_str_eq(cib_action, CIB_OP_ISMASTER) && rc != cib_ok) { crm_info("CIB on %s is _not_ the master instance", host?host:"localhost"); fprintf(stderr, "CIB on %s is _not_ the master instance\n", host?host:"localhost"); } else if(safe_str_eq(cib_action, CIB_OP_ISMASTER)) { crm_info("CIB on %s _is_ the master instance", host?host:"localhost"); fprintf(stderr, "CIB on %s _is_ the master instance\n", host?host:"localhost"); } else if(rc != 0) { crm_warn("Call %s failed (%d): %s", cib_action, rc, cib_error2string(rc)); fprintf(stderr, "Call %s failed (%d): %s\n", cib_action, rc, cib_error2string(rc)); fprintf(stdout, "%s\n", crm_str(admin_input_xml)); } else if(safe_str_eq(cib_action, CIB_OP_QUERY) && output==NULL) { crm_err("Output expected in query response"); crm_log_message(LOG_ERR, msg); } else if(output == NULL) { crm_info("Call passed"); } else { crm_info("Call passed"); fprintf(stdout, "%s\n", crm_str(admin_input_xml)); } crm_free(admin_input_xml); if(call_id == request_id) { g_main_quit(mainloop); } else { crm_info("Message was not the response we were looking for (%d vs. %d", call_id, request_id); } } diff --git a/crm/admin/crm_attribute.c b/crm/admin/crm_attribute.c index 494bb3ef40..c9f389baec 100644 --- a/crm/admin/crm_attribute.c +++ b/crm/admin/crm_attribute.c @@ -1,477 +1,478 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GETOPT_H # include #endif void usage(const char *cmd, int exit_status); gboolean BE_QUIET = FALSE; gboolean DO_WRITE = TRUE; gboolean DO_DELETE = FALSE; gboolean inhibit_pe = FALSE; char *dest_node = NULL; char *set_name = NULL; char *attr_id = NULL; char *attr_name = NULL; const char *type = NULL; const char *rsc_id = NULL; const char *dest_uname = NULL; const char *attr_value = NULL; const char *crm_system_name = "crm_master"; #define OPTARGS "V?GDQU:u:s:n:v:l:t:i:!r:" int main(int argc, char **argv) { gboolean is_done = FALSE; cib_t * the_cib = NULL; enum cib_errors rc = cib_ok; int argerr = 0; int flag; #ifdef HAVE_GETOPT_H int option_index = 0; static struct option long_options[] = { /* Top-level Options */ {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"quiet", 0, 0, 'Q'}, {"get-value", 0, 0, 'G'}, {"delete-attr", 0, 0, 'D'}, {"node-uname", 1, 0, 'U'}, {"node-uuid", 1, 0, 'u'}, {"set-name", 1, 0, 's'}, {"attr-name", 1, 0, 'n'}, {"attr-value", 1, 0, 'v'}, {"resource-id", 1, 0, 'r'}, {"lifetime", 1, 0, 'l'}, {"inhibit-policy-engine", 0, 0, '!'}, {"type", 1, 0, 't'}, {0, 0, 0, 0} }; #endif crm_system_name = basename(argv[0]); crm_log_init(crm_system_name); crm_log_level = LOG_ERR; cl_log_enable_stderr(TRUE); if(argc < 2) { usage(crm_system_name, LSB_EXIT_EINVAL); } while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch(flag) { case 'V': cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case '?': usage(crm_system_name, LSB_EXIT_OK); break; case 'G': DO_WRITE = FALSE; break; case 'Q': BE_QUIET = TRUE; break; case 'D': DO_DELETE = TRUE; break; case 'U': crm_debug_2("Option %c => %s", flag, optarg); dest_uname = optarg; break; case 'u': crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 's': crm_debug_2("Option %c => %s", flag, optarg); set_name = crm_strdup(optarg); break; case 'l': crm_debug_2("Option %c => %s", flag, optarg); type = optarg; break; case 't': crm_debug_2("Option %c => %s", flag, optarg); type = optarg; break; case 'n': crm_debug_2("Option %c => %s", flag, optarg); attr_name = crm_strdup(optarg); break; case 'i': crm_debug_2("Option %c => %s", flag, optarg); attr_id = crm_strdup(optarg); break; case 'v': crm_debug_2("Option %c => %s", flag, optarg); attr_value = optarg; break; case 'r': crm_debug_2("Option %c => %s", flag, optarg); rsc_id = optarg; break; case '!': inhibit_pe = TRUE; break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } the_cib = cib_new(); rc = the_cib->cmds->signon( the_cib, crm_system_name, cib_command_synchronous); if(rc != cib_ok) { fprintf(stderr, "Error signing on to the CIB service: %s\n", cib_error2string(rc)); return rc; } + cl_log_args(argc, argv); if(safe_str_eq(crm_system_name, "crm_attribute") && type == NULL && dest_uname == NULL) { /* we're updating cluster options - dont populate dest_node */ type = XML_CIB_TAG_CRMCONFIG; } else if(dest_uname == NULL) { struct utsname name; if(uname(&name) < 0) { cl_perror("uname(2) call failed"); return 1; } dest_uname = name.nodename; crm_info("Detected uname: %s", dest_uname); } if(dest_node == NULL && dest_uname != NULL) { rc = query_node_uuid(the_cib, dest_uname, &dest_node); if(rc != cib_ok) { fprintf(stderr,"Could not map uname=%s to a UUID: %s\n", dest_uname, cib_error2string(rc)); return rc; } else { crm_info("Mapped %s to %s", dest_uname, crm_str(dest_node)); } } if(safe_str_eq(crm_system_name, "crm_master")) { int len = 0; if(safe_str_eq(type, "reboot")) { type = XML_CIB_TAG_STATUS; } else { type = XML_CIB_TAG_NODES; } rsc_id = getenv("OCF_RESOURCE_INSTANCE"); if(rsc_id == NULL && dest_node == NULL) { fprintf(stderr, "This program should only ever be " "invoked from inside an OCF resource agent.\n"); fprintf(stderr, "DO NOT INVOKE MANUALLY FROM THE COMMAND LINE.\n"); return 1; } else if(dest_node == NULL) { fprintf(stderr, "Could not determine node UUID.\n"); return 1; } else if(rsc_id == NULL) { fprintf(stderr, "Could not determine resource name.\n"); return 1; } len = 8 + strlen(rsc_id); crm_malloc0(attr_name, len); sprintf(attr_name, "master-%s", rsc_id); len = 3 + strlen(type) + strlen(attr_name) + strlen(dest_node); crm_malloc0(attr_id, len); sprintf(attr_id, "%s-%s-%s", type, attr_name, dest_node); len = 8 + strlen(dest_node); crm_malloc0(set_name, len); sprintf(set_name, "master-%s", dest_node); } else if(safe_str_eq(crm_system_name, "crm_failcount")) { type = XML_CIB_TAG_STATUS; if(rsc_id == NULL) { fprintf(stderr,"Please specify a resource with -r\n"); return 1; } if(dest_node == NULL) { fprintf(stderr,"Please specify a node with -U or -u\n"); return 1; } set_name = NULL; attr_name = crm_concat("fail-count", rsc_id, '-'); } else if(safe_str_eq(crm_system_name, "crm_standby")) { if(dest_node == NULL) { fprintf(stderr,"Please specify a node with -U or -u\n"); return 1; } else if(DO_DELETE) { rc = delete_standby( the_cib, dest_node, type, attr_value); } else if(DO_WRITE) { if(attr_value == NULL) { fprintf(stderr, "Please specify 'true' or 'false' with -v\n"); return 1; } rc = set_standby(the_cib, dest_node, type, attr_value); } else { char *read_value = NULL; char *scope = NULL; if(type) { scope = crm_strdup(type); } rc = query_standby( the_cib, dest_node, &scope, &read_value); if(BE_QUIET == FALSE) { if(attr_id) { fprintf(stdout, "id=%s ", attr_id); } if(attr_name) { fprintf(stdout, "name=%s ", attr_name); } fprintf(stdout, "scope=%s value=%s\n", scope, read_value?read_value:"(null)"); } else if(read_value != NULL) { fprintf(stdout, "%s\n", read_value); } crm_free(scope); } is_done = TRUE; } else if (type == NULL) { type = XML_CIB_TAG_NODES; return 1; } if(safe_str_eq(type, XML_CIB_TAG_CRMCONFIG)) { dest_node = NULL; } if(is_done) { } else if(DO_DELETE) { rc = delete_attr(the_cib, cib_sync_call, type, dest_node, set_name, attr_id, attr_name, attr_value); if(safe_str_eq(crm_system_name, "crm_failcount")) { char *now_s = NULL; time_t now = time(NULL); now_s = crm_itoa(now); update_attr(the_cib, cib_sync_call, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, "last-lrm-refresh", now_s); crm_free(now_s); } } else if(DO_WRITE) { int cib_opts = cib_sync_call; CRM_DEV_ASSERT(type != NULL); CRM_DEV_ASSERT(attr_name != NULL); CRM_DEV_ASSERT(attr_value != NULL); if(inhibit_pe) { crm_warn("Inhibiting notifications for this update"); cib_opts |= cib_inhibit_notify; } rc = update_attr(the_cib, cib_opts, type, dest_node, set_name, attr_id, attr_name, attr_value); } else { char *read_value = NULL; rc = read_attr(the_cib, type, dest_node, set_name, attr_id, attr_name, &read_value); if(rc == cib_NOTEXISTS && safe_str_eq(crm_system_name, "crm_failcount")) { rc = cib_ok; read_value = crm_strdup("0"); } crm_info("Read %s=%s %s%s", attr_name, crm_str(read_value), set_name?"in ":"", set_name?set_name:""); if(BE_QUIET == FALSE) { fprintf(stdout, "%s%s %s%s value=%s\n", attr_id?"id=":"", attr_id?attr_id:"", attr_name?"name=":"", attr_name?attr_name:"", read_value?read_value:"(null)"); } else if(read_value != NULL) { fprintf(stdout, "%s\n", read_value); } } the_cib->cmds->signoff(the_cib); if(rc != cib_ok) { fprintf(stderr, "Error performing operation: %s\n", cib_error2string(rc)); } return rc; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; if(safe_str_eq(cmd, "crm_master")) { fprintf(stream, "usage: %s [-?VQ] -(D|G|v) [-l]\n", cmd); } else if(safe_str_eq(cmd, "crm_standby")) { fprintf(stream, "usage: %s [-?V] -(u|U) -(D|G|v) [-l]\n", cmd); } else if(safe_str_eq(cmd, "crm_failcount")) { fprintf(stream, "usage: %s [-?V] -(u|U) -(D|G|v) -r\n", cmd); } else { fprintf(stream, "usage: %s [-?V] -(D|G|v) [options]\n", cmd); } fprintf(stream, "Options\n"); fprintf(stream, "\t--%s (-%c)\t: this help message\n", "help", '?'); fprintf(stream, "\t--%s (-%c)\t: " "turn on debug info. additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t: Print only the value on stdout" " (use with -G)\n", "quiet", 'Q'); fprintf(stream, "\t--%s (-%c)\t: " "Retrieve rather than set the %s\n", "get-value", 'G', safe_str_eq(cmd, "crm_master")?"named attribute":"preference to be promoted"); fprintf(stream, "\t--%s (-%c)\t: " "Delete rather than set the attribute\n", "delete-attr", 'D'); fprintf(stream, "\t--%s (-%c) \t: " "Value to use (ignored with -G)\n", "attr-value", 'v'); if(safe_str_eq(cmd, "crm_master")) { fprintf(stream, "\t--%s (-%c) \t: " "How long the preference lasts (reboot|forever)\n", "lifetime", 'l'); exit(exit_status); } else if(safe_str_eq(cmd, "crm_standby")) { fprintf(stream, "\t--%s (-%c) \t: " "UUID of the node to change\n", "node-uuid", 'u'); fprintf(stream, "\t--%s (-%c) \t: " "uname of the node to change\n", "node-uname", 'U'); fprintf(stream, "\t--%s (-%c) \t: " "How long the preference lasts (reboot|forever)\n" "\t If a forever value exists, it is ALWAYS used by the CRM\n" "\t instead of any reboot value\n", "lifetime", 'l'); exit(exit_status); } fprintf(stream, "\t--%s (-%c) \t: " "UUID of the node to change\n", "node-uuid", 'u'); fprintf(stream, "\t--%s (-%c) \t: " "uname of the node to change\n", "node-uname", 'U'); if(safe_str_eq(cmd, "crm_failcount")) { fprintf(stream, "\t--%s (-%c) \t: " "name of the resource to operate on\n", "resource-id", 'r'); } else { fprintf(stream, "\t--%s (-%c) \t: " "Set of attributes in which to read/write the attribute\n", "set-name", 's'); fprintf(stream, "\t--%s (-%c) \t: " "Attribute to set\n", "attr-name", 'n'); fprintf(stream, "\t--%s (-%c) \t: " "Which section of the CIB to set the attribute: (%s|%s|%s)\n", "type", 't', XML_CIB_TAG_NODES, XML_CIB_TAG_STATUS, XML_CIB_TAG_CRMCONFIG); fprintf(stream, "\t -t=%s options: -(U|u) -n [-s]\n", XML_CIB_TAG_NODES); fprintf(stream, "\t -t=%s options: -(U|u) -n [-s]\n", XML_CIB_TAG_STATUS); fprintf(stream, "\t -t=%s options: -n [-s]\n", XML_CIB_TAG_CRMCONFIG); } fflush(stream); exit(exit_status); } diff --git a/crm/admin/crm_resource.c b/crm/admin/crm_resource.c index 5a4100b5c3..901b9bc6b6 100644 --- a/crm/admin/crm_resource.c +++ b/crm/admin/crm_resource.c @@ -1,1301 +1,1303 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GETOPT_H # include #endif void usage(const char *cmd, int exit_status); gboolean do_force = FALSE; gboolean BE_QUIET = FALSE; const char *attr_set_type = XML_TAG_ATTR_SETS; char *host_id = NULL; const char *rsc_id = NULL; const char *host_uname = NULL; const char *crm_system_name = NULL; const char *prop_name = NULL; const char *prop_value = NULL; const char *rsc_type = NULL; const char *prop_id = NULL; const char *prop_set = NULL; char *migrate_lifetime = NULL; char rsc_cmd = 'L'; char *our_pid = NULL; IPC_Channel *crmd_channel = NULL; char *xml_file = NULL; int cib_options = cib_sync_call; #define OPTARGS "V?LRQxDCPp:WMUr:H:v:t:p:g:d:i:s:G:S:fX:lmu:" static int do_find_resource(const char *rsc, pe_working_set_t *data_set) { int found = 0; resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if(the_rsc == NULL) { return cib_NOTEXISTS; } slist_iter(node, node_t, the_rsc->running_on, lpc, crm_debug_3("resource %s is running on: %s", rsc, node->details->uname); if(BE_QUIET) { fprintf(stdout, "%s\n", node->details->uname); } else { fprintf(stdout, "resource %s is running on: %s\n", rsc, node->details->uname); } found++; ); if(BE_QUIET == FALSE && found == 0) { fprintf(stderr, "resource %s is NOT running\n", rsc); } return 0; } static void print_raw_rsc(resource_t *rsc, int level) { int lpc = 0; GListPtr children = NULL; for(; lpc < level; lpc++) { printf(" "); } printf(" * %s\n", rsc->id); children = rsc->fns->children(rsc); slist_iter(child, resource_t, children, lpc, print_raw_rsc(child, level+1); ); } static int do_find_resource_list(pe_working_set_t *data_set, gboolean raw) { int found = 0; slist_iter( rsc, resource_t, data_set->resources, lpc, if(raw) { found++; print_raw_rsc(rsc, 0); continue; } else if(rsc->orphan && rsc->fns->active(rsc, TRUE) == FALSE) { continue; } rsc->fns->print( rsc, NULL, pe_print_printf|pe_print_rsconly, stdout); found++; ); if(found == 0) { printf("NO resources configured\n"); return cib_NOTEXISTS; } return 0; } static int dump_resource(const char *rsc, pe_working_set_t *data_set) { char *rsc_xml = NULL; resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if(the_rsc == NULL) { return cib_NOTEXISTS; } the_rsc->fns->print(the_rsc, NULL, pe_print_printf, stdout); rsc_xml = dump_xml_formatted(the_rsc->xml); fprintf(stdout, "raw xml:\n%s", rsc_xml); crm_free(rsc_xml); return 0; } static int dump_resource_attr( const char *rsc, const char *attr, pe_working_set_t *data_set) { node_t *current = NULL; resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); const char *value = NULL; if(the_rsc == NULL) { return cib_NOTEXISTS; } if(g_list_length(the_rsc->running_on) == 1) { current = the_rsc->running_on->data; } else if(g_list_length(the_rsc->running_on) > 1) { fprintf(stderr, "%s is active on more than one node," " returning the default value for %s\n", the_rsc->id, crm_str(value)); } unpack_instance_attributes( the_rsc->xml, attr_set_type, current?current->details->attrs:NULL, the_rsc->parameters, NULL, data_set->now); if(the_rsc->parameters != NULL) { crm_debug("Looking up %s in %s", attr, the_rsc->id); value = g_hash_table_lookup(the_rsc->parameters, attr); } if(value != NULL) { fprintf(stdout, "%s\n", value); return 0; } return cib_NOTEXISTS; } static int set_resource_attr(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, cib_t *cib, pe_working_set_t *data_set) { int rc = cib_ok; int matches = 0; char *local_attr_id = NULL; char *local_attr_set = NULL; crm_data_t *xml_top = NULL; crm_data_t *xml_obj = NULL; crm_data_t *nv_children = NULL; crm_data_t *set_children = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if(rsc == NULL) { return cib_NOTEXISTS; } /* filter by set and type */ matches = find_xml_children( &set_children, rsc->xml, attr_set_type, XML_ATTR_ID, attr_set, FALSE); crm_log_xml_debug(set_children, "search by set:"); crm_debug("%d objects matching tag=%s id=%s", matches, attr_set_type, attr_set?attr_set:""); if(matches == 0) { /* nothing more to search */ crm_debug("No objects matching tag=%s id=%s", attr_set_type, attr_set?attr_set:""); } else if(attr_id == NULL) { matches = find_xml_children( &nv_children, set_children, XML_CIB_TAG_NVPAIR, XML_NVPAIR_ATTR_NAME, attr_name, FALSE); crm_log_xml_debug(nv_children, "search by name:"); } else { matches = find_xml_children( &nv_children, set_children, XML_CIB_TAG_NVPAIR, XML_ATTR_ID, attr_id, FALSE); crm_log_xml_debug(nv_children, "search by id:"); } if(matches > 1) { fprintf(stderr, "Multiple attributes match name=%s for the resource %s:\n", attr_name, rsc->id); if(set_children == NULL) { free_xml(set_children); set_children = NULL; find_xml_children( &set_children, rsc->xml, attr_set_type, NULL, NULL, FALSE); xml_child_iter( set_children, set, free_xml(nv_children); nv_children = NULL; find_xml_children( &nv_children, set, XML_CIB_TAG_NVPAIR, XML_NVPAIR_ATTR_NAME, attr_name, FALSE); xml_child_iter( nv_children, child, fprintf(stderr," Set: %s,\tValue: %s,\tID: %s\n", ID(set), crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); ); ); } else { xml_child_iter( nv_children, child, fprintf(stderr," ID: %s, Value: %s\n", ID(child), crm_element_value(child, XML_NVPAIR_ATTR_VALUE)); ); } if(BE_QUIET == FALSE) { fprintf(stderr, "\nThe following text can be suppressed with the -Q option:\n"); if(attr_set == NULL) { fprintf(stderr, " * To choose an existing entry to change, please supply one of the set names above using the -s option.\n"); } else { fprintf(stderr, " * To choose an existing entry to change, please supply one of the IDs above using the -i option.\n"); } fprintf(stderr, " * To create a new value with a default ID, please supply a different set name using the -s option.\n"); fprintf(stderr, "You can also use --query-xml to display the complete resource definition.\n"); } return cib_unknown; } else if(matches == 0) { if(attr_set == NULL) { if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { local_attr_set = crm_concat(rsc->id, "meta-options", '-'); } else { local_attr_set = crm_strdup(rsc->id); } attr_set = local_attr_set; } if(attr_id == NULL) { local_attr_id = crm_concat(attr_set, attr_name, '-'); attr_id = local_attr_id; } xml_top = create_xml_node(NULL, crm_element_name(rsc->xml)); crm_xml_add(xml_top, XML_ATTR_ID, rsc->id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); xml_obj = create_xml_node(xml_obj, XML_TAG_ATTRS); xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); } else { if(attr_id == NULL) { /* extract it */ xml_child_iter(nv_children, child, attr_id = ID(child)); } xml_obj = create_xml_node(NULL, XML_CIB_TAG_NVPAIR); xml_top = xml_obj; } crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, NULL, cib_options); free_xml(xml_top); crm_free(local_attr_id); crm_free(local_attr_set); return rc; } static int delete_resource_attr( const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, cib_t *cib, pe_working_set_t *data_set) { crm_data_t *xml_obj = NULL; crm_data_t *xml_match = NULL; int rc = cib_ok; char *local_attr_id = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if(rsc == NULL) { return cib_NOTEXISTS; } xml_match = find_attr_details( rsc->xml, NULL, attr_set, attr_id, attr_name); if(xml_match == NULL) { return cib_missing_data; } if(attr_id == NULL) { local_attr_id = crm_element_value_copy(xml_match, XML_ATTR_ID); attr_id = local_attr_id; } xml_obj = create_xml_node(NULL, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_log_xml_debug(xml_obj, "Delete"); rc = cib->cmds->delete(cib, XML_CIB_TAG_RESOURCES, xml_obj, NULL, cib_options); free_xml(xml_obj); free_xml(xml_match); crm_free(local_attr_id); return rc; } static int dump_resource_prop( const char *rsc, const char *attr, pe_working_set_t *data_set) { const char *value = NULL; resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if(the_rsc == NULL) { return cib_NOTEXISTS; } value = crm_element_value(the_rsc->xml, attr); if(value != NULL) { fprintf(stdout, "%s\n", value); return 0; } return cib_NOTEXISTS; } static void resource_ipc_connection_destroy(gpointer user_data) { crm_info("Connection to CRMd was terminated"); exit(1); } static gboolean crmd_msg_callback(IPC_Channel * server, void *private_data) { int lpc = 0; IPC_Message *msg = NULL; ha_msg_input_t *new_input = NULL; gboolean hack_return_good = TRUE; while (server->ch_status != IPC_DISCONNECT && server->ops->is_message_pending(server) == TRUE) { if(new_input != NULL) { delete_ha_msg_input(new_input); new_input = NULL; } if (server->ops->recv(server, &msg) != IPC_OK) { perror("Receive failure:"); return !hack_return_good; } if (msg == NULL) { crm_debug_4("No message this time"); continue; } lpc++; new_input = new_ipc_msg_input(msg); crm_log_message(LOG_MSG, new_input->msg); msg->msg_done(msg); if (validate_crm_message( new_input->msg, crm_system_name, our_pid, XML_ATTR_RESPONSE) == FALSE) { crm_info("Message was not a CRM response. Discarding."); } delete_ha_msg_input(new_input); new_input = NULL; } if (server->ch_status == IPC_DISCONNECT) { crm_debug_2("admin_msg_callback: received HUP"); return !hack_return_good; } return hack_return_good; } static int delete_lrm_rsc( IPC_Channel *crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { char *key = NULL; int rc = cib_send_failed; HA_Message *cmd = NULL; crm_data_t *xml_rsc = NULL; const char *value = NULL; HA_Message *params = NULL; crm_data_t *msg_data = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if(rsc == NULL) { fprintf(stderr, "Resource %s not found\n", rsc_id); return cib_NOTEXISTS; } else if(rsc->variant != pe_native) { fprintf(stderr, "We can only clean up primitive resources, not %s\n", rsc_id); return cib_NOTEXISTS; } else if(rsc->failed == FALSE && do_force == FALSE) { fprintf(stderr, "You should only clean up failed resources!\n" "Use --force to ignore this message and continue\n"); return cib_invalid_argument; } key = crm_concat("0:0:crm-resource-delete", our_pid, '-'); msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->long_name); value = crm_element_value(rsc->xml, XML_ATTR_TYPE); crm_xml_add(xml_rsc, XML_ATTR_TYPE, value); if(value) { fprintf(stderr, "%s has no type! Aborting...\n", rsc_id); return cib_NOTEXISTS; } value = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, value); if(value) { fprintf(stderr, "%s has no class! Aborting...\n", rsc_id); return cib_NOTEXISTS; } value = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, value); params = create_xml_node(msg_data, XML_TAG_ATTRS); crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); cmd = create_request(CRM_OP_LRM_DELETE, msg_data, host_uname, CRM_SYSTEM_CRMD, crm_system_name, our_pid); free_xml(msg_data); crm_free(key); if(send_ipc_message(crmd_channel, cmd)) { rc = 0; } crm_msg_del(cmd); return rc; } static int refresh_lrm(IPC_Channel *crmd_channel, const char *host_uname) { HA_Message *cmd = NULL; int rc = cib_send_failed; cmd = create_request(CRM_OP_LRM_REFRESH, NULL, host_uname, CRM_SYSTEM_CRMD, crm_system_name, our_pid); if(send_ipc_message(crmd_channel, cmd)) { rc = 0; } crm_msg_del(cmd); return rc; } static int migrate_resource( const char *rsc_id, const char *existing_node, const char *preferred_node, cib_t * cib_conn) { char *later_s = NULL; enum cib_errors rc = cib_ok; char *id = NULL; crm_data_t *cib = NULL; crm_data_t *rule = NULL; crm_data_t *expr = NULL; crm_data_t *constraints = NULL; crm_data_t *fragment = NULL; crm_data_t *lifetime = NULL; crm_data_t *can_run = NULL; crm_data_t *dont_run = NULL; fragment = create_cib_fragment(NULL, NULL); cib = fragment; CRM_DEV_ASSERT(safe_str_eq(crm_element_name(cib), XML_TAG_CIB)); constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, cib); id = crm_concat("cli-prefer", rsc_id, '-'); can_run = create_xml_node(NULL, XML_CONS_TAG_RSC_LOCATION); crm_xml_add(can_run, XML_ATTR_ID, id); crm_free(id); id = crm_concat("cli-standby", rsc_id, '-'); dont_run = create_xml_node(NULL, XML_CONS_TAG_RSC_LOCATION); crm_xml_add(dont_run, XML_ATTR_ID, id); crm_free(id); if(migrate_lifetime) { char *life = crm_strdup(migrate_lifetime); char *life_mutable = life; ha_time_t *now = NULL; ha_time_t *later = NULL; ha_time_t *duration = parse_time_duration(&life_mutable); if(duration == NULL) { fprintf(stderr, "Invalid duration specified: %s\n", migrate_lifetime); fprintf(stderr, "Please refer to" " http://en.wikipedia.org/wiki/ISO_8601#Duration" " for examples of valid durations\n"); crm_free(life); return cib_invalid_argument; } now = new_ha_date(TRUE); later = add_time(now, duration); log_date(LOG_INFO, "now ", now, ha_log_date|ha_log_time); log_date(LOG_INFO, "later ", later, ha_log_date|ha_log_time); log_date(LOG_INFO, "duration", duration, ha_log_date|ha_log_time|ha_log_local); later_s = date_to_string(later, ha_log_date|ha_log_time); printf("Migration will take effect until: %s\n", later_s); free_ha_date(duration); free_ha_date(later); free_ha_date(now); crm_free(life); } if(existing_node == NULL) { crm_log_xml_notice(can_run, "Deleting"); rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, dont_run, NULL, cib_options); if(rc == cib_NOTEXISTS) { rc = cib_ok; } else if(rc != cib_ok) { goto bail; } } else { if(BE_QUIET == FALSE) { fprintf(stderr, "WARNING: Creating rsc_location constraint '%s'" " with a score of -INFINITY for resource %s" " on %s.\n", ID(dont_run), rsc_id, existing_node); fprintf(stderr, "\tThis will prevent %s from running" " on %s until the constraint is removed using" " the 'crm_resource -U' command or manually" " with cibadmin\n", rsc_id, existing_node); fprintf(stderr, "\tThis will be the case even if %s is" " the last node in the cluster\n", existing_node); fprintf(stderr, "\tThis messgae can be disabled with -Q\n"); } crm_xml_add(dont_run, "rsc", rsc_id); if(later_s) { lifetime = create_xml_node(dont_run, "lifetime"); rule = create_xml_node(lifetime, XML_TAG_RULE); id = crm_concat("cli-standby-lifetime", rsc_id, '-'); crm_xml_add(rule, XML_ATTR_ID, id); crm_free(id); expr = create_xml_node(rule, "date_expression"); id = crm_concat("cli-standby-lifetime-end",rsc_id,'-'); crm_xml_add(expr, XML_ATTR_ID, id); crm_free(id); crm_xml_add(expr, "operation", "lt"); crm_xml_add(expr, "end", later_s); } rule = create_xml_node(dont_run, XML_TAG_RULE); expr = create_xml_node(rule, XML_TAG_EXPRESSION); id = crm_concat("cli-standby-rule", rsc_id, '-'); crm_xml_add(rule, XML_ATTR_ID, id); crm_free(id); crm_xml_add(rule, XML_RULE_ATTR_SCORE, MINUS_INFINITY_S); id = crm_concat("cli-standby-expr", rsc_id, '-'); crm_xml_add(expr, XML_ATTR_ID, id); crm_free(id); crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); crm_xml_add(expr, XML_EXPR_ATTR_VALUE, existing_node); crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); add_node_copy(constraints, dont_run); } if(preferred_node == NULL) { crm_log_xml_notice(can_run, "Deleting"); rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, can_run, NULL, cib_options); if(rc == cib_NOTEXISTS) { rc = cib_ok; } else if(rc != cib_ok) { goto bail; } } else { crm_xml_add(can_run, "rsc", rsc_id); if(later_s) { lifetime = create_xml_node(can_run, "lifetime"); rule = create_xml_node(lifetime, XML_TAG_RULE); id = crm_concat("cli-prefer-lifetime", rsc_id, '-'); crm_xml_add(rule, XML_ATTR_ID, id); crm_free(id); expr = create_xml_node(rule, "date_expression"); id = crm_concat("cli-prefer-lifetime-end", rsc_id, '-'); crm_xml_add(expr, XML_ATTR_ID, id); crm_free(id); crm_xml_add(expr, "operation", "lt"); crm_xml_add(expr, "end", later_s); } rule = create_xml_node(can_run, XML_TAG_RULE); expr = create_xml_node(rule, XML_TAG_EXPRESSION); id = crm_concat("cli-prefer-rule", rsc_id, '-'); crm_xml_add(rule, XML_ATTR_ID, id); crm_free(id); crm_xml_add(rule, XML_RULE_ATTR_SCORE, INFINITY_S); id = crm_concat("cli-prefer-expr", rsc_id, '-'); crm_xml_add(expr, XML_ATTR_ID, id); crm_free(id); crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); crm_xml_add(expr, XML_EXPR_ATTR_VALUE, preferred_node); crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); add_node_copy(constraints, can_run); } if(preferred_node != NULL || existing_node != NULL) { crm_log_xml_notice(fragment, "CLI Update"); rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, NULL, cib_options); } bail: free_xml(fragment); free_xml(dont_run); free_xml(can_run); crm_free(later_s); return rc; } int main(int argc, char **argv) { pe_working_set_t data_set; crm_data_t *cib_xml_copy = NULL; cib_t * cib_conn = NULL; enum cib_errors rc = cib_ok; int argerr = 0; int flag; #ifdef HAVE_GETOPT_H int option_index = 0; static struct option long_options[] = { /* Top-level Options */ {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"quiet", 0, 0, 'Q'}, {"list", 0, 0, 'L'}, {"list-raw", 0, 0, 'l'}, {"refresh", 0, 0, 'R'}, {"reprobe", 0, 0, 'P'}, {"query-xml", 0, 0, 'x'}, {"delete", 0, 0, 'D'}, {"cleanup", 0, 0, 'C'}, {"locate", 0, 0, 'W'}, {"migrate", 0, 0, 'M'}, {"un-migrate", 0, 0, 'U'}, {"resource", 1, 0, 'r'}, {"host-uname", 1, 0, 'H'}, {"lifetime", 1, 0, 'u'}, {"force", 0, 0, 'f'}, {"meta", 0, 0, 'm'}, {"set-parameter", 1, 0, 'p'}, {"get-parameter", 1, 0, 'g'}, {"delete-parameter",1, 0, 'd'}, {"property-value", 1, 0, 'v'}, {"get-property", 1, 0, 'G'}, {"set-property", 1, 0, 'S'}, {"resource-type", 1, 0, 't'}, {"xml-file", 0, 0, 'X'}, {0, 0, 0, 0} }; #endif crm_system_name = basename(argv[0]); cl_log_set_entity(crm_system_name); cl_log_set_facility(LOG_USER); set_crm_log_level(LOG_ERR); cl_log_enable_stderr(TRUE); if(argc < 2) { usage(crm_system_name, LSB_EXIT_EINVAL); } while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch(flag) { case 'V': cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case '?': usage(crm_system_name, LSB_EXIT_OK); break; case 'X': xml_file = crm_strdup(optarg); break; case 'Q': BE_QUIET = TRUE; break; case 'm': attr_set_type = XML_TAG_META_SETS; break; case 'L': case 'l': case 'R': case 'x': case 'D': case 'C': case 'P': case 'W': case 'M': case 'U': rsc_cmd = flag; break; case 'u': migrate_lifetime = crm_strdup(optarg); break; case 'p': crm_debug_2("Option %c => %s", flag, optarg); prop_name = optarg; rsc_cmd = flag; break; case 'g': crm_debug_2("Option %c => %s", flag, optarg); prop_name = optarg; rsc_cmd = flag; break; case 'd': crm_debug_2("Option %c => %s", flag, optarg); prop_name = optarg; rsc_cmd = flag; break; case 'S': crm_debug_2("Option %c => %s", flag, optarg); prop_name = optarg; rsc_cmd = flag; break; case 'G': crm_debug_2("Option %c => %s", flag, optarg); prop_name = optarg; rsc_cmd = flag; break; case 'f': do_force = TRUE; break; case 'i': crm_debug_2("Option %c => %s", flag, optarg); prop_id = optarg; break; case 's': crm_debug_2("Option %c => %s", flag, optarg); prop_set = optarg; break; case 'r': crm_debug_2("Option %c => %s", flag, optarg); rsc_id = optarg; break; case 'v': crm_debug_2("Option %c => %s", flag, optarg); prop_value = optarg; break; case 't': crm_debug_2("Option %c => %s", flag, optarg); rsc_type = optarg; break; case 'H': crm_debug_2("Option %c => %s", flag, optarg); host_uname = optarg; break; default: fprintf(stderr, "Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { fprintf(stderr, "non-option ARGV-elements: "); while (optind < argc) { fprintf(stderr, "%s ", argv[optind++]); } fprintf(stderr, "\n"); } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } + cl_log_args(argc, argv); + crm_malloc0(our_pid, 11); if(our_pid != NULL) { snprintf(our_pid, 10, "%d", getpid()); our_pid[10] = '\0'; } if(do_force) { crm_debug("Forcing..."); cib_options |= cib_scope_local|cib_quorum_override; } if(rsc_cmd == 'L' || rsc_cmd == 'W' || rsc_cmd == 'D' || rsc_cmd == 'x' || rsc_cmd == 'M' || rsc_cmd == 'U' || rsc_cmd == 'C' || rsc_cmd == 'p' || rsc_cmd == 'd' || rsc_cmd == 'g' || rsc_cmd == 'G' || rsc_cmd == 'S' || rsc_cmd == 'l') { resource_t *rsc = NULL; if(xml_file != NULL) { FILE *xml_strm = fopen(xml_file, "r"); if(strstr(xml_file, ".bz2") != NULL) { cib_xml_copy = file2xml(xml_strm, TRUE); } else { cib_xml_copy = file2xml(xml_strm, FALSE); } if(xml_strm != NULL) { fclose(xml_strm); } } else { cib_conn = cib_new(); rc = cib_conn->cmds->signon( cib_conn, crm_system_name, cib_command_synchronous); if(rc != cib_ok) { fprintf(stderr, "Error signing on to the CIB service: %s\n", cib_error2string(rc)); return rc; } cib_xml_copy = get_cib_copy(cib_conn); } set_working_set_defaults(&data_set); data_set.input = cib_xml_copy; data_set.now = new_ha_date(TRUE); cluster_status(&data_set); rsc = pe_find_resource(data_set.resources, rsc_id); if(rsc != NULL) { rsc_id = rsc->id; } else { rc = cib_NOTEXISTS; } } if(rsc_cmd == 'R' || rsc_cmd == 'C' || rsc_cmd == 'P') { GCHSource *src = NULL; src = init_client_ipc_comms(CRM_SYSTEM_CRMD, crmd_msg_callback, NULL, &crmd_channel); if(src == NULL) { fprintf(stderr, "Error signing on to the CRMd service\n"); return 1; } send_hello_message( crmd_channel, our_pid, crm_system_name, "0", "1"); set_IPC_Channel_dnotify(src, resource_ipc_connection_destroy); } if(rsc_cmd == 'L') { rc = cib_ok; do_find_resource_list(&data_set, FALSE); } else if(rsc_cmd == 'l') { rc = cib_ok; do_find_resource_list(&data_set, TRUE); } else if(rsc_cmd == 'C') { int rc = delete_lrm_rsc(crmd_channel, host_uname, rsc_id, &data_set); sleep(5); refresh_lrm(crmd_channel, host_uname); if(rc == 0) { char *now_s = NULL; time_t now = time(NULL); /* force the TE to start a transition */ sleep(5); /* wait for the refresh */ now_s = crm_itoa(now); update_attr(cib_conn, cib_options, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, "last-lrm-refresh", now_s); crm_free(now_s); } } else if(rc == cib_NOTEXISTS) { fprintf(stderr, "Resource %s not found: %s\n", crm_str(rsc_id), cib_error2string(rc)); } else if(rsc_cmd == 'W') { if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } rc = do_find_resource(rsc_id, &data_set); } else if(rsc_cmd == 'x') { if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } rc = dump_resource(rsc_id, &data_set); } else if(rsc_cmd == 'U') { if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } rc = migrate_resource(rsc_id, NULL, NULL, cib_conn); } else if(rsc_cmd == 'M') { node_t *dest = NULL; node_t *current = NULL; const char *current_uname = NULL; resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); if(rsc != NULL && rsc->running_on != NULL) { current = rsc->running_on->data; if(current != NULL) { current_uname = current->details->uname; } } if(host_uname != NULL) { dest = pe_find_node(data_set.nodes, host_uname); } if(rsc == NULL) { fprintf(stderr, "Resource %s not migrated:" " not found\n", rsc_id); } else if(rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) { fprintf(stderr, "Resource %s not migrated:" " active on multiple nodes\n", rsc_id); } else if(host_uname != NULL && dest == NULL) { fprintf(stderr, "Error performing operation: " "%s is not a known node\n", host_uname); } else if(host_uname != NULL && safe_str_eq(current_uname, host_uname)) { fprintf(stderr, "Error performing operation: " "%s is already active on %s\n", rsc_id, host_uname); } else if(current_uname != NULL && (do_force || host_uname == NULL)) { rc = migrate_resource(rsc_id, current_uname, host_uname, cib_conn); } else if(host_uname != NULL) { rc = migrate_resource( rsc_id, NULL, host_uname, cib_conn); } else { fprintf(stderr, "Resource %s not migrated: " "not-active and no prefered location" " specified.\n", rsc_id); } } else if(rsc_cmd == 'G') { if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } rc = dump_resource_prop(rsc_id, prop_name, &data_set); } else if(rsc_cmd == 'S') { crm_data_t *msg_data = NULL; if(prop_value == NULL) { fprintf(stderr, "You need to supply a value with the -v option\n"); return CIBRES_MISSING_FIELD; } else if(cib_conn == NULL) { return cib_connection; } if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } CRM_DEV_ASSERT(rsc_type != NULL); CRM_DEV_ASSERT(prop_name != NULL); CRM_DEV_ASSERT(prop_value != NULL); msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); crm_xml_add(msg_data, prop_name, prop_value); rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, NULL, cib_options); free_xml(msg_data); } else if(rsc_cmd == 'g') { if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } rc = dump_resource_attr(rsc_id, prop_name, &data_set); } else if(rsc_cmd == 'p') { if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } if(prop_value == NULL) { fprintf(stderr, "You need to supply a value with the -v option\n"); return CIBRES_MISSING_FIELD; } rc = set_resource_attr(rsc_id, prop_set, prop_id, prop_name, prop_value, cib_conn, &data_set); } else if(rsc_cmd == 'd') { if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } rc = delete_resource_attr(rsc_id, prop_id, prop_set, prop_name, cib_conn, &data_set); } else if(rsc_cmd == 'P') { HA_Message *cmd = NULL; cmd = create_request(CRM_OP_REPROBE, NULL, host_uname, CRM_SYSTEM_CRMD, crm_system_name, our_pid); send_ipc_message(crmd_channel, cmd); crm_msg_del(cmd); } else if(rsc_cmd == 'R') { refresh_lrm(crmd_channel, host_uname); } else if(rsc_cmd == 'D') { crm_data_t *msg_data = NULL; if(rsc_id == NULL) { fprintf(stderr, "Must supply a resource id with -r\n"); return cib_NOTEXISTS; } if(rsc_type == NULL) { fprintf(stderr, "You need to specify a resource type with -t"); return cib_NOTEXISTS; } else if(cib_conn == NULL) { return cib_connection; } msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, NULL, cib_options); free_xml(msg_data); } else { fprintf(stderr, "Unknown command: %c\n", rsc_cmd); } if(cib_conn != NULL) { cleanup_calculations(&data_set); cib_conn->cmds->signoff(cib_conn); } if(rc == cib_no_quorum) { fprintf(stderr, "Error performing operation: %s\n", cib_error2string(rc)); fprintf(stderr, "Try using -f\n"); } else if(rc != cib_ok) { fprintf(stderr, "Error performing operation: %s\n", cib_error2string(rc)); } return rc; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-?VS] -(L|Q|W|D|C|P|p) [options]\n", cmd); fprintf(stream, "\t--%s (-%c)\t: this help message\n", "help", '?'); fprintf(stream, "\t--%s (-%c)\t: " "turn on debug info. additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t: Print only the value on stdout (for use with -W)\n", "quiet", 'Q'); fprintf(stream, "\nCommands\n"); fprintf(stream, "\t--%s (-%c)\t: List all resources\n", "list", 'L'); fprintf(stream, "\t--%s (-%c)\t: Query a resource\n" "\t\t\t Requires: -r\n", "query-xml", 'x'); fprintf(stream, "\t--%s (-%c)\t: Locate a resource\n" "\t\t\t Requires: -r\n", "locate", 'W'); fprintf(stream, "\t--%s (-%c)\t: Migrate a resource from it current" " location. Use -H to specify a destination\n" "\t\tIf -H is not specified, we will force the resource to move by" " creating a rule for the current location and a score of -INFINITY\n" "\t\tNOTE: This will prevent the resource from running on this" " node until the constraint is removed with -U\n" "\t\t\t Requires: -r, Optional: -H, -f, --lifetime\n", "migrate", 'M'); fprintf(stream, "\t--%s (-%c)\t: Remove all constraints created by -M\n" "\t\t\t Requires: -r\n", "un-migrate", 'U'); fprintf(stream, "\t--%s (-%c)\t: Delete a resource from the CIB\n" "\t\t\t Requires: -r, -t\n", "delete", 'D'); fprintf(stream, "\t--%s (-%c)\t: Delete a resource from the LRM\n" "\t\t\t Requires: -r. Optional: -H\n", "cleanup", 'C'); fprintf(stream, "\t--%s (-%c)\t: Recheck for resources started outside of the CRM\n" "\t\t\t Optional: -H\n", "reprobe", 'P'); fprintf(stream, "\t--%s (-%c)\t: Refresh the CIB from the LRM\n" "\t\t\t Optional: -H\n", "refresh", 'R'); fprintf(stream, "\t--%s (-%c) \t: " "Set the named parameter for a resource\n" "\t\t\t Requires: -r, -v. Optional: -i, -s, --meta\n", "set-parameter", 'p'); fprintf(stream, "\t--%s (-%c) \t: " "Get the named parameter for a resource\n" "\t\t\t Requires: -r. Optional: -i, -s, --meta\n", "get-parameter", 'g'); fprintf(stream, "\t--%s (-%c) : " "Delete the named parameter for a resource\n" "\t\t\t Requires: -r. Optional: -i, --meta\n", "delete-parameter", 'd'); fprintf(stream, "\nOptions\n"); fprintf(stream, "\t--%s (-%c) \t: Resource ID\n", "resource", 'r'); fprintf(stream, "\t--%s (-%c) \t: " "Resource type (primitive, clone, group, ...)\n", "resource-type", 't'); fprintf(stream, "\t--%s (-%c) \t: " "Property value\n", "property-value", 'v'); fprintf(stream, "\t--%s (-%c) \t: " "Host name\n", "host-uname", 'H'); fprintf(stream, "\t--%s\t: Modify a resource's configuration option rather than one which is passed to the resource agent script." "\n\t\tFor use with -p, -g, -d\n", "meta"); fprintf(stream, "\t--%s (-%c) \t: " "Lifespan of migration constraints\n", "lifetime", 'u'); fprintf(stream, "\t--%s (-%c)\t: " "Force the resource to move by creating a rule for the" " current location and a score of -INFINITY\n" "\t\tThis should be used if the resource's stickiness and" " constraint scores total more than INFINITY (Currently 100,000)\n" "\t\tNOTE: This will prevent the resource from running on this" " node until the constraint is removed with -U or the --lifetime duration expires\n", "force", 'f'); fprintf(stream, "\t-%c \t: (Advanced Use Only) ID of the instance_attributes object to change\n", 's'); fprintf(stream, "\t-%c \t: (Advanced Use Only) ID of the nvpair object to change/delete\n", 'i'); fflush(stream); exit(exit_status); } diff --git a/crm/admin/crm_uuid.c b/crm/admin/crm_uuid.c index 00303768c4..5f01a95e4e 100644 --- a/crm/admin/crm_uuid.c +++ b/crm/admin/crm_uuid.c @@ -1,181 +1,183 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #ifdef HAVE_STRING_H #include #endif #include #include #include #include #include #include #ifdef HAVE_GETOPT_H # include #endif #define UUID_LEN 16 #define UUID_FILE HA_VARLIBDIR"/"PACKAGE"/hb_uuid" #define OPTARGS "rw:" int read_local_hb_uuid(void); int write_local_hb_uuid(const char *buffer); static void usage(void) { fprintf(stderr, "crm_uuid [-r|-w new_ascii_value]\n"); exit(1); } int main(int argc, char **argv) { int flag; int rc = 0; cl_log_enable_stderr(TRUE); if(argc == 1) { /* no arguments specified, default to read */ rc = read_local_hb_uuid(); return rc; } + cl_log_args(argc, argv); + while (1) { flag = getopt(argc, argv, OPTARGS); if (flag == -1) { break; } switch(flag) { case 'r': rc = read_local_hb_uuid(); break; case 'w': rc = write_local_hb_uuid(optarg); break; default: usage(); break; } } return rc; } int read_local_hb_uuid(void) { int rc = 0; cl_uuid_t uuid; char *buffer = NULL; long start = 0, read_len = 0; FILE *input = fopen(UUID_FILE, "r"); if(input == NULL) { cl_perror("Could not open UUID file %s\n", UUID_FILE); return 1; } /* see how big the file is */ start = ftell(input); fseek(input, 0L, SEEK_END); if(UUID_LEN != ftell(input)) { fprintf(stderr, "%s must contain exactly %d bytes\n", UUID_FILE, UUID_LEN); abort(); } fseek(input, 0L, start); if(start != ftell(input)) { fprintf(stderr, "fseek not behaving: %ld vs. %ld\n", start, ftell(input)); rc = 2; goto bail; } /* fprintf(stderr, "Reading %d bytes from: %s\n", UUID_LEN, UUID_FILE); */ buffer = cl_malloc(50); read_len = fread(uuid.uuid, 1, UUID_LEN, input); if(read_len != UUID_LEN) { fprintf(stderr, "Expected and read bytes differ: %d vs. %ld\n", UUID_LEN, read_len); rc = 3; goto bail; } else if(buffer != NULL) { cl_uuid_unparse(&uuid, buffer); fprintf(stdout, "%s\n", buffer); } else { fprintf(stderr, "No buffer to unparse\n"); rc = 4; } bail: cl_free(buffer); fclose(input); return rc; } int write_local_hb_uuid(const char *new_value) { int fd; int rc = 0; cl_uuid_t uuid; char *buffer = strdup(new_value); rc = cl_uuid_parse(buffer, &uuid); if(rc != 0) { fprintf(stderr, "Invalid ASCII UUID supplied: [%s]\n", new_value); fprintf(stderr, "ASCII UUIDs must be of the form" " XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" " and contain only letters and digits\n"); return 5; } if ((fd = open(UUID_FILE, O_WRONLY|O_SYNC|O_CREAT, 0644)) < 0) { cl_perror("Could not open %s", UUID_FILE); return 6; } if (write(fd, uuid.uuid, UUID_LEN) != UUID_LEN) { cl_perror("Could not write UUID to %s", UUID_FILE); rc=7; } if (close(fd) < 0) { cl_perror("Could not close %s", UUID_FILE); rc=8; } return rc; } diff --git a/crm/admin/crmadmin.c b/crm/admin/crmadmin.c index 34d5634ee7..96d106613f 100644 --- a/crm/admin/crmadmin.c +++ b/crm/admin/crmadmin.c @@ -1,731 +1,732 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GETOPT_H # include #endif int message_timer_id = -1; int message_timeout_ms = 30*1000; GMainLoop *mainloop = NULL; IPC_Channel *crmd_channel = NULL; char *admin_uuid = NULL; void usage(const char *cmd, int exit_status); ll_cluster_t *do_init(void); int do_work(ll_cluster_t * hb_cluster); void crmd_ipc_connection_destroy(gpointer user_data); gboolean admin_msg_callback(IPC_Channel * source_data, void *private_data); char *pluralSection(const char *a_section); crm_data_t *handleCibMod(void); int do_find_node_list(crm_data_t *xml_node); gboolean admin_message_timeout(gpointer data); gboolean is_node_online(crm_data_t *node_state); enum debug { debug_none, debug_dec, debug_inc }; gboolean BE_VERBOSE = FALSE; int expected_responses = 1; gboolean BASH_EXPORT = FALSE; gboolean DO_HEALTH = FALSE; gboolean DO_RESET = FALSE; gboolean DO_RESOURCE = FALSE; gboolean DO_ELECT_DC = FALSE; gboolean DO_WHOIS_DC = FALSE; gboolean DO_NODE_LIST = FALSE; gboolean BE_SILENT = FALSE; gboolean DO_RESOURCE_LIST = FALSE; enum debug DO_DEBUG = debug_none; const char *crmd_operation = NULL; crm_data_t *msg_options = NULL; const char *standby_on_off = "on"; const char *admin_verbose = XML_BOOLEAN_FALSE; char *id = NULL; char *this_msg_reference = NULL; char *disconnect = NULL; char *dest_node = NULL; char *rsc_name = NULL; char *crm_option = NULL; int operation_status = 0; const char *sys_to = NULL; const char *crm_system_name = NULL; #define OPTARGS "V?K:S:HE:Dd:i:RNqt:Bv" int main(int argc, char **argv) { int argerr = 0; int flag; ll_cluster_t *hb_cluster = NULL; #ifdef HAVE_GETOPT_H int option_index = 0; static struct option long_options[] = { /* Top-level Options */ {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"quiet", 0, 0, 'q'}, {"reference", 1, 0, 0}, {XML_ATTR_TIMEOUT, 1, 0, 't'}, {"bash-export", 0, 0, 'B'}, /* daemon options */ {"kill", 1, 0, 'K'}, /* stop a node */ {"die", 0, 0, 0}, /* kill a node, no respawn */ {"debug_inc", 1, 0, 'i'}, {"debug_dec", 1, 0, 'd'}, {"status", 1, 0, 'S'}, {"standby", 1, 0, 's'}, {"active", 1, 0, 'a'}, {"health", 0, 0, 'H'}, {"election", 0, 0, 'E'}, {"dc_lookup", 0, 0, 'D'}, {"nodes", 0, 0, 'N'}, {"option", 1, 0, 'o'}, {"version", 0, 0, 'v'}, {0, 0, 0, 0} }; #endif crm_system_name = basename(argv[0]); crm_log_init(crm_system_name); if(argc < 2) { usage(crm_system_name, LSB_EXIT_EINVAL); } while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch(flag) { #ifdef HAVE_GETOPT_H case 0: printf("option %s", long_options[option_index].name); if (optarg) printf(" with arg %s", optarg); printf("\n"); if (strcasecmp("reference", long_options[option_index].name) == 0) { this_msg_reference = crm_strdup(optarg); } else if (strcasecmp("die", long_options[option_index].name) == 0) { DO_RESET = TRUE; crmd_operation = CRM_OP_DIE; } else { printf( "?? Long option (--%s) is not yet properly supported ??\n", long_options[option_index].name); ++argerr; } break; #endif /* a sample test for multiple instance if (digit_optind != 0 && digit_optind != this_option_optind) printf ("digits occur in two different argv-elements.\n"); digit_optind = this_option_optind; printf ("option %c\n", c); */ case 'v': fprintf(stdout, "HA Version %s, CRM Version %s (CIB feature set %s)\n", VERSION, CRM_FEATURE_SET, CIB_FEATURE_SET); exit(0); break; case 'V': BE_VERBOSE = TRUE; admin_verbose = XML_BOOLEAN_TRUE; cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case 't': message_timeout_ms = atoi(optarg); if(message_timeout_ms < 1) { message_timeout_ms = 30*1000; } break; case '?': usage(crm_system_name, LSB_EXIT_OK); break; case 'D': DO_WHOIS_DC = TRUE; break; case 'B': BASH_EXPORT = TRUE; break; case 'K': DO_RESET = TRUE; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); crmd_operation = CRM_OP_LOCAL_SHUTDOWN; break; case 'q': BE_SILENT = TRUE; break; case 'i': DO_DEBUG = debug_inc; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 'd': DO_DEBUG = debug_dec; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 'S': DO_HEALTH = TRUE; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 'E': DO_ELECT_DC = TRUE; break; case 'N': DO_NODE_LIST = TRUE; break; case 'H': DO_HEALTH = TRUE; break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } hb_cluster = do_init(); if (hb_cluster != NULL) { - int res = do_work(hb_cluster); - if(res == 0) { - } else if (res > 0) { + int res = 0; + cl_log_args(argc, argv); + res = do_work(hb_cluster); + if (res > 0) { /* wait for the reply by creating a mainloop and running it until * the callbacks are invoked... */ mainloop = g_main_new(FALSE); expected_responses++; crm_debug_2("Waiting for %d replies from the local CRM", expected_responses); message_timer_id = Gmain_timeout_add( message_timeout_ms, admin_message_timeout, NULL); g_main_run(mainloop); return_to_orig_privs(); - } else { + } else if(res < 0) { crm_err("No message to send"); operation_status = -1; } } else { crm_warn("Init failed, could not perform requested operations"); operation_status = -2; } crm_debug_2("%s exiting normally", crm_system_name); return operation_status; } int do_work(ll_cluster_t * hb_cluster) { int ret = 1; /* construct the request */ crm_data_t *msg_data = NULL; gboolean all_is_good = TRUE; msg_options = create_xml_node(NULL, XML_TAG_OPTIONS); crm_xml_add(msg_options, XML_ATTR_VERBOSE, admin_verbose); crm_xml_add(msg_options, XML_ATTR_TIMEOUT, "0"); if (DO_HEALTH == TRUE) { crm_debug_2("Querying the system"); sys_to = CRM_SYSTEM_DC; if (dest_node != NULL) { sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_PING; if (BE_VERBOSE) { expected_responses = 1; } crm_xml_add(msg_options, XML_ATTR_TIMEOUT, "0"); } else { crm_info("Cluster-wide health not available yet"); all_is_good = FALSE; } } else if(DO_ELECT_DC) { /* tell the local node to initiate an election */ sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_VOTE; crm_xml_add(msg_options, XML_ATTR_TIMEOUT, "0"); dest_node = NULL; ret = 0; /* no return message */ } else if(DO_WHOIS_DC) { sys_to = CRM_SYSTEM_DC; crmd_operation = CRM_OP_PING; crm_xml_add(msg_options, XML_ATTR_TIMEOUT, "0"); dest_node = NULL; } else if(DO_NODE_LIST) { cib_t * the_cib = cib_new(); crm_data_t *output = NULL; enum cib_errors rc = the_cib->cmds->signon( the_cib, crm_system_name, cib_command_synchronous); if(rc != cib_ok) { return -1; } output = get_cib_copy(the_cib); do_find_node_list(output); free_xml(output); the_cib->cmds->signoff(the_cib); exit(rc); } else if(DO_RESET) { /* tell dest_node to initiate the shutdown proceedure * * if dest_node is NULL, the request will be sent to the * local node */ sys_to = CRM_SYSTEM_CRMD; crm_xml_add(msg_options, XML_ATTR_TIMEOUT, "0"); ret = 0; /* no return message */ } else if(DO_DEBUG == debug_inc) { /* tell dest_node to increase its debug level * * if dest_node is NULL, the request will be sent to the * local node */ sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_DEBUG_UP; ret = 0; /* no return message */ } else if(DO_DEBUG == debug_dec) { /* tell dest_node to increase its debug level * * if dest_node is NULL, the request will be sent to the * local node */ sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_DEBUG_DOWN; ret = 0; /* no return message */ } else { crm_err("Unknown options"); all_is_good = FALSE; } if(all_is_good == FALSE) { crm_err("Creation of request failed. No message to send"); return -1; } /* send it */ if (crmd_channel == NULL) { crm_err("The IPC connection is not valid, cannot send anything"); return -1; } if(sys_to == NULL) { if (dest_node != NULL) { sys_to = CRM_SYSTEM_CRMD; } else { sys_to = CRM_SYSTEM_DC; } } { HA_Message *cmd = create_request( crmd_operation, msg_data, dest_node, sys_to, crm_system_name, admin_uuid); if(this_msg_reference != NULL) { ha_msg_mod(cmd, XML_ATTR_REFERENCE, this_msg_reference); } send_ipc_message(crmd_channel, cmd); crm_msg_del(cmd); } return ret; } void crmd_ipc_connection_destroy(gpointer user_data) { crm_info("Connection to CRMd was terminated"); exit(1); } ll_cluster_t * do_init(void) { int facility; GCHSource *src = NULL; ll_cluster_t *hb_cluster = NULL; /* change the logging facility to the one used by heartbeat daemon */ hb_cluster = ll_cluster_new("heartbeat"); crm_debug_2("Switching to Heartbeat logger"); if (( facility = hb_cluster->llc_ops->get_logfacility(hb_cluster)) > 0) { cl_log_set_facility(facility); } crm_malloc0(admin_uuid, 11); if(admin_uuid != NULL) { snprintf(admin_uuid, 10, "%d", getpid()); admin_uuid[10] = '\0'; } src = init_client_ipc_comms( CRM_SYSTEM_CRMD, admin_msg_callback, NULL, &crmd_channel); if(DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST) { return hb_cluster; } else if(crmd_channel != NULL) { send_hello_message( crmd_channel, admin_uuid, crm_system_name,"0", "1"); set_IPC_Channel_dnotify(src, crmd_ipc_connection_destroy); return hb_cluster; } return NULL; } gboolean admin_msg_callback(IPC_Channel * server, void *private_data) { int lpc = 0; IPC_Message *msg = NULL; ha_msg_input_t *new_input = NULL; gboolean hack_return_good = TRUE; static int received_responses = 0; char *filename = NULL; int filename_len = 0; const char *result = NULL; Gmain_timeout_remove(message_timer_id); while (server->ch_status != IPC_DISCONNECT && server->ops->is_message_pending(server) == TRUE) { if(new_input != NULL) { delete_ha_msg_input(new_input); new_input = NULL; } if (server->ops->recv(server, &msg) != IPC_OK) { perror("Receive failure:"); return !hack_return_good; } if (msg == NULL) { crm_debug_4("No message this time"); continue; } lpc++; received_responses++; new_input = new_ipc_msg_input(msg); crm_log_message(LOG_MSG, new_input->msg); msg->msg_done(msg); if (new_input->xml == NULL) { crm_info("XML in IPC message was not valid... " "discarding."); goto cleanup; } else if (validate_crm_message( new_input->msg, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) { crm_debug_2("Message was not a CRM response. Discarding."); goto cleanup; } result = cl_get_string(new_input->msg, XML_ATTR_RESULT); if(result == NULL || strcasecmp(result, "ok") == 0) { result = "pass"; } else { result = "fail"; } if(DO_HEALTH) { const char *state = crm_element_value( new_input->xml, "crmd_state"); printf("Status of %s@%s: %s (%s)\n", crm_element_value(new_input->xml,XML_PING_ATTR_SYSFROM), cl_get_string(new_input->msg, F_CRM_HOST_FROM), state, crm_element_value(new_input->xml,XML_PING_ATTR_STATUS)); if(BE_SILENT && state != NULL) { fprintf(stderr, "%s\n", state); } } else if(DO_WHOIS_DC) { const char *dc = cl_get_string( new_input->msg, F_CRM_HOST_FROM); printf("Designated Controller is: %s\n", dc); if(BE_SILENT && dc != NULL) { fprintf(stderr, "%s\n", dc); } } if (this_msg_reference != NULL) { /* in testing mode... */ /* 31 = "test-_.xml" + an_int_as_string + '\0' */ filename_len = 31 + strlen(this_msg_reference); crm_malloc0(filename, filename_len); if(filename != NULL) { sprintf(filename, "%s-%s_%d.xml", result, this_msg_reference, received_responses); filename[filename_len - 1] = '\0'; if (0 > write_xml_file( new_input->xml, filename, FALSE)) { crm_crit("Could not save response to" " %s", filename); } } } cleanup: delete_ha_msg_input(new_input); new_input = NULL; } if (server->ch_status == IPC_DISCONNECT) { crm_debug_2("admin_msg_callback: received HUP"); return !hack_return_good; } if (received_responses >= expected_responses) { crm_debug_2( "Recieved expected number (%d) of messages from Heartbeat." " Exiting normally.", expected_responses); g_main_quit(mainloop); return !hack_return_good; } message_timer_id = Gmain_timeout_add( message_timeout_ms, admin_message_timeout, NULL); return hack_return_good; } gboolean admin_message_timeout(gpointer data) { fprintf(stderr, "No messages received in %d seconds.. aborting\n", (int)message_timeout_ms/1000); crm_err("No messages received in %d seconds", (int)message_timeout_ms/1000); g_main_quit(mainloop); return FALSE; } gboolean is_node_online(crm_data_t *node_state) { const char *uname = crm_element_value(node_state,XML_ATTR_UNAME); const char *join_state = crm_element_value(node_state,XML_CIB_ATTR_JOINSTATE); const char *exp_state = crm_element_value(node_state,XML_CIB_ATTR_EXPSTATE); const char *crm_state = crm_element_value(node_state,XML_CIB_ATTR_CRMDSTATE); const char *ha_state = crm_element_value(node_state,XML_CIB_ATTR_HASTATE); const char *ccm_state = crm_element_value(node_state,XML_CIB_ATTR_INCCM); if(safe_str_neq(join_state, CRMD_JOINSTATE_DOWN) && (ha_state == NULL || safe_str_eq(ha_state, ACTIVESTATUS)) && crm_is_true(ccm_state) && safe_str_eq(crm_state, ONLINESTATUS)) { crm_debug_3("Node %s is online", uname); return TRUE; } crm_debug_3("Node %s: ha=%s ccm=%s join=%s exp=%s crm=%s", uname, crm_str(ha_state), crm_str(ccm_state), crm_str(join_state), crm_str(exp_state), crm_str(crm_state)); crm_debug_3("Node %s is offline", uname); return FALSE; } int do_find_node_list(crm_data_t *xml_node) { int found = 0; crm_data_t *nodes = get_object_root(XML_CIB_TAG_NODES, xml_node); xml_child_iter_filter( nodes, node, XML_CIB_TAG_NODE, if(BASH_EXPORT) { printf("export %s=%s\n", crm_element_value(node, XML_ATTR_UNAME), crm_element_value(node, XML_ATTR_ID)); } else { printf("%s node: %s (%s)\n", crm_element_value(node, XML_ATTR_TYPE), crm_element_value(node, XML_ATTR_UNAME), crm_element_value(node, XML_ATTR_ID)); } found++; ); if(found == 0) { printf("NO nodes configured\n"); } return found; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-?Vs] [command] [command args]\n", cmd); fprintf(stream, "Options\n"); fprintf(stream, "\t--%s (-%c)\t: this help message\n", "help", '?'); fprintf(stream, "\t--%s (-%c)\t: version details\n", "version", 'v'); fprintf(stream, "\t--%s (-%c)\t: " "turn on debug info. additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t: be very *very* quiet\n", "quiet", 'q'); fprintf(stream, "\t--%s (-%c)\t: Only applies to -N.\n" "\t\tCreate Bash export entries of the form \"export uname=uuid\"\n", "bash-export", 'B'); fprintf(stream, "\nCommands\n"); fprintf(stream, "\t--%s (-%c) \t: " "increment the CRMd debug level on \n", CRM_OP_DEBUG_UP,'i'); fprintf(stream, "\t--%s (-%c) \t: " "decrement the CRMd debug level on \n", CRM_OP_DEBUG_DOWN,'d'); fprintf(stream, "\t--%s (-%c) \t: " "shutdown the CRMd on \n", "kill", 'K'); fprintf(stream, "\t--%s (-%c) \t: " "request the status of \n", "status", 'S'); #if 0 fprintf(stream, "\t--%s (-%c)\t\t: " "request the status of all nodes\n", "health", 'H'); #endif fprintf(stream, "\t--%s (-%c) \t: " "initiate an election from \n", "election", 'E'); fprintf(stream, "\t--%s (-%c)\t: " "request the uname of the DC\n", "dc_lookup", 'D'); fprintf(stream, "\t--%s (-%c)\t\t: " "request the uname of all member nodes\n", "nodes", 'N'); /* fprintf(stream, "\t--%s (-%c)\t\n", "disconnect", 'D'); */ fflush(stream); exit(exit_status); } diff --git a/cts/CTStests.py.in b/cts/CTStests.py.in index 3714e6e379..bf0fa11f6e 100644 --- a/cts/CTStests.py.in +++ b/cts/CTStests.py.in @@ -1,2454 +1,2454 @@ #!@PYTHON@ '''CTS: Cluster Testing System: Tests module There are a few things we want to do here: ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. Add RecourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import CTS from CM_hb import HBConfig import CTSaudits import time, os, re, types, string, tempfile, sys from CTSaudits import * from stat import * # List of all class objects for tests which we ought to # consider running. class RandomTests: ''' A collection of tests which are run at random. ''' def __init__(self, scenario, cm, tests, Audits): self.CM = cm self.Env = cm.Env self.Scenario = scenario self.Tests = [] self.Audits = [] self.ns=CTS.NodeStatus(self.Env) for test in tests: if not issubclass(test.__class__, CTSTest): raise ValueError("Init value must be a subclass of CTSTest") if test.is_applicable(): self.Tests.append(test) if not scenario.IsApplicable(): raise ValueError("Scenario not applicable in" " given Environment") self.Stats = {"success":0, "failure":0, "BadNews":0} self.IndividualStats= {} for audit in Audits: if not issubclass(audit.__class__, ClusterAudit): raise ValueError("Init value must be a subclass of ClusterAudit") if audit.is_applicable(): self.Audits.append(audit) def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def audit(self, BadNews, test): errcount=0 BadNewsDebug=0 #BadNews.debug=1 while errcount < 1000: if BadNewsDebug: print "Looking for BadNews" match=BadNews.look(0) if match: if BadNewsDebug: print "BadNews found: "+match add_err = 1 ignorelist = [] if test: ignorelist=test.errorstoignore() ignorelist.append(" CTS: ") ignorelist.append("BadNews:") for ignore in ignorelist: if re.search(ignore, match): if BadNewsDebug: print "Ignoring:"+match+" (pattern: "+ignore+")" add_err = 0 if add_err == 1: ignorelist=self.CM.errorstoignore() for ignore in ignorelist: if re.search(ignore, match): if BadNewsDebug: print "Ignoring:"+match+" (pattern: "+ignore+")" add_err = 0 if add_err == 1: self.CM.log("BadNews: " + match) self.incr("BadNews") errcount=errcount+1 else: break else: self.CM.log("Big problems. Shutting down.") self.CM.stopall() self.summarize() raise ValueError("Looks like we hit the jackpot! :-)") for audit in self.Audits: if not audit(): self.CM.log("Audit " + audit.name() + " FAILED.") self.incr("auditfail") if test: test.incr("auditfail") def summarize(self): self.CM.log("****************") self.CM.log("Overall Results:" + repr(self.Stats)) self.CM.log("****************") self.CM.log("Detailed Results") for test in self.Tests: self.CM.log("Test %s: \t%s" %(test.name, repr(test.Stats))) self.CM.log("<<<<<<<<<<<<<<<< TESTS COMPLETED") def run(self, max=1): ( ''' Set up the given scenario, then run the selected tests at random for the selected number of iterations. ''') BadNews=CTS.LogWatcher(self.CM["LogFileName"], self.CM["BadRegexes"] , timeout=0) BadNews.setwatch() self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.log("Enabling oprofile on %s" % node) self.CM.rsh.remote_py(node, "os", "system", "opcontrol --init") self.CM.rsh.remote_py(node, "os", "system", "opcontrol --start") if not self.Scenario.SetUp(self.CM): return None for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.rsh.remote_py( node, "os", "system", "opcontrol --save=cts.setup") testcount=1 time.sleep(30) # This makes sure everything is stabilized before starting... self.audit(BadNews, None) while testcount <= max: test = self.Env.RandomGen.choice(self.Tests) # Some tests want a node as an argument. nodechoice = self.Env.RandomNode() #logsize = os.stat(self.CM["LogFileName"])[ST_SIZE] #self.CM.log("Running test %s (%s) \t[%d : %d]" # % (test.name, nodechoice, testcount, logsize)) self.CM.log("Running test %s (%s) \t[%d]" % (test.name, nodechoice, testcount)) testcount = testcount + 1 starttime=time.time() test.starttime=starttime ret=test(nodechoice) for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.rsh.remote_py( node, "os", "system", "opcontrol --save=cts.%d" % (testcount-1)) if ret: self.incr("success") else: self.incr("failure") self.CM.log("Test %s (%s) \t[FAILED]" %(test.name,nodechoice)) # Better get the current info from the cluster... self.CM.statall() # Make sure logging is working and we have enough disk space... if not self.CM.Env["DoBSC"]: if not self.CM.TestLogging(): sys.exit(1) if not self.CM.CheckDf(): sys.exit(1) stoptime=time.time() elapsed_time = stoptime - starttime test_time = stoptime - test.starttime if not test.has_key("min_time"): test["elapsed_time"] = elapsed_time test["min_time"] = test_time test["max_time"] = test_time else: test["elapsed_time"] = test["elapsed_time"] + elapsed_time if test_time < test["min_time"]: test["min_time"] = test_time if test_time > test["max_time"]: test["max_time"] = test_time self.audit(BadNews, test) self.Scenario.TearDown(self.CM) for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.log("Disabling oprofile on %s" % node) self.CM.rsh.remote_py(node, "os", "system", "opcontrol --shutdown") self.audit(BadNews, None) for test in self.Tests: self.IndividualStats[test.name] = test.Stats return self.Stats, self.IndividualStats AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.timeout=120 self.starttime=0 def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def failure(self, reason="none"): '''Increment the failure count''' self.incr("failure") self.CM.log("Test " + self.name + " failed [reason:" + reason + "]") return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def is_applicable(self): '''Return TRUE if we are applicable in the current test configuration''' raise ValueError("Abstract Class member (is_applicable)") return 1 def canrunnow(self): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] ################################################################### class StopTest(CTSTest): ################################################################### '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stop" self.uspat = self.CM["Pat:We_stopped"] self.thempat = self.CM["Pat:They_stopped"] def __call__(self, node): '''Perform the 'stop' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] != self.CM["up"]: return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.CM["Pat:We_stopped"] % node) if self.CM.Env["use_logd"]: patterns.append(self.CM["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == self.CM["up"] and other != node: patterns.append(self.CM["Pat:They_stopped"] %(other, node)) #self.debug("Checking %s will notice %s left"%(other, node)) watch = CTS.LogWatcher( self.CM["LogFileName"], patterns, self.CM["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() UnmatchedList = "||" if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; self.CM.cluster_stable(self.CM["DeadTime"]) # because syslog looses so many messages we can only really fail # the stop if _none_ of the CCM peers notice the node leave if not watch.unmatched or self.CM.upcount() == 0: return self.success() elif len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) return self.success() # # We don't register StopTest because it's better when called by # another test... # ################################################################### class StartTest(CTSTest): ################################################################### '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name="start" self.debug = debug self.uspat = self.CM["Pat:We_started"] self.thempat = self.CM["Pat:They_started"] def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != self.CM["down"]: return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" %(self.CM["Name"], node)) def is_applicable(self): '''StartTest is always applicable''' return 1 # # We don't register StartTest because it's better when called by # another test... # ################################################################### class FlipTest(CTSTest): ################################################################### '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.incr("stopped") ret = self.stop(node) type="up->down" # Give the cluster time to recognize it's gone... time.sleep(self.CM["StableTime"]) elif self.CM.ShouldBeStatus[node] == self.CM["down"]: self.incr("started") ret = self.start(node) type="down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) def is_applicable(self): '''FlipTest is always applicable''' return 1 # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) ################################################################### class RestartTest(CTSTest): ################################################################### '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Restart" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'restart' test. ''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.starttime=time.time() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() def is_applicable(self): '''RestartTest is always applicable''' return 1 # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) ################################################################### class StonithTest(CTSTest): ################################################################### '''Reboot a node by whacking it with stonith.''' def __init__(self, cm, timeout=900): CTSTest.__init__(self,cm) self.name="Stonith" self.theystopped = self.CM["Pat:They_dead"] self.allstopped = self.CM["Pat:All_stopped"] self.usstart = self.CM["Pat:We_started"] self.themstart = self.CM["Pat:They_started"] self.timeout = timeout self.ssherror = False def _reset(self, node): StonithWorked=False for tries in 1,2,3,4,5: if self.CM.Env.ResetNode(node): StonithWorked=True break return StonithWorked def setup(self, target_node): # nothing to do return 1 def __call__(self, node): '''Perform the 'stonith' test. (whack the node)''' self.incr("calls") stopwatch = 0 rc = 0 if not self.setup(node): return self.failure("Setup failed") # Figure out what log message to look for when/if it goes down # # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions stop_patterns = [] for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == self.CM["up"] and other != node: stop_patterns.append(self.CM["Pat:They_stopped"] %(other, node)) stopwatch = 1 #self.debug("Checking %s will notice %s left"%(other, node)) if self.CM.ShouldBeStatus[node] == self.CM["down"]: # actually no-one will notice this node die since HA isnt running stopwatch = 0 # Figure out what log message to look for when it comes up if self.CM.upcount() == 1 and self.CM.ShouldBeStatus[node] == self.CM["up"]: uppat = (self.usstart % node) else: uppat = (self.themstart % node) upwatch = CTS.LogWatcher(self.CM["LogFileName"], [uppat] , timeout=self.timeout) if stopwatch == 1: watch = CTS.LogWatcher(self.CM["LogFileName"], stop_patterns , timeout=self.CM["DeadTime"]+10) watch.setwatch() # Reset (stonith) the node self.CM.debug("Resetting: "+node) StonithWorked = self._reset(node) if not StonithWorked: return self.failure("Stonith didn't work") if self.ssherror == True: self.CM.log("NOTE: Stonith command reported success but node %s did not restart (atd, reboot or ssh error)" % node) return self.success() upwatch.setwatch() # Look() and see if the machine went down if stopwatch == 0: # Allow time for the node to die time.sleep(self.CM["DeadTime"]+10) elif not watch.lookforall(): if watch.unmatched: for regex in watch.unmatched: self.CM.log("Warn: STONITH pattern not found: %s"%regex) # !!no-one!! saw this node die if len(watch.unmatched) == len(stop_patterns): return self.failure("No-one saw %s die" %node) # else: syslog* lost a message # Alas I dont think this check is plausable (beekhof) # # Check it really stopped... #self.CM.ShouldBeStatus[node] = self.CM["down"] #if self.CM.StataCM(node) == 1: # ret1=0 # Look() and see if the machine came back up rc=0 if upwatch.look(): self.CM.debug("Startup pattern found: %s" %uppat) rc=1 else: self.CM.log("Warn: Startup pattern not found: %s" %uppat) # Check it really started... self.CM.ShouldBeStatus[node] = self.CM["up"] if rc == 0 and self.CM.StataCM(node) == 1: rc=1 # wait for the cluster to stabilize self.CM.cluster_stable() if node in self.CM.Env["oprofile"]: self.CM.log("Enabling oprofile on %s" % node) self.CM.rsh.remote_py(node, "os", "system", "opcontrol --init") self.CM.rsh.remote_py(node, "os", "system", "opcontrol --start") # return case processing if rc == 0: return self.failure("Node %s did not restart" %node) else: return self.success() def is_applicable(self): '''StonithTest is applicable unless suppressed by CM.Env["DoStonith"] == FALSE''' # for v2, stonithd test is a better test to run. if self.CM["Name"] == "linux-ha-v2": return None if self.CM.Env.has_key("DoStonith"): return self.CM.Env["DoStonith"] return 1 # Register StonithTest as a good test to run AllTestClasses.append(StonithTest) ################################################################### class StonithdTest(StonithTest): ################################################################### def __init__(self, cm, timeout=600): StonithTest.__init__(self, cm, timeout=600) self.name="Stonithd" self.startall = SimulStartLite(cm) self.start = StartTest(cm) self.stop = StopTest(cm) self.init_node = None def _reset(self, target_node): if len(self.CM.Env["nodes"]) < 2: return self.skipped() StonithWorked = False SshNotWork = 0 for tries in range(1,5): # For some unknown reason, every now and then the ssh plugin just # can't kill the target_node - everything works fine with stonithd # and the plugin, but atd, reboot or ssh (or maybe something else) # doesn't do its job and target_node remains alive. So look for # the indicative messages and bubble-up the error via ssherror watchpats = [] watchpats.append("Initiating ssh-reset") watchpats.append("CRIT: still able to ping") watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+30) watch.setwatch() fail_reasons = [] if self.CM.Env.ResetNode2(self.init_node, target_node, fail_reasons): StonithWorked = True break if watch.lookforall(): SshNotWork = SshNotWork + 1 continue for reason in fail_reasons: self.CM.log(reason) if StonithWorked == False and SshNotWork == tries: StonithWorked = True self.ssherror = True return StonithWorked def setup(self, target_node): if len(self.CM.Env["nodes"]) < 2: return 1 self.init_node = self.CM.Env.RandomNode() while self.init_node == target_node: self.init_node = self.CM.Env.RandomNode() if not self.startall(None): return self.failure("Test setup failed") return 1 def is_applicable(self): if not self.CM["Name"] == "linux-ha-v2": return 0 if self.CM.Env.has_key("DoStonith"): return self.CM.Env["DoStonith"] return 1 AllTestClasses.append(StonithdTest) ################################################################### class IPaddrtest(CTSTest): ################################################################### '''Find the machine supporting a particular IP address, and knock it down. [Hint: This code isn't finished yet...] ''' def __init__(self, cm, IPaddrs): CTSTest.__init__(self,cm) self.name="IPaddrtest" self.IPaddrs = IPaddrs self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, IPaddr): ''' Perform the IPaddr test... ''' self.incr("calls") node = self.CM.Env.RandomNode() self.incr("node:" + node) if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.incr("WasStopped") self.start(node) ret1 = self.stop(node) # Give the cluster time to recognize we're gone... time.sleep(self.CM["StableTime"]) ret2 = self.start(node) if not ret1: return self.failure("Could not stop") if not ret2: return self.failure("Could not start") return self.success() def is_applicable(self): '''IPaddrtest is always applicable (but shouldn't be)''' return 1 ################################################################### class StartOnebyOne(CTSTest): ################################################################### '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns=CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed=[] self.starttime=time.time() for node in self.CM.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''StartOnebyOne is always applicable''' return 1 # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) ################################################################### class SimulStart(CTSTest): ################################################################### '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''SimulStart is always applicable''' return 1 # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) class SimulStop(CTSTest): ################################################################### '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''SimulStop is always applicable''' return 1 # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) class StopOnebyOne(CTSTest): ################################################################### '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed=[] self.starttime=time.time() for node in self.CM.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() def is_applicable(self): '''StopOnebyOne is always applicable''' return 1 # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) class RestartOnebyOne(CTSTest): ################################################################### '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail=[] self.starttime=time.time() self.restart = RestartTest(self.CM) for node in self.CM.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" %(len(did_fail), repr(did_fail))) return self.success() def is_applicable(self): '''RestartOnebyOne is always applicable''' return 1 # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) class PartialStart(CTSTest): ################################################################### '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") watchpats = [] watchpats.append("Starting crmd") watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats, timeout=self.CM["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stopall(None) if not ret: return self.failure("%s did not stop in time" % node) return self.success() def is_applicable(self): '''Partial is always applicable''' return 1 # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) ################################################################### class StandbyTest(CTSTest): ################################################################### '''Put a node in standby mode''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="standby" self.successpat = self.CM["Pat:StandbyOK"] self.nostandbypat = self.CM["Pat:StandbyNONE"] self.transient = self.CM["Pat:StandbyTRANSIENT"] def __call__(self, node): '''Perform the 'standby' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == self.CM["down"]: return self.skipped() if self.CM.upcount() < 2: self.incr("nostandby") pat = self.nostandbypat else: self.incr("standby") pat = self.successpat # # You could make a good argument that the cluster manager # ought to give us good clues on when its a bad time to # switch over to the other side, but heartbeat doesn't... # It could also queue the request. But, heartbeat # doesn't do that either :-) # retrycount=0 while (retrycount < 10): watch = CTS.LogWatcher(self.CM["LogFileName"] , [pat, self.transient] , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.CM.rsh(node, self.CM["Standby"]) match = watch.look() if match: if re.search(self.transient, match): self.incr("retries") time.sleep(2) retrycount=retrycount+1 else: return self.success() else: break # No point in retrying... return self.failure("did not find pattern " + pat) def is_applicable(self): '''StandbyTest is applicable when the CM has a Standby command''' if not self.CM.has_key("Standby"): return None else: #if self.CM.Env.has_key("DoStandby"): #flag=self.CM.Env["DoStandby"] #if type(flag) == types.IntType: #return flag #if not re.match("[yt]", flag, re.I): #return None # # We need to strip off everything after the first blank # cmd=self.CM["Standby"] cmd = cmd.split()[0] if not os.access(cmd, os.X_OK): return None cf = self.CM.cf if not cf.Parameters.has_key("auto_failback"): return None elif cf.Parameters["auto_failback"][0] == "legacy": return None return 1 # Register StandbyTest as a good test to run AllTestClasses.append(StandbyTest) ####################################################################### class StandbyTest2(CTSTest): ####################################################################### '''Standby with CRM of HA release 2''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="standby2" self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) def __call__(self, node): self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") self.CM.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Getting resources running on node %s" % node) rsc_on_node = [] for rsc in self.CM.Resources(): if rsc.IsRunningOn(node): rsc_on_node.append(rsc) self.CM.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) time.sleep(30) # Allow time for the update to be applied and cause something self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.CM.debug("Checking resources") for rsc in self.CM.Resources(): if rsc.IsRunningOn(node): return self.failure("%s set to standby, %s is still running on it" % (node, rsc.rid)) self.CM.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) time.sleep(30) # Allow time for the update to be applied and cause something self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Checking resources") for rsc in rsc_on_node: if not rsc.IsRunningOn(node): return self.failure("%s set to active but %s is NOT back" % (node, rsc.rid)) return self.success() def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 AllTestClasses.append(StandbyTest2) ####################################################################### class Fastdetection(CTSTest): ####################################################################### '''Test the time which one node find out the other node is killed very quickly''' def __init__(self,cm,timeout=60): CTSTest.__init__(self, cm) self.name = "DetectionTime" self.they_stopped = self.CM["Pat:They_stopped"] self.timeout = timeout self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.standby = StandbyTest(cm) self.__setitem__("min", 0) self.__setitem__("max", 0) self.__setitem__("totaltime", 0) def __call__(self, node): '''Perform the fastfailureDetection test''' self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Test setup failed") if self.CM.upcount() < 2: return self.skipped() # Make sure they're not holding any resources ret = self.standby(node) if not ret: return ret stoppat = (self.they_stopped % ("", node)) stopwatch = CTS.LogWatcher(self.CM["LogFileName"], [stoppat], timeout=self.timeout) stopwatch.setwatch() # # This test is CM-specific - FIXME!! # if self.CM.rsh(node, "killall -9 heartbeat")==0: Starttime = os.times()[4] if stopwatch.look(): Stoptime = os.times()[4] # This test is CM-specific - FIXME!! self.CM.rsh(node, "killall -9 @libdir@/heartbeat/ccm @libdir@/heartbeat/ipfail >/dev/null 2>&1; true") Detectiontime = Stoptime-Starttime detectms = int(Detectiontime*1000+0.5) self.CM.log("...failure detection time: %d ms" % detectms) self.Stats["totaltime"] = self.Stats["totaltime"] + Detectiontime if self.Stats["min"] == 0: self.Stats["min"] = Detectiontime if Detectiontime > self.Stats["max"]: self.Stats["max"] = Detectiontime if Detectiontime < self.Stats["min"]: self.Stats["min"] = Detectiontime self.CM.ShouldBeStatus[node] = self.CM["down"] self.start(node) return self.success() else: # This test is CM-specific - FIXME!! self.CM.rsh(node, "killall -9 @libdir@/heartbeat/ccm @libdir@/heartbeat/ipfail >/dev/null 2>&1; true") self.CM.ShouldBeStatus[node] = self.CM["down"] ret=self.start(node) return self.failure("Didn't find the log message") else: return self.failure("Couldn't kill cluster manager") def is_applicable(self): '''This test is applicable when auto_failback != legacy''' return self.standby.is_applicable() # This test is CM-specific - FIXME!! def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "ccm.*ERROR: ccm_control_process:failure to send protoversion request" , "ccm.*ERROR: Lost connection to heartbeat service. Need to bail out" ] AllTestClasses.append(Fastdetection) ############################################################################## class BandwidthTest(CTSTest): ############################################################################## # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. '''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount()<1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.CM.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.CM.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.CM.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.CM.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.CM.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" % rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count=count+1 linesplit = string.split(line," ") for j in range(len(linesplit)-1): if linesplit[j]=="udp": break if linesplit[j]=="length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = string.split(T1,":") time2split = string.split(timesplit[2],".") time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = string.split(line," ") for j in range(len(linessplit)-1): if linessplit[j] =="udp": break if linesplit[j]=="length:": break try: sum=int(linessplit[j+1])+sum except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = string.split(T2,":") time2split = string.split(timesplit[2],".") time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return (sum*8)/time def is_applicable(self): '''BandwidthTest is always applicable''' return 0 AllTestClasses.append(BandwidthTest) ########################################################################## class RedundantpathTest(CTSTest): ########################################################################## '''In heartbeat, it has redundant path to communicate between the cluster''' # # Tests should not be cluster-manager specific # One needs to isolate what you need from the cluster manager and then # add a (new) API to do it. # def __init__(self,cm,timeout=60): CTSTest.__init__(self,cm) self.name = "RedundantpathTest" self.timeout = timeout def PathCount(self): '''Return number of communication paths''' Path = self.CM.InternalCommConfig() cf = self.CM.cf eths = [] serials = [] num = 0 for interface in Path["interface"]: if re.search("eth",interface): eths.append(interface) num = num + 1 if re.search("/dev",interface): serials.append(interface) num = num + 1 return (num, eths, serials) def __call__(self,node): '''Perform redundant path test''' self.incr("calls") if self.CM.ShouldBeStatus[node]!=self.CM["up"]: return self.skipped() (num, eths, serials) = self.PathCount() for eth in eths: if self.CM.rsh(node,"ifconfig %s down" % eth)==0: PathDown = "OK" break if PathDown != "OK": for serial in serials: if self.CM.rsh(node,"setserial %s uart none" % serial)==0: PathDown = "OK" break if PathDown != "OK": return self.failure("Cannot break the path") time.sleep(self.timeout) for audit in CTSaudits.AuditList(self.CM): if not audit(): for eth in eths: self.CM.rsh(node,"ifconfig %s up" % eth) for serial in serials: self.CM.rsh(node,"setserial %s uart 16550" % serial) return self.failure("Redundant path fail") for eth in eths: self.CM.rsh(node,"ifconfig %s up" % eth) for serial in serials: self.CM.rsh(node,"setserial %s uart 16550" % serial) return self.success() def is_applicable(self): '''It is applicable when you have more than one connection''' return self.PathCount()[0] > 1 # FIXME!! Why is this one commented out? #AllTestClasses.append(RedundantpathTest) ########################################################################## class DRBDTest(CTSTest): ########################################################################## '''In heartbeat, it provides replicated storage.''' def __init__(self,cm, timeout=10): CTSTest.__init__(self,cm) self.name = "DRBD" self.timeout = timeout def __call__(self, dummy): '''Perform the 'DRBD' test.''' self.incr("calls") for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["down"]: return self.skipped() # Note: All these special cases with Start/Stop/StatusDRBD # should be reworked to use resource objects instead of # being hardwired to bypass the objects here. for node in self.CM.Env["nodes"]: done=time.time()+self.timeout+1 while (time.time()done: return self.failure("Can't start drbd, please check it") device={} for node in self.CM.Env["nodes"]: device[node]=self.getdevice(node) node = self.CM.Env["nodes"][0] done=time.time()+self.timeout+1 while 1: if (time.time()>done): return self.failure("the drbd could't sync") self.CM.rsh(node,"cp /proc/drbd /var/run >/dev/null 2>&1") if self.CM.rsh.cp("%s:/var/run/drbd" % node,"/var/run"): line = open("/tmp/var/run").readlines()[2] p = line.find("Primary") s1 = line.find("Secondary") s2 = line.rfind("Secondary") if s1!=s2: if self.CM.rsh(node,"drbdsetup %s primary" % device[node]): pass if p!=-1: if p/dev/null" % node) watch.lookforall() self.CM.cluster_stable() recovernode=self.CM.ResourceLocation(self.rid) if len(recovernode)==1: self.CM.debug("Recovered: %s is running on %s" %(self.rid, recovernode[0])) if not watch.unmatched: return self.success() else: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif len(recovernode)==0: return self.failure("%s was not recovered and is inactive" % self.rid) else: return self.failure("%s is now active on more than one node: %s" %(self.rid, str(recovernode))) def is_applicable(self): '''ResourceRecover is applicable only when there are resources running on our cluster and environment is linux-ha-v2''' if self.CM["Name"] == "linux-ha-v2": resourcelist=self.CM.Resources() if len(resourcelist)==0: self.CM.log("No resources on this cluster") return 0 else: return 1 return 0 def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """Unknown operation: fail""", """ERROR: sending stonithRA op to stonithd failed.""", """ERROR: process_graph_event: Action %s_%s_%d initiated outside of a transition""" % (self.rid, self.action, self.interval) ] AllTestClasses.append(ResourceRecover) ################################################################### class ComponentFail(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ComponentFail" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.complist = cm.Components() self.theystart = cm["Pat:They_started"] def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") # start all nodes if not self.CM.cluster_stable(): self.stopall(None) ret = self.startall(None) if not ret: return self.failure("Setup failed") # select a component to kill component = self.CM.Env.RandomGen.choice(self.complist) self.CM.log("choose %s to kill"%component.name) patterns = [] patterns.append("%s heartbeat.*Respawning.*%s" %(node, component.name)) patterns.append(self.theystart%node) # set the watch for stable watch = CTS.LogWatcher( self.CM["LogFileName"], patterns, self.CM["DeadTime"]+10) watch.setwatch() # kill the component component.kill(node) # check to see Heartbeat noticed match = watch.look() if match: self.CM.log("Found match: %s"%(match)) # now watch it recover... if self.CM.cluster_stable(): return self.success() else: self.failure("Cluster not stable") else: return self.failure("Heartbeat didnt notice %s die" %component) def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 def errorstoignore(self): '''Return list of errors which should be ignored''' return ["""heartbeat.*killed by signal 9""", """heartbeat.*Respawning"""] #AllTestClasses.append(ComponentFail) #################################################################### class Split_brainTest2(CTSTest): #################################################################### '''It is used to test split-brain. when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "Split_brain2" self.start = StartTest(cm) self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") count1 = self.CM.Env.RandomGen.randint(1,len(self.CM.Env["nodes"])-1) partition1 = [] while len(partition1) < count1: select = self.CM.Env.RandomGen.choice(self.CM.Env["nodes"]) if not select in partition1: partition1.append(select) partition2 = [] for member in self.CM.Env["nodes"]: if not member in partition1: partition2.append(member) allownodes1 = "" for member in partition1: allownodes1 += member + " " allownodes2 = "" for member in partition2: allownodes2 += member + " " self.CM.log("Partition1: " + str(partition1)) self.CM.log("Partition2: " + str(partition2)) '''isolate nodes, Look for node is dead message''' watchdeadpats = [ ] deadpat = self.CM["Pat:They_dead"] for member in self.CM.Env["nodes"]: thispat = (deadpat % member) watchdeadpats.append(thispat) watchdead = CTS.LogWatcher(self.CM["LogFileName"], watchdeadpats\ , timeout=self.CM["DeadTime"]+60) watchdead.ReturnOnlyMatch() watchdead.setwatch() for member in partition1: if float(self.CM.Env["XmitLoss"])!=0 or float(self.CM.Env["RecvLoss"])!=0 : self.CM.savecomm_node(node) if not self.CM.isolate_node(member,allownodes1): return self.failure("Could not isolate the nodes") for member in partition2: if float(self.CM.Env["XmitLoss"])!=0 or float(self.CM.Env["RecvLoss"])!=0 : self.CM.savecomm_node(node) if not self.CM.isolate_node(member,allownodes2): return self.failure("Could not isolate the nodes") if not watchdead.lookforall(): for member in self.CM.Env["nodes"]: self.CM.unisolate_node(member) self.CM.log("Patterns not found: " + repr(watchdead.unmatched)) return self.failure("Didn't find the log 'dead' message") dcnum=0 while dcnum < 2: dcnum = 0 for member in self.CM.Env["nodes"]: if self.CM.is_node_dc(member): dcnum += 1 time.sleep(1) ''' Unisolate the node, look for the return partition message and check whether they restart ''' watchpartitionpats = [self.CM["Pat:DC_IDLE"]] partitionpat = self.CM["Pat:Return_partition"] for member in self.CM.Env["nodes"]: thispat = (partitionpat % member) watchpartitionpats.append(thispat) watchpartition = CTS.LogWatcher(self.CM["LogFileName"], watchpartitionpats\ , timeout=self.CM["DeadTime"]+60) watchpartition.setwatch() for member in self.CM.Env["nodes"]: if float(self.CM.Env["XmitLoss"])!=0 or float(self.CM.Env["RecvLoss"])!=0 : self.CM.restorecomm_node(node) self.CM.unisolate_node(member) if not watchpartition.lookforall(): self.CM.log("Patterns not found: " + repr(watchpartition.unmatched)) return self.failure("Didn't find return from partition messages") return self.success() def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "ERROR:.*Both machines own.*resources" , "ERROR:.*lost a lot of packets!" , "ERROR: Cannot rexmit pkt .*: seqno too low" , "ERROR: Irretrievably lost packet: node" ] #AllTestClasses.append(Split_brainTest2) #################################################################### class MemoryTest(CTSTest): #################################################################### '''Check to see if anyone is leaking memory''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Memory" # self.test = ElectionMemoryTest(cm) self.test = ResourceRecover(cm) self.startall = SimulStartLite(cm) self.before = {} self.after = {} def __call__(self, node): ps_command='''ps -eo ucomm,pid,pmem,tsiz,dsiz,rss,vsize | grep -e ccm -e ha_logd -e cib -e crmd -e lrmd -e tengine -e pengine''' memory_error = [ "", "", "", "Code", "Data", "Resident", "Total" ] ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(10) for node in self.CM.Env["nodes"]: self.before[node] = {} rsh_pipe = self.CM.rsh.popen(node, ps_command) rsh_pipe.tochild.close() result = rsh_pipe.fromchild.readline() while result: tokens = result.split() self.before[node][tokens[1]] = result result = rsh_pipe.fromchild.readline() rsh_pipe.fromchild.close() self.lastrc = rsh_pipe.wait() # do something... if not self.test(node): return self.failure("Underlying test failed") time.sleep(10) for node in self.CM.Env["nodes"]: self.after[node] = {} rsh_pipe = self.CM.rsh.popen(node, ps_command) rsh_pipe.tochild.close() result = rsh_pipe.fromchild.readline() while result: tokens = result.split() self.after[node][tokens[1]] = result result = rsh_pipe.fromchild.readline() rsh_pipe.fromchild.close() self.lastrc = rsh_pipe.wait() failed_nodes = [] for node in self.CM.Env["nodes"]: failed = 0 for process in self.before[node]: messages = [] before_line = self.before[node][process] after_line = self.after[node][process] if not after_line: self.CM.log("%s %s[%s] exited during the test" %(node, before_tokens[0], before_tokens[1])) continue before_tokens = before_line.split() after_tokens = after_line.split() # 3 : Code size # 4 : Data size # 5 : Resident size # 6 : Total size for index in [ 3, 4, 6 ]: mem_before = int(before_tokens[index]) mem_after = int(after_tokens[index]) mem_diff = mem_after - mem_before mem_allow = mem_before * 0.01 # for now... mem_allow = 0 if mem_diff > mem_allow: failed = 1 messages.append("%s size grew by %dkB (%dkB)" %(memory_error[index], mem_diff, mem_after)) elif mem_diff < 0: messages.append("%s size shrank by %dkB (%dkB)" %(memory_error[index], mem_diff, mem_after)) if len(messages) > 0: self.CM.log("Process %s[%s] on %s: %s" %(before_tokens[0], before_tokens[1], node, repr(messages))) self.CM.debug("%s Before: %s[%s] (%s%%):\tcode=%skB, data=%skB, resident=%skB, total=%skB" %(node, before_tokens[0], before_tokens[1], before_tokens[2], before_tokens[3], before_tokens[4], before_tokens[5], before_tokens[6])) self.CM.debug("%s After: %s[%s] (%s%%):\tcode=%skB, data=%skB, resident=%skB, total=%skB" %(node, after_tokens[0], after_tokens[1], after_tokens[2], after_tokens[3], after_tokens[4], after_tokens[5], after_tokens[6])) if failed == 1: failed_nodes.append(node) if len(failed_nodes) > 0: return self.failure("Memory leaked on: " + repr(failed_nodes)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """ERROR: .* LRM operation.*monitor on .*: not running""", """pengine:.*Handling failed """] def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 #AllTestClasses.append(MemoryTest) #################################################################### class ElectionMemoryTest(CTSTest): #################################################################### '''Check to see if anyone is leaking memory''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Election" def __call__(self, node): self.rsh.readaline(node, self.CM["ElectionCmd"]%node) if self.CM.cluster_stable(): return self.success() return self.failure("Cluster not stable") def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''Never applicable, only for use by the memory test''' return 0 AllTestClasses.append(ElectionMemoryTest) #################################################################### class SpecialTest1(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... ret = self.stopall(None) if not ret: return ret # Start the selected node ret = self.restart1(node) if not ret: return ret # Start all remaining nodes ret = self.startall(None) return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): return 1 AllTestClasses.append(SpecialTest1) ################################################################### class NearQuorumPointTest(CTSTest): ################################################################### ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. ''' self.incr("calls") startset = [] stopset = [] #decide what to do with each node for node in self.CM.Env["nodes"]: action = self.CM.Env.RandomGen.choice(["start","stop"]) #action = self.CM.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.CM.debug("start nodes:" + repr(startset)) self.CM.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == self.CM["up"]: watchpats.append(self.CM["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == self.CM["down"]: watchpats.append(self.CM["Pat:They_started"] % node) if len(watchpats) == 0: return self.skipped() watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() return self.success() self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) if upnodes == [] and downnodes == []: self.CM.cluster_stable() return self.success() if len(upnodes) > 0: self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.CM.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 AllTestClasses.append(NearQuorumPointTest) ################################################################### class BSC_AddResource(CTSTest): ################################################################### '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="AddResource" self.resource_offset = 0 self.cib_cmd="""@sbindir@/cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*complete" patterns = [] patterns.append(start_pat % r_id) watch = CTS.LogWatcher( self.CM["LogFileName"], patterns, self.CM["DeadTime"]) watch.setwatch() fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"] = ip if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.CM["DeadTime"]): return self.failure("Unstable cluster") return self.success() def make_ip_resource(self, node, id, rclass, type, ip): self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint=""" """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.CM.rsh.remote_py(node, "os", "system", self.cib_cmd % ("constraints", node_constraint)) if rc != 0: self.CM.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.CM.rsh.remote_py(node, "os", "system", self.cib_cmd % ("resources", rsc_xml)) if rc != 0: self.CM.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.CM["Name"] == "linux-ha-v2" and self.CM.Env["DoBSC"]: return 1 return None def TestList(cm): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): result.append(bound_test) return result class SimulStopLite(CTSTest): ################################################################### '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.incr("WasStarted") watchpats.append(self.CM["Pat:All_stopped"] % node) if self.CM.Env["use_logd"]: watchpats.append(self.CM["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.skipped() # Stop all the nodes - at about the same time... watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.starttime=time.time() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() return self.success() did_fail=0 up_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail=1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.CM.log("Warn: All nodes stopped but CTS didnt detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.success() def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 ################################################################### class SimulStartLite(CTSTest): ################################################################### '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartList' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.incr("WasStopped") watchpats.append(self.CM["Pat:They_started"] % node) if len(watchpats) == 0: return self.skipped() # Start all the nodes - at about the same time... watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.starttime=time.time() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.CM.StartaCMnoBlock(node) if watch.lookforall(): for attempt in (1, 2, 3, 4, 5): if self.CM.cluster_stable(): return self.success() return self.failure("Cluster did not stabilize") did_fail=0 unstable = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.CM.Env["nodes"]: if not self.CM.node_stable(node): did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) self.CM.log("Warn: All nodes started but CTS didnt detect: " + repr(watch.unmatched)) return self.success() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 ################################################################### class LoggingTest(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Logging" def __call__(self, dummy): '''Perform the 'Logging' test. ''' self.incr("calls") # Make sure logging is working and we have enough disk space... if not self.CM.TestLogging(): sys.exit(1) if not self.CM.CheckDf(): sys.exit(1) def is_applicable(self): '''ResourceRecover is applicable only when there are resources running on our cluster and environment is linux-ha-v2''' return self.CM.Env["DoBSC"] def errorstoignore(self): '''Return list of errors which should be ignored''' return [] #AllTestClasses.append(LoggingTest) diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c index bcb4dca82b..541081462a 100644 --- a/tools/attrd_updater.c +++ b/tools/attrd_updater.c @@ -1,161 +1,163 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hVn:v:d:s:S:" const char* crm_system_name = "attrd_updater"; const char *attr_name = NULL; const char *attr_value = NULL; const char *attr_set = NULL; const char *attr_section = NULL; const char *attr_dampen = NULL; void usage(const char* cmd, int exit_status); static gboolean process_attrd_message( HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender) { crm_err("Why did we get a message?"); crm_log_message_adv(LOG_WARNING, "attrd:msg", msg); return TRUE; } int main(int argc, char ** argv) { HA_Message *update = NULL; IPC_Channel *attrd = NULL; int argerr = 0; int flag; crm_log_init(crm_system_name); crm_debug_3("Begining option processing"); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'n': attr_name = crm_strdup(optarg); break; case 'v': attr_value = crm_strdup(optarg); break; case 's': attr_set = crm_strdup(optarg); break; case 'd': attr_dampen = crm_strdup(optarg); break; case 'S': attr_section = crm_strdup(optarg); break; default: ++argerr; break; } } crm_debug_3("Option processing complete"); if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } /* read local config file */ init_client_ipc_comms(T_ATTRD, subsystem_msg_dispatch, (void*)process_attrd_message, &attrd); if(attrd == NULL) { fprintf(stderr, "Could not connect to "T_ATTRD"\n"); return 1; } + cl_log_args(argc, argv); + update = ha_msg_new(4); ha_msg_add(update, F_TYPE, T_ATTRD); ha_msg_add(update, F_ORIG, crm_system_name); ha_msg_add(update, F_ATTRD_TASK, "update"); ha_msg_add(update, F_ATTRD_ATTRIBUTE, attr_name); if(attr_value != NULL) { ha_msg_add(update, F_ATTRD_VALUE, attr_value); } if(attr_set != NULL) { ha_msg_add(update, F_ATTRD_SET, attr_set); } if(attr_section != NULL) { ha_msg_add(update, F_ATTRD_SECTION, attr_section); } if(attr_dampen != NULL) { ha_msg_add(update, F_ATTRD_DAMPEN, attr_dampen); } if(send_ipc_message(attrd, update) == FALSE) { fprintf(stderr, "Could not send update\n"); crm_msg_del(update); return 1; } crm_msg_del(update); return 0; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s -n [-vdsS]\n", cmd); fprintf(stream, "\t-n \tthe attribute that changed\n"); fprintf(stream, "\t-v \tthe attribute's value\n"); fprintf(stream, "\t\tIf no value is supplied, the attribute value for this node will be deleted\n"); fprintf(stream, "\t-d \tthe time to wait (dampening) further changes occur\n"); fprintf(stream, "\t-s \tthe attribute set in which to place the value\n"); fprintf(stream, "\t\tMost people have no need to specify this\n"); fprintf(stream, "\t-S \tthe section in which to place the value\n"); fprintf(stream, "\t\tMost people have no need to specify this\n"); fflush(stream); exit(exit_status); }