diff --git a/crm/admin/crmadmin.c b/crm/admin/crmadmin.c index bb0004e4fb..d4ae3d9a1a 100644 --- a/crm/admin/crmadmin.c +++ b/crm/admin/crmadmin.c @@ -1,912 +1,911 @@ -/* $Id: crmadmin.c,v 1.42 2005/05/19 10:50:08 andrew Exp $ */ +/* $Id: crmadmin.c,v 1.43 2005/06/10 07:05:06 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int message_timer_id = -1; int message_timeout_ms = 30*1000; GMainLoop *mainloop = NULL; IPC_Channel *crmd_channel = NULL; char *admin_uuid = NULL; void usage(const char *cmd, int exit_status); ll_cluster_t *do_init(void); int do_work(ll_cluster_t * hb_cluster); void crmd_ipc_connection_destroy(gpointer user_data); gboolean admin_msg_callback(IPC_Channel * source_data, void *private_data); char *pluralSection(const char *a_section); crm_data_t *handleCibMod(void); int do_find_resource(const char *rsc, crm_data_t *xml_node); int do_find_resource_list(crm_data_t *xml_node); int do_find_node_list(crm_data_t *xml_node); gboolean admin_message_timeout(gpointer data); gboolean is_node_online(crm_data_t *node_state); enum debug { debug_none, debug_dec, debug_inc }; gboolean 
BE_VERBOSE = FALSE; int expected_responses = 1; gboolean DO_HEALTH = FALSE; gboolean DO_RESET = FALSE; gboolean DO_RESOURCE = FALSE; gboolean DO_ELECT_DC = FALSE; gboolean DO_WHOIS_DC = FALSE; gboolean DO_NODE_LIST = FALSE; gboolean BE_SILENT = FALSE; gboolean DO_RESOURCE_LIST = FALSE; gboolean DO_OPTION = FALSE; gboolean DO_STANDBY = FALSE; enum debug DO_DEBUG = debug_none; const char *crmd_operation = NULL; crm_data_t *msg_options = NULL; const char *standby_on_off = "on"; const char *admin_verbose = XML_BOOLEAN_FALSE; char *id = NULL; char *this_msg_reference = NULL; char *disconnect = NULL; char *dest_node = NULL; char *rsc_name = NULL; char *crm_option = NULL; int operation_status = 0; const char *sys_to = NULL; const char *crm_system_name = "crmadmin"; #define OPTARGS "V?K:S:HE:DW:d:i:RNs:a:qt:o:" int main(int argc, char **argv) { int option_index = 0; int argerr = 0; int flag; ll_cluster_t *hb_cluster = NULL; static struct option long_options[] = { /* Top-level Options */ {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"quiet", 0, 0, 'q'}, {"reference", 1, 0, 0}, {XML_ATTR_TIMEOUT, 1, 0, 't'}, /* daemon options */ {"kill", 1, 0, 'K'}, /* stop a node */ {"die", 0, 0, 0}, /* kill a node, no respawn */ {"crm_debug_inc", 1, 0, 'i'}, {"crm_debug_dec", 1, 0, 'd'}, {"status", 1, 0, 'S'}, {"standby", 1, 0, 's'}, {"active", 1, 0, 'a'}, {"health", 0, 0, 'H'}, {"election", 0, 0, 'E'}, {"dc_lookup", 0, 0, 'D'}, {"resources", 0, 0, 'R'}, {"nodes", 0, 0, 'N'}, {"whereis", 1, 0, 'W'}, {"option", 1, 0, 'o'}, {0, 0, 0, 0} }; crm_system_name = basename(argv[0]); crm_log_level = 0; crm_log_init(crm_system_name); crm_log_level = 0; if(argc < 2) { usage(crm_system_name, LSB_EXIT_EINVAL); } while (1) { flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); if (flag == -1) break; switch(flag) { case 0: printf("option %s", long_options[option_index].name); if (optarg) printf(" with arg %s", optarg); printf("\n"); if (strcmp("reference", long_options[option_index].name) 
== 0) { this_msg_reference = crm_strdup(optarg); } else if (strcmp("die", long_options[option_index].name) == 0) { DO_RESET = TRUE; crmd_operation = CRM_OP_DIE; } else { printf( "?? Long option (--%s) is not yet properly supported ??\n", long_options[option_index].name); ++argerr; } break; /* a sample test for multiple instance if (digit_optind != 0 && digit_optind != this_option_optind) printf ("digits occur in two different argv-elements.\n"); digit_optind = this_option_optind; printf ("option %c\n", c); */ case 'V': BE_VERBOSE = TRUE; admin_verbose = XML_BOOLEAN_TRUE; cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case 't': message_timeout_ms = atoi(optarg); if(message_timeout_ms < 1) { message_timeout_ms = 30*1000; } break; case '?': usage(crm_system_name, LSB_EXIT_OK); break; case 'D': DO_WHOIS_DC = TRUE; break; case 'W': DO_RESOURCE = TRUE; crm_debug_2("Option %c => %s", flag, optarg); rsc_name = crm_strdup(optarg); break; case 'K': DO_RESET = TRUE; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); crmd_operation = CRM_OP_LOCAL_SHUTDOWN; break; case 'o': DO_OPTION = TRUE; crm_debug_2("Option %c => %s", flag, optarg); crm_option = crm_strdup(optarg); break; case 'q': BE_SILENT = TRUE; break; case 'i': DO_DEBUG = debug_inc; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 'd': DO_DEBUG = debug_dec; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 's': DO_STANDBY = TRUE; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 'a': DO_STANDBY = TRUE; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); standby_on_off = "off"; break; case 'S': DO_HEALTH = TRUE; crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 'E': DO_ELECT_DC = TRUE; break; case 'N': DO_NODE_LIST = TRUE; break; case 'R': DO_RESOURCE_LIST = TRUE; break; case 'H': DO_HEALTH = TRUE; 
break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } hb_cluster = do_init(); if (hb_cluster != NULL) { int res = do_work(hb_cluster); - if (res > 0) { + if (res >= 0) { /* wait for the reply by creating a mainloop and running it until * the callbacks are invoked... */ mainloop = g_main_new(FALSE); - crm_debug_2("%s waiting for reply from the local CRM", - crm_system_name); + expected_responses++; + if(res == 0) { + crm_debug_2("no reply expected," + " wait for the hello message only"); + + } else { + crm_debug_2("Waiting for reply from the local CRM"); + } message_timer_id = Gmain_timeout_add( message_timeout_ms, admin_message_timeout, NULL); g_main_run(mainloop); return_to_orig_privs(); - } else if(res == 0) { - crm_debug_2("%s: no reply expected", - crm_system_name); - } else { crm_err("No message to send"); operation_status = -1; } } else { crm_err("Init failed, could not perform requested operations"); operation_status = -2; } crm_debug_2("%s exiting normally", crm_system_name); return operation_status; } int do_work(ll_cluster_t * hb_cluster) { int ret = 1; /* construct the request */ crm_data_t *msg_data = NULL; gboolean all_is_good = TRUE; msg_options = create_xml_node(NULL, XML_TAG_OPTIONS); set_xml_property_copy(msg_options, XML_ATTR_VERBOSE, admin_verbose); set_xml_property_copy(msg_options, XML_ATTR_TIMEOUT, "0"); if (DO_HEALTH == TRUE) { crm_debug_2("Querying the system"); sys_to = CRM_SYSTEM_DC; if (dest_node != NULL) { sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_PING; if (BE_VERBOSE) { expected_responses = -1;/* wait until timeout instead */ } set_xml_property_copy( msg_options, XML_ATTR_TIMEOUT, "0"); } else { crm_info("Cluster-wide health not available yet"); all_is_good = 
FALSE; } } else if(DO_ELECT_DC) { /* tell the local node to initiate an election */ sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_VOTE; set_xml_property_copy( msg_options, XML_ATTR_TIMEOUT, "0"); dest_node = NULL; ret = 0; /* no return message */ } else if(DO_WHOIS_DC) { sys_to = CRM_SYSTEM_DC; crmd_operation = CRM_OP_PING; set_xml_property_copy( msg_options, XML_ATTR_TIMEOUT, "0"); dest_node = NULL; } else if(DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST || DO_OPTION){ cib_t * the_cib = cib_new(); crm_data_t *output = NULL; int call_options = cib_sync_call; enum cib_errors rc = the_cib->cmds->signon( the_cib, crm_system_name, cib_command); if(rc != cib_ok) { return -1; } else if(DO_RESOURCE) { output = get_cib_copy(the_cib); do_find_resource(rsc_name, output); } else if(DO_RESOURCE_LIST) { output = get_cib_copy(the_cib); do_find_resource_list(output); } else if(DO_NODE_LIST) { output = get_cib_copy(the_cib); do_find_node_list(output); } else if(DO_OPTION) { char *name = NULL; char *value = NULL; crm_data_t *xml_option = NULL; crm_data_t *fragment = NULL; if(decodeNVpair(crm_option, '=', &name, &value)==FALSE){ crm_err("%s needs to be of the form" " =", crm_option); return -1; } xml_option = create_xml_node(NULL, XML_CIB_TAG_NVPAIR); set_xml_property_copy( xml_option, XML_NVPAIR_ATTR_NAME, name); set_xml_property_copy( xml_option, XML_NVPAIR_ATTR_VALUE, value); fragment = create_cib_fragment(xml_option, NULL); free_xml(xml_option); crm_free(name); crm_free(value); rc = the_cib->cmds->modify( the_cib, XML_CIB_TAG_CRMCONFIG, fragment, NULL, call_options|cib_discard_reply); free_xml(fragment); } else if(DO_STANDBY) { char *name = NULL; char *value = NULL; crm_data_t *a_node = NULL; crm_data_t *xml_obj = NULL; crm_data_t *fragment = NULL; if(decodeNVpair(crm_option, '=', &name, &value)==FALSE){ crm_err("%s needs to be of the form" " =", crm_option); return -1; } a_node = create_xml_node(NULL, XML_CIB_TAG_NODE); set_xml_property_copy(a_node, XML_ATTR_ID, 
dest_node); /* NOTE(review): this DO_STANDBY branch sits inside the DO_RESOURCE/DO_RESOURCE_LIST/DO_NODE_LIST/DO_OPTION arm of do_work and is only tested after all four of those flags, so it looks unreachable as written - confirm intent */ xml_obj = create_xml_node(a_node, XML_TAG_ATTR_SETS); xml_obj = create_xml_node(xml_obj, XML_TAG_ATTRS); xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); set_xml_property_copy( xml_obj, XML_NVPAIR_ATTR_NAME, "standby"); set_xml_property_copy( xml_obj, XML_NVPAIR_ATTR_VALUE, standby_on_off); fragment = create_cib_fragment(a_node, NULL); free_xml(a_node); crm_free(name); crm_free(value); rc = the_cib->cmds->modify( the_cib, XML_CIB_TAG_NODES, fragment, NULL, call_options|cib_discard_reply); free_xml(fragment); } free_xml(output); the_cib->cmds->signoff(the_cib); return rc; } else if(DO_RESET) { /* tell dest_node to initiate the shutdown procedure * * if dest_node is NULL, the request will be sent to the * local node */ sys_to = CRM_SYSTEM_CRMD; set_xml_property_copy( msg_options, XML_ATTR_TIMEOUT, "0"); ret = 0; /* no return message */ } else if(DO_DEBUG == debug_inc) { /* tell dest_node to increase its debug level * * if dest_node is NULL, the request will be sent to the * local node */ sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_DEBUG_UP; ret = 0; /* no return message */ } else if(DO_DEBUG == debug_dec) { /* tell dest_node to decrease its debug level * * if dest_node is NULL, the request will be sent to the * local node */ sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_DEBUG_DOWN; ret = 0; /* no return message */ } else { crm_err("Unknown options"); all_is_good = FALSE; } if(all_is_good == FALSE) { crm_err("Creation of request failed. 
No message to send"); return -1; } /* send it */ if (crmd_channel == NULL) { crm_err("The IPC connection is not valid, cannot send anything"); return -1; } if(sys_to == NULL) { if (dest_node != NULL) sys_to = CRM_SYSTEM_CRMD; else sys_to = CRM_SYSTEM_DC; } { HA_Message *cmd = create_request( crmd_operation, msg_data, dest_node, sys_to, crm_system_name, admin_uuid); if(this_msg_reference != NULL) { ha_msg_mod(cmd, XML_ATTR_REFERENCE, this_msg_reference); } send_ipc_message(crmd_channel, cmd); } return ret; } void crmd_ipc_connection_destroy(gpointer user_data) { crm_err("Connection to CRMd was terminated"); exit(1); } ll_cluster_t * do_init(void) { int facility; GCHSource *src = NULL; ll_cluster_t *hb_cluster = NULL; /* change the logging facility to the one used by heartbeat daemon */ hb_cluster = ll_cluster_new("heartbeat"); crm_debug_2("Switching to Heartbeat logger"); if (( facility = hb_cluster->llc_ops->get_logfacility(hb_cluster)) > 0) { cl_log_set_facility(facility); } crm_malloc0(admin_uuid, sizeof(char) * 11); if(admin_uuid != NULL) { snprintf(admin_uuid, 10, "%d", getpid()); admin_uuid[10] = '\0'; } src = init_client_ipc_comms( CRM_SYSTEM_CRMD, admin_msg_callback, NULL, &crmd_channel); if(crmd_channel != NULL) { send_hello_message( crmd_channel, admin_uuid, crm_system_name,"0", "1"); set_IPC_Channel_dnotify(src, crmd_ipc_connection_destroy); return hb_cluster; } return NULL; } gboolean admin_msg_callback(IPC_Channel * server, void *private_data) { int lpc = 0; IPC_Message *msg = NULL; ha_msg_input_t *new_input = NULL; gboolean hack_return_good = TRUE; static int received_responses = 0; char *filename = NULL; int filename_len = 0; const char *result = NULL; g_source_remove(message_timer_id); while (server->ch_status != IPC_DISCONNECT && server->ops->is_message_pending(server) == TRUE) { if(new_input != NULL) { delete_ha_msg_input(new_input); } if (server->ops->recv(server, &msg) != IPC_OK) { perror("Receive failure:"); return !hack_return_good; } if (msg 
== NULL) { crm_debug_4("No message this time"); continue; } lpc++; + received_responses++; new_input = new_ipc_msg_input(msg); msg->msg_done(msg); crm_log_message(LOG_MSG, new_input->msg); if (new_input->xml == NULL) { - crm_info( - "XML in IPC message was not valid... " + crm_info("XML in IPC message was not valid... " "discarding."); continue; } else if (validate_crm_message( new_input->msg, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) { - crm_info( - "Message was not a CRM response. Discarding."); + crm_info("Message was not a CRM response. Discarding."); continue; } result = cl_get_string(new_input->msg, XML_ATTR_RESULT); if(result == NULL || strcmp(result, "ok") == 0) { result = "pass"; } else { result = "fail"; } - received_responses++; - if(DO_HEALTH) { const char *state = crm_element_value( new_input->xml, "crmd_state"); printf("Status of %s@%s: %s (%s)\n", crm_element_value(new_input->xml,XML_PING_ATTR_SYSFROM), cl_get_string(new_input->msg, F_CRM_HOST_FROM), state, crm_element_value(new_input->xml,XML_PING_ATTR_STATUS)); if(BE_SILENT && state != NULL) { fprintf(stderr, "%s\n", state); } } else if(DO_WHOIS_DC) { const char *dc = cl_get_string( new_input->msg, F_CRM_HOST_FROM); printf("Designated Controller is: %s\n", dc); if(BE_SILENT && dc != NULL) { fprintf(stderr, "%s\n", dc); } } if (this_msg_reference != NULL) { /* in testing mode... 
*/ /* 31 = "test-_.xml" + an_int_as_string + '\0' */ filename_len = 31 + strlen(this_msg_reference); crm_malloc0(filename, sizeof(char) * filename_len); if(filename != NULL) { sprintf(filename, "%s-%s_%d.xml", result, this_msg_reference, received_responses); filename[filename_len - 1] = '\0'; if (0 > write_xml_file(new_input->xml, filename)) { crm_crit("Could not save response to" " %s", filename); } } } } if (server->ch_status == IPC_DISCONNECT) { crm_debug_2("admin_msg_callback: received HUP"); return !hack_return_good; } if (received_responses >= expected_responses) { crm_debug_2( "Recieved expected number (%d) of messages from Heartbeat." " Exiting normally.", expected_responses); g_main_quit(mainloop); return !hack_return_good; } message_timer_id = Gmain_timeout_add( message_timeout_ms, admin_message_timeout, NULL); return hack_return_good; } gboolean admin_message_timeout(gpointer data) { fprintf(stderr, "No messages received in %d seconds.. aborting\n", (int)message_timeout_ms/1000); crm_err("No messages received in %d seconds", (int)message_timeout_ms/1000); g_main_quit(mainloop); return FALSE; } int do_find_resource(const char *rsc, crm_data_t *xml_node) { int found = 0; crm_data_t *nodestates = get_object_root(XML_CIB_TAG_STATUS, xml_node); const char *path2[] = { XML_CIB_TAG_LRM, XML_LRM_TAG_RESOURCES }; xml_child_iter( nodestates, a_node, XML_CIB_TAG_STATE, crm_data_t *rscstates = NULL; if(is_node_online(a_node) == FALSE) { crm_debug_3("Skipping offline node: %s", crm_element_value(a_node, XML_ATTR_ID)); continue; } rscstates = find_xml_node_nested(a_node, path2, DIMOF(path2)); xml_child_iter( rscstates, rsc_state, XML_LRM_TAG_RESOURCE, const char *id = crm_element_value( rsc_state,XML_ATTR_ID); const char *target = crm_element_value( a_node, XML_ATTR_UNAME); const char *last_op = crm_element_value( rsc_state,XML_LRM_ATTR_LASTOP); const char *op_code = crm_element_value( rsc_state,XML_LRM_ATTR_OPSTATUS); crm_debug_3("checking %s:%s for %s", target, id, 
rsc); if(safe_str_neq(rsc, id)){ crm_debug_4("no match"); continue; } if(safe_str_eq("stop", last_op)) { crm_debug_3("resource %s is stopped on: %s", rsc, target); } else if(safe_str_eq(op_code, "-1")) { crm_debug_3("resource %s is pending on: %s", rsc, target); } else if(safe_str_neq(op_code, "0")) { crm_debug_3("resource %s is failed on: %s", rsc, target); } else { crm_debug_3("resource %s is running on: %s", rsc, target); printf("resource %s is running on: %s\n", rsc, target); if(BE_SILENT) { fprintf(stderr, "%s ", target); } found++; } ); if(BE_SILENT) { fprintf(stderr, "\n"); } ); if(found == 0) { printf("resource %s is NOT running\n", rsc); } return found; } gboolean is_node_online(crm_data_t *node_state) { const char *uname = crm_element_value(node_state,XML_ATTR_UNAME); const char *join_state = crm_element_value(node_state,XML_CIB_ATTR_JOINSTATE); const char *exp_state = crm_element_value(node_state,XML_CIB_ATTR_EXPSTATE); const char *crm_state = crm_element_value(node_state,XML_CIB_ATTR_CRMDSTATE); const char *ha_state = crm_element_value(node_state,XML_CIB_ATTR_HASTATE); const char *ccm_state = crm_element_value(node_state,XML_CIB_ATTR_INCCM); if(safe_str_neq(join_state, CRMD_JOINSTATE_DOWN) && (ha_state == NULL || safe_str_eq(ha_state, ACTIVESTATUS)) && crm_is_true(ccm_state) && safe_str_eq(crm_state, ONLINESTATUS)) { crm_debug_3("Node %s is online", uname); return TRUE; } crm_debug_3("Node %s: ha=%s ccm=%s join=%s exp=%s crm=%s", uname, crm_str(ha_state), crm_str(ccm_state), crm_str(join_state), crm_str(exp_state), crm_str(crm_state)); crm_debug_3("Node %s is offline", uname); return FALSE; } int do_find_resource_list(crm_data_t *xml_node) { int found = 0; crm_data_t *rscs = get_object_root(XML_CIB_TAG_RESOURCES, xml_node); xml_child_iter( rscs, rsc, XML_CIB_TAG_RESOURCE, printf("%s resource: %s (%s)\n", crm_element_value(rsc, "class"), crm_element_value(rsc, XML_ATTR_ID), crm_element_value(rsc, XML_ATTR_TYPE)); found++; ); if(found == 0) { printf("NO 
resources configured\n"); } return found; } int do_find_node_list(crm_data_t *xml_node) { int found = 0; crm_data_t *nodes = get_object_root(XML_CIB_TAG_NODES, xml_node); xml_child_iter( nodes, node, XML_CIB_TAG_NODE, printf("%s node: %s (%s)\n", crm_element_value(node, XML_ATTR_TYPE), crm_element_value(node, XML_ATTR_UNAME), crm_element_value(node, XML_ATTR_ID)); found++; ); if(found == 0) { printf("NO nodes configured\n"); } return found; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-?vs] [command] [command args]\n", cmd); fprintf(stream, "Options\n"); fprintf(stream, "\t--%s (-%c)\t: " "turn on debug info. additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t: be very *very* quiet\n", "quiet", 'q'); fprintf(stream, "\t--%s (-%c)\t: this help message\n", "help", '?'); fprintf(stream, "\nCommands\n"); fprintf(stream, "\t--%s (-%c) \t: " "increment the CRMd debug level on \n", CRM_OP_DEBUG_UP,'i'); fprintf(stream, "\t--%s (-%c) \t: " "decrement the CRMd debug level on \n", CRM_OP_DEBUG_DOWN,'d'); fprintf(stream, "\t--%s (-%c) \t: " "shutdown the CRMd on \n", "kill", 'K'); fprintf(stream, "\t--%s (-%c) \t: " "request the status of \n", "status", 'S'); fprintf(stream, "\t--%s (-%c)\t\t: " "request the status of all nodes\n", "health", 'H'); fprintf(stream, "\t--%s (-%c) \t: " "initiate an election from \n", "election", 'E'); fprintf(stream, "\t--%s (-%c)\t: " "request the uname of the DC\n", "dc_lookup", 'D'); fprintf(stream, "\t--%s (-%c)\t\t: " "request the uname of all member nodes\n", "nodes", 'N'); fprintf(stream, "\t--%s (-%c)\t: " "request the names of all resources\n", "resources", 'R'); fprintf(stream, "\t--%s (-%c) \t: " "request the location of \n", "whereis", 'W'); fprintf(stream, "\t--%s (-%c) \t: " "Tell the node to enter \"standby\" mode\n", "standby", 's'); fprintf(stream, "\t--%s (-%c) \t: " "Tell the node to exit \"standby\" 
mode\n", "active", 'a'); /* fprintf(stream, "\t--%s (-%c)\t\n", "disconnect", 'D'); */ fflush(stream); exit(exit_status); } diff --git a/crm/crmd/callbacks.c b/crm/crmd/callbacks.c index 034e1b67ef..34f80aef00 100644 --- a/crm/crmd/callbacks.c +++ b/crm/crmd/callbacks.c @@ -1,581 +1,581 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *crmd_peer_state = NULL; crm_data_t *find_xml_in_hamessage(const HA_Message * msg); void crmd_ha_connection_destroy(gpointer user_data); /* From join_dc... 
*/ extern gboolean check_join_state( enum crmd_fsa_state cur_state, const char *source); /* #define MAX_EMPTY_CALLBACKS 20 */ /* int empty_callbacks = 0; */ /* Dispatch pending heartbeat messages: handles at most two per call (lpc < 2), stops early if the channel is no longer IPC_CONNECT, then triggers fsa_source. Returns FALSE once the channel is not connected, TRUE otherwise. */ gboolean crmd_ha_msg_dispatch(IPC_Channel *channel, gpointer user_data) { int lpc = 0; ll_cluster_t *hb_cluster = (ll_cluster_t*)user_data; while(lpc < 2 && hb_cluster->llc_ops->msgready(hb_cluster)) { /* NOTE(review): channel is dereferenced here without a NULL check, while the test after the loop guards against channel being NULL - confirm whether NULL can actually occur */ if(channel->ch_status != IPC_CONNECT) { /* there really is no point continuing */ break; } lpc++; /* invoke the callbacks but don't block */ hb_cluster->llc_ops->rcvmsg(hb_cluster, 0); } crm_debug_3("%d HA messages dispatched", lpc); G_main_set_trigger(fsa_source); if (channel && (channel->ch_status != IPC_CONNECT)) { crm_crit("Lost connection to heartbeat service."); return FALSE; } return TRUE; } void crmd_ha_msg_callback(const HA_Message * msg, void* private_data) { ha_msg_input_t *new_input = NULL; oc_node_t *from_node = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *seq = ha_msg_value(msg, F_SEQ); const char *op = ha_msg_value(msg, F_CRM_TASK); const char *sys_to = ha_msg_value(msg, F_CRM_SYS_TO); const char *sys_from = ha_msg_value(msg, F_CRM_SYS_FROM); CRM_DEV_ASSERT(from != NULL); if(fsa_membership_copy == NULL) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_message_adv( LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg); return; } from_node = g_hash_table_lookup(fsa_membership_copy->members, from); if(from_node == NULL) { int level = LOG_DEBUG; if(safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, __FILE__, __FUNCTION__, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, g_hash_table_size(fsa_membership_copy->members)); crm_log_message_adv(LOG_MSG, "HA[inbound]: CCM Discard", msg); } else if(AM_I_DC && 
safe_str_eq(sys_from, CRM_SYSTEM_DC) && safe_str_neq(from, fsa_our_uname)) { crm_err("Another DC detected: %s (op=%s)", from, op); crm_log_message_adv( LOG_WARNING, "HA[inbound]: Duplicate DC", msg); new_input = new_ha_msg_input(msg); /* make sure the election happens NOW */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, new_input, __FUNCTION__); #if 0 /* still thinking about this one... * could create a timing issue if we dont notice the * election before a new DC is elected. */ } else if(fsa_our_dc != NULL && safe_str_eq(sys_from, CRM_SYSTEM_DC) && safe_str_neq(from, fsa_our_dc)) { crm_warn("Ignoring message from wrong DC: %s vs. %s ", from, fsa_our_dc); crm_log_message_adv(LOG_WARNING, "HA[inbound]: wrong DC", msg); #endif } else if(safe_str_eq(sys_to, CRM_SYSTEM_DC) && AM_I_DC == FALSE) { crm_debug_2("Ignoring message for the DC [F_SEQ=%s]", seq); crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]: ignore", msg); return; } else if(safe_str_eq(from, fsa_our_uname) && safe_str_eq(op, CRM_OP_VOTE)) { crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]", msg); crm_debug_2("Ignoring our own vote [F_SEQ=%s]: own vote", seq); return; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_HBEAT)) { crm_debug_2("Ignoring our own heartbeat [F_SEQ=%s]", seq); crm_log_message_adv(LOG_DEBUG_4, "HA[inbound]: own heartbeat", msg); return; } else { crm_debug_3("Processing message"); crm_log_message_adv(LOG_MSG, "HA[inbound]", msg); new_input = new_ha_msg_input(msg); register_fsa_input(C_HA_MESSAGE, I_ROUTER, new_input); } #if 0 if(ha_msg_value(msg, XML_ATTR_REFERENCE) == NULL) { ha_msg_add(new_input->msg, XML_ATTR_REFERENCE, seq); } #endif delete_ha_msg_input(new_input); return; } /* * Apparently returning TRUE means "stay connected, keep doing stuff". 
* Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; IPC_Message *msg = NULL; ha_msg_input_t *new_input = NULL; crmd_client_t *curr_client = (crmd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Processing IPC message from %s", curr_client->table_key); while(lpc == 0 && client->ops->is_message_pending(client)) { - if (client->ch_status != IPC_CONNECT) { + if (client->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if (client->ops->recv(client, &msg) != IPC_OK) { perror("Receive failure:"); crm_err("[%s] [receive failure]", curr_client->table_key); stay_connected = FALSE; break; } else if (msg == NULL) { crm_err("[%s] [no message this time]", curr_client->table_key); continue; } lpc++; new_input = new_ipc_msg_input(msg); msg->msg_done(msg); crm_debug_2("Processing msg from %s", curr_client->table_key); crm_log_message_adv(LOG_MSG, "CRMd[inbound]", new_input->msg); if(crmd_authorize_message(new_input, curr_client)) { register_fsa_input(C_IPC_MESSAGE, I_ROUTER, new_input); } delete_ha_msg_input(new_input); msg = NULL; new_input = NULL; } crm_debug_2("Processed %d messages", lpc); - if (client->ch_status != IPC_CONNECT) { + if (client->ch_status == IPC_DISCONNECT) { stay_connected = FALSE; process_client_disconnect(curr_client); } G_main_set_trigger(fsa_source); return stay_connected; } gboolean lrm_dispatch(IPC_Channel*src_not_used, gpointer user_data) { int num_msgs = 0; ll_lrm_t *lrm = (ll_lrm_t*)user_data; crm_debug_3("received callback"); num_msgs = lrm->lrm_ops->rcvmsg(lrm, FALSE); if(num_msgs < 1) { crm_err("lrm->lrm_ops->rcvmsg() failed, connection lost?"); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); return FALSE; } return TRUE; } void lrm_op_callback(lrm_op_t* op) { CRM_DEV_ASSERT(op != NULL); 
if(crm_assert_failed) { return; } crm_debug("received callback: %s/%s (%s)", op->op_type, op->rsc_id, op_status2text(op->op_status)); /* Make sure the LRM events are received in order */ register_fsa_input_later(C_LRM_OP_CALLBACK, I_LRM_EVENT, op); } /* Heartbeat node-status callback: every status other than DEADSTATUS is logged and ignored; for a dead node it builds a node_state update with XML_CIB_ATTR_CLEAR_SHUTDOWN set to true, passes it to update_local_cib() and triggers fsa_source. */ void crmd_ha_status_callback( const char *node, const char * status, void* private_data) { crm_data_t *update = NULL; crm_debug_3("received callback"); crm_notice("Status update: Node %s now has status [%s]",node,status); if(safe_str_neq(status, DEADSTATUS)) { crm_debug_3("nstatus callback was not for a dead node"); return; } /* this node is toast: its status is DEADSTATUS, so record that in the local CIB */ update = create_node_state( node, node, status, NULL, NULL, NULL, NULL, __FUNCTION__); set_xml_property_copy( update, XML_CIB_ATTR_CLEAR_SHUTDOWN, XML_BOOLEAN_TRUE); update_local_cib(create_cib_fragment(update, NULL)); G_main_set_trigger(fsa_source); free_xml(update); } void crmd_client_status_callback(const char * node, const char * client, const char * status, void * private) { const char *join = NULL; const char *extra = NULL; crm_data_t * update = NULL; crm_debug_3("received callback"); if(safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; extra = XML_CIB_ATTR_CLEAR_SHUTDOWN; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; join = CRMD_JOINSTATE_DOWN; extra = XML_CIB_ATTR_CLEAR_SHUTDOWN; } /* NOTE(review): for any other status value, extra is still NULL here; it is later used as the property name in set_xml_property_copy() - confirm that helper accepts a NULL name */ set_bit_inplace(fsa_input_register, R_PEER_DATA); g_hash_table_replace( crmd_peer_state, crm_strdup(node), crm_strdup(status)); if(fsa_state == S_STARTING || fsa_state == S_STOPPING) { return; } crm_notice("Status update: Client %s/%s now has status [%s]", node, client, status); if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)) { /* did our DC leave us */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else { crm_data_t *fragment = NULL; crm_debug_3("Got client status callback"); update = create_node_state( 
node, node, NULL, NULL, status, join, NULL, __FUNCTION__); set_xml_property_copy(update, extra, XML_BOOLEAN_TRUE); fragment = create_cib_fragment(update, NULL); /* it is safe to keep these updates on the local node * each node updates their own CIB */ fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, cib_inhibit_bcast|cib_scope_local|cib_quorum_override); free_xml(fragment); free_xml(update); if(AM_I_DC && safe_str_eq(status, OFFLINESTATUS)) { g_hash_table_remove(confirmed_nodes, node); g_hash_table_remove(finalized_nodes, node); g_hash_table_remove(integrated_nodes, node); g_hash_table_remove(welcomed_nodes, node); check_join_state(fsa_state, __FUNCTION__); } } G_main_set_trigger(fsa_source); } void crmd_ha_connection_destroy(gpointer user_data) { crm_crit("Heartbeat has left us"); /* this is always an error */ /* feed this back into the FSA */ register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { if (client_channel == NULL) { crm_err("Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { crmd_client_t *blank_client = NULL; crm_debug_3("Channel connected"); crm_malloc0(blank_client, sizeof(crmd_client_t)); if (blank_client == NULL) { return FALSE; } client_channel->ops->set_recv_qlen(client_channel, 100); client_channel->ops->set_send_qlen(client_channel, 100); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; blank_client->client_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_msg_callback, blank_client, default_ipc_connection_destroy); } return TRUE; } gboolean ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; gboolean was_error = FALSE; crm_debug_3("received callback"); rc = oc_ev_handle_event(ccm_token); if(rc != 0) { 
crm_err("CCM connection appears to have failed: rc=%d.", rc); register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); was_error = TRUE; } G_main_set_trigger(fsa_source); return !was_error; } static gboolean fsa_have_quorum = FALSE; void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { int instance = -1; gboolean update_cache = FALSE; struct crmd_ccm_data_s *event_data = NULL; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; gboolean trigger_transition = FALSE; crm_debug_3("received callback"); if(data != NULL) { instance = membership->m_instance; } crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event)?"(re)attained":"lost", ccm_event_name(event), instance); switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID:/* fall through */ update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: #if UNTESTED if(AM_I_DC == FALSE) { break; } /* tell the TE to pretend it completed and stop */ /* side effect: we'll end up in S_IDLE */ register_fsa_action(A_TE_HALT, TRUE); #endif break; case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; if(AM_I_DC && need_transition(fsa_state)) { trigger_transition = TRUE; } break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); break; default: crm_err("Unknown CCM event: %d", event); } if(update_quorum && ccm_have_quorum(event) == FALSE) { /* did we just loose quorum? 
*/ if(fsa_have_quorum && need_transition(fsa_state)) { crm_info("Quorum lost: triggering transition (%s)", ccm_event_name(event)); trigger_transition = TRUE; } fsa_have_quorum = FALSE; } else if(update_quorum) { crm_debug("Updating quorum after event %s", ccm_event_name(event)); fsa_have_quorum = TRUE; } if(update_cache) { crm_debug("Updating cache after event %s", ccm_event_name(event)); crm_malloc0(event_data, sizeof(struct crmd_ccm_data_s)); /* NOTE(review): this early return skips oc_ev_callback_done(cookie), unlike the normal exit at the end of the function - confirm that is acceptable */ if(event_data == NULL) { return; } event_data->event = event; if(data != NULL) { event_data->oc = copy_ccm_oc_data(data); } register_fsa_input_adv( C_CCM_CALLBACK, I_CCM_EVENT, event_data, trigger_transition?A_TE_CANCEL:A_NOTHING, FALSE, __FUNCTION__); /* the local event_data (and its oc copy) is released right after registering; presumably register_fsa_input_adv keeps its own copy - verify */ if (event_data->oc) { crm_free(event_data->oc); event_data->oc = NULL; } crm_free(event_data); } else if(trigger_transition) { crm_debug("Scheduling transition after event %s", ccm_event_name(event)); register_fsa_action(A_TE_CANCEL); } oc_ev_callback_done(cookie); return; } /* Called when the CIB connection goes away: only logged at info level if R_SHUTDOWN is set, otherwise logged as an error, fed to the FSA as I_ERROR, and R_CIB_CONNECTED is cleared. */ void crmd_cib_connection_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); return; } longclock_t fsa_start = 0; longclock_t fsa_stop = 0; longclock_t fsa_diff = 0; /* Runs one pass of s_crmd_fsa(C_FSA_INTERNAL). When fsa_diff_max_ms > 0 the pass is timed and an error or warning is logged if it exceeds fsa_diff_max_ms or fsa_diff_warn_ms respectively. Always returns TRUE. */ gboolean crm_fsa_trigger(gpointer user_data) { unsigned int fsa_diff_ms = 0; if(fsa_diff_max_ms > 0) { fsa_start = time_longclock(); } s_crmd_fsa(C_FSA_INTERNAL); if(fsa_diff_max_ms > 0) { fsa_stop = time_longclock(); fsa_diff = sub_longclock(fsa_stop, fsa_start); fsa_diff_ms = longclockto_ms(fsa_diff); /* NOTE(review): fsa_diff_ms is an unsigned int but is printed with %d below */ if(fsa_diff_ms > fsa_diff_max_ms) { crm_err("FSA took %dms to complete", fsa_diff_ms); } else if(fsa_diff_ms > fsa_diff_warn_ms) { crm_warn("FSA took %dms to complete", fsa_diff_ms); } } return TRUE; }