diff --git a/crmd/messages.c b/crmd/messages.c index 06c7e15b9f..5126b55659 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -1,969 +1,970 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); void handle_response(xmlNode * stored_msg); enum crmd_fsa_input handle_request(xmlNode * stored_msg); enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg); #ifdef MSG_LOG # define ROUTER_RESULT(x) crm_trace("Router result: %s", x); \ crm_log_xml_trace(msg, "router.log"); #else # define ROUTER_RESULT(x) crm_trace("Router result: %s", x) #endif /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; last_data_id++; CRM_CHECK(raised_from != NULL, raised_from = ""); crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data", raised_from, prepend ? "prepended" : "appended", last_data_id, fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without"); if (input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: cause=%s", fsa_cause2string(cause)); if (old_len > 0) { crm_warn("%s stalled the FSA with pending inputs", raised_from); fsa_dump_queue(LOG_DEBUG); } if (data == NULL) { set_bit(fsa_actions, with_actions); with_actions = A_NOTHING; return 0; } crm_err("%s stalled the FSA with data - this may be broken", raised_from); } if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_trace("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); crmd_exit(1); break; } crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if (prepend) { crm_trace("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_trace("Queue len: %d", g_list_length(fsa_message_queue)); fsa_dump_queue(LOG_DEBUG_2); if (old_len == g_list_length(fsa_message_queue)) { crm_err("Couldnt add message to the queue"); } if (fsa_source) { crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } return last_data_id; } void fsa_dump_queue(int log_level) { int offset = 0; GListPtr lpc = NULL; for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) { fsa_data_t *data = (fsa_data_t *) lpc->data; do_crm_log_unlikely(log_level, "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { ha_msg_input_t *copy = NULL; xmlNodePtr data = NULL; if (orig != NULL) { crm_trace("Copy msg"); data = copy_xml(orig->msg); } else { crm_trace("No message to copy"); } copy = new_ha_msg_input(data); if (orig && orig->msg != NULL) { CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); } return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); crmd_exit(1); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit( "%s: Message data was the wrong type! %d vs. requested=%d." " Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input); /* done or process later? */ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { int dest = 1; int is_for_dc = 0; int is_for_dcib = 0; int is_for_te = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *msg_error = NULL; crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); if (msg == NULL) { msg_error = "Cannot route empty message"; } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) { /* quietly ignore */ processing_complete = TRUE; } else if (safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if (sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if (msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_xml_warn(msg, "bad msg"); } if (processing_complete) { return TRUE; } processing_complete = TRUE; is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if (host_to == NULL || strlen(host_to) == 0) { if (is_for_dc || is_for_te) { is_local = 0; } else if (is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if (safe_str_eq(fsa_our_uname, host_to)) { is_local = 1; } if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC && is_for_te) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else if (AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay to DC"); send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); } } else if (is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else { #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay"); send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); } return processing_complete; } static gboolean process_hello_message(xmlNode * hello, char **uuid, char **client_name, char **major_version, char **minor_version) { const char *local_uuid; const char *local_client_name; const char *local_major_version; const char *local_minor_version; *uuid = NULL; *client_name = NULL; *major_version = NULL; *minor_version = NULL; if (hello == NULL) { return FALSE; } local_uuid = crm_element_value(hello, "client_uuid"); local_client_name = crm_element_value(hello, "client_name"); local_major_version = crm_element_value(hello, "major_version"); local_minor_version = crm_element_value(hello, "minor_version"); if (local_uuid == NULL || strlen(local_uuid) == 0) { crm_err("Hello message was not valid (field %s not found)", "uuid"); return FALSE; } else if (local_client_name == NULL || strlen(local_client_name) == 0) { crm_err("Hello message was not valid (field %s not found)", "client name"); return FALSE; } else if (local_major_version == NULL || strlen(local_major_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "major version"); return FALSE; } else if (local_minor_version == NULL || strlen(local_minor_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "minor version"); return FALSE; } *uuid = strdup(local_uuid); *client_name = strdup(local_client_name); *major_version = strdup(local_major_version); *minor_version = strdup(local_minor_version); crm_trace("Hello message ok"); return TRUE; } gboolean crmd_authorize_message(xmlNode * client_msg, crmd_client_t * curr_client) { /* check the best case first */ const char *sys_from = crm_element_value(client_msg, F_CRM_SYS_FROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; const char *filtered_from; gpointer table_key = NULL; gboolean auth_result = FALSE; gboolean can_reply = FALSE; /* no-one has registered with this id */ xmlNode *xml = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); if (safe_str_neq(CRM_OP_HELLO, op)) { if (sys_from == NULL) { crm_warn("Message [%s] was had no value for %s... discarding", crm_element_value(client_msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM); return FALSE; } filtered_from = sys_from; /* The CIB can have two names on the DC */ if (strcasecmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup(ipc_clients, filtered_from) != NULL) { can_reply = TRUE; /* reply can be routed */ } crm_trace("Message reply can%s be routed from %s.", can_reply ? "" : " not", sys_from); if (can_reply == FALSE) { crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); } return can_reply; } crm_trace("received client join msg"); crm_log_xml_trace(client_msg, "join"); xml = get_message_xml(client_msg, F_CRM_DATA); auth_result = process_hello_message(xml, &uuid, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if (client_name == NULL || uuid == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), crm_str(uuid)); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_trace("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } } table_key = (gpointer) generate_hash_key(client_name, uuid); if (auth_result == TRUE) { crm_trace("Accepted client %s", crm_str(table_key)); curr_client->table_key = table_key; curr_client->sub_sys = strdup(client_name); curr_client->uuid = strdup(uuid); g_hash_table_insert(ipc_clients, table_key, curr_client->ipc); crm_trace("Updated client list with %s", crm_str(table_key)); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } else { free(table_key); crm_warn("Rejected client logon request"); qb_ipcs_disconnect(curr_client->ipc); } free(uuid); free(minor_version); free(major_version); free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(xmlNode * msg) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, F_CRM_MSG_TYPE); if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) { return handle_request(msg); } else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) { handle_response(msg); return I_NULL; } crm_err("Unknown message type: %s", type); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); if (xml_rsc) { rsc = ID(xml_rsc); } if (rsc) { char *attr = NULL; crm_info("Removing failcount for %s", rsc); attr = crm_concat("fail-count", rsc, '-'); update_attrd(NULL, attr, NULL, NULL); free(attr); attr = crm_concat("last-failure", rsc, '-'); update_attrd(NULL, attr, NULL, NULL); free(attr); lrm_clear_last_failure(rsc); } else { crm_log_xml_warn(stored_msg, "invalid failcount op"); } return I_NULL; } enum crmd_fsa_input handle_request(xmlNode * stored_msg) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); /* Optimize this for the DC - it has the most to do */ if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); return I_NULL; } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting ourselves down (DC)"); return I_STOP; } else if (dc_match) { crm_err("We didnt ask to be shut down, yet our" " TE is telling us too." " Better get out now!"); return I_TERMINATE; } else if (fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); return I_ELECTION; } } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if (fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; #if 0 } else if (AM_I_DC) { /* This is the old way of doing things but what is gained? */ return I_ELECTION; #endif } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0 || strcmp(op, CRM_OP_LRM_FAIL) == 0 || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*return I_SHUTDOWN; */ return I_NULL; /*========== (NOT_DC)-Only Actions ==========*/ } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (dc_match || fsa_our_dc == NULL) { if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { crm_err("We didn't ask to be shut down, yet our" " DC is telling us too."); set_bit(fsa_input_register, R_STAYDOWN); return I_STOP; } crm_info("Shutting down"); return I_STOP; } else { crm_warn("Discarding %s op from %s", op, host_from); } } else if (strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); /* Ok, so technically not so interesting, but CTS needs to see this */ crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg, ping); relay_message(msg, TRUE); free_xml(ping); free_xml(msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { - xmlNode *options = get_xpath_object("//"XML_TAG_OPTIONS, stored_msg, LOG_ERR); int id = 0; + const char *name = NULL; + xmlNode *options = get_xpath_object("//"XML_TAG_OPTIONS, stored_msg, LOG_ERR); if (options) { crm_element_value_int(options, XML_ATTR_ID, &id); + name = crm_element_value(options, XML_ATTR_UNAME); } - if (id) { - reap_crm_member(id); - } + + reap_crm_member(id, name); } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } void handle_response(xmlNode * stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { /* Check if the PE answer been superceeded by a subsequent request? */ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (safe_str_eq(msg_ref, fsa_pe_ref)) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); crm_trace("Completed: %s...", fsa_pe_ref); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "CRMd"); } } enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; time_t now = time(NULL); const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state)); crm_log_xml_trace(stored_msg, "message"); now_s = crm_itoa(now); update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); gboolean send_msg_via_ipc(xmlNode * msg, const char *sys) { gboolean send_ok = TRUE; qb_ipcs_connection_t *client_channel; client_channel = (qb_ipcs_connection_t *) g_hash_table_lookup(ipc_clients, sys); if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ send_ok = crm_ipcs_send(client_channel, 0, msg, TRUE); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); process_te_message(msg, data); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; fsa_input.msg = msg; fsa_input.xml = get_message_xml(msg, F_CRM_DATA); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __FUNCTION__; fsa_data.data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE); #endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); } else { crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } return send_ok; } ha_msg_input_t * new_ha_msg_input(xmlNode * orig) { ha_msg_input_t *input_copy = NULL; input_copy = calloc(1, sizeof(ha_msg_input_t)); input_copy->msg = orig; input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA); return input_copy; } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } diff --git a/include/crm/cluster.h b/include/crm/cluster.h index 386436bf43..4e391991c1 100644 --- a/include/crm/cluster.h +++ b/include/crm/cluster.h @@ -1,176 +1,176 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_COMMON_CLUSTER__H # define CRM_COMMON_CLUSTER__H # include # include # if SUPPORT_HEARTBEAT # include # include # endif extern gboolean crm_have_quorum; extern GHashTable *crm_peer_cache; extern GHashTable *crm_peer_id_cache; extern unsigned long long crm_peer_seq; # ifndef CRM_SERVICE # define CRM_SERVICE PCMK_SERVICE_ID # endif /* *INDENT-OFF* */ #define CRM_NODE_LOST "lost" #define CRM_NODE_MEMBER "member" #define CRM_NODE_ACTIVE CRM_NODE_MEMBER #define CRM_NODE_EVICTED "evicted" /* *INDENT-ON* */ typedef struct crm_peer_node_s { uint32_t id; /* Only used by corosync derivatives */ uint64_t born; /* Only used by heartbeat and the legacy plugin */ uint64_t last_seen; int32_t votes; /* Only used by the legacy plugin */ uint32_t processes; char *uname; char *uuid; char *state; char *expected; char *addr; /* Only used by the legacy plugin */ char *version;/* Unused */ } crm_node_t; void crm_peer_init(void); void crm_peer_destroy(void); char *get_corosync_uuid(uint32_t id, const char *uname); const char *get_node_uuid(uint32_t id, const char *uname); int get_corosync_id(int id, const char *uuid); typedef struct crm_cluster_s { char *uuid; char *uname; uint32_t nodeid; #if SUPPORT_HEARTBEAT ll_cluster_t *hb_conn; void (*hb_dispatch)(HA_Message *msg, void *private); #endif gboolean (*cs_dispatch) (int kind, const char *from, const char *data); void (*destroy) (gpointer); } crm_cluster_t; gboolean crm_cluster_connect(crm_cluster_t *cluster); void crm_cluster_disconnect(crm_cluster_t *cluster); enum crm_ais_msg_class { crm_class_cluster = 0, crm_class_members = 1, crm_class_notify = 2, crm_class_nodeid = 3, crm_class_rmpeer = 4, crm_class_quorum = 5, }; /* order here matters - its used to index into the crm_children array */ enum crm_ais_msg_types { crm_msg_none = 0, crm_msg_ais = 1, crm_msg_lrmd = 2, crm_msg_cib = 3, crm_msg_crmd = 4, crm_msg_attrd = 5, crm_msg_stonithd = 6, crm_msg_te = 7, crm_msg_pe = 8, crm_msg_stonith_ng = 9, }; gboolean send_cluster_message(crm_node_t *node, enum crm_ais_msg_types service, xmlNode * data, gboolean ordered); void destroy_crm_node(gpointer/* crm_node_t* */ data); crm_node_t *crm_get_peer(unsigned int id, const char *uname); guint crm_active_peers(void); gboolean crm_is_peer_active(const crm_node_t * node); -guint reap_crm_member(uint32_t id); +guint reap_crm_member(uint32_t id, const char *name); int crm_terminate_member(int nodeid, const char *uname, void* unused); int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection); gboolean crm_get_cluster_name(char **cname); # if SUPPORT_HEARTBEAT gboolean crm_is_heartbeat_peer_active(const crm_node_t * node); # endif # if SUPPORT_COROSYNC extern int ais_fd_sync; gboolean crm_is_corosync_peer_active(const crm_node_t * node); gboolean send_ais_text(int class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest); gboolean get_ais_nodeid(uint32_t * id, char **uname); # endif void empty_uuid_cache(void); const char *get_uuid(const char *uname); const char *get_uname(const char *uuid); void set_uuid(xmlNode * node, const char *attr, const char *uname); void unget_uuid(const char *uname); enum crm_status_type { crm_status_uname, crm_status_nstate, crm_status_processes, }; enum crm_ais_msg_types text2msg_type(const char *text); void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)); /* *INDENT-OFF* */ enum cluster_type_e { pcmk_cluster_unknown = 0x0001, pcmk_cluster_invalid = 0x0002, pcmk_cluster_heartbeat = 0x0004, pcmk_cluster_classic_ais = 0x0010, pcmk_cluster_corosync = 0x0020, pcmk_cluster_cman = 0x0040, }; /* *INDENT-ON* */ enum cluster_type_e get_cluster_type(void); const char *name_for_cluster_type(enum cluster_type_e type); gboolean is_corosync_cluster(void); gboolean is_cman_cluster(void); gboolean is_openais_cluster(void); gboolean is_classic_ais_cluster(void); gboolean is_heartbeat_cluster(void); char *get_local_node_name(void); char *get_node_name(uint32_t nodeid); #endif diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 53cc7b9d33..4f5a7707ad 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,1078 +1,1078 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; quorum_handle_t pcmk_quorum_handle = 0; gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) /* * CFG functionality stolen from node_name() in corosync-quorumtool.c * This resolves the first address assigned to a node and returns the name or IP address. */ char *corosync_node_name(uint64_t /*cmap_handle_t*/ cmap_handle, uint32_t nodeid) { int lpc = 0; int rc = CS_OK; int retries = 0; char *name = NULL; cmap_handle_t local_handle = 0; corosync_cfg_handle_t cfg_handle = 0; static corosync_cfg_callbacks_t cfg_callbacks = {}; /* nodeid == 0 == CMAN_NODEID_US */ if(nodeid == 0 && pcmk_nodeid) { nodeid = pcmk_nodeid; } else if(nodeid == 0) { /* Look it up */ int rc = -1; int retries = 0; cpg_handle_t handle = 0; cpg_callbacks_t cb = {}; cs_repeat(retries, 5, rc = cpg_initialize(&handle, &cb)); if (rc == CS_OK) { retries = 0; cs_repeat(retries, 5, rc = cpg_local_get(handle, &pcmk_nodeid)); } if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API: %d", rc); } cpg_finalize(handle); } if(cmap_handle == 0 && local_handle == 0) { retries = 0; crm_trace("Initializing CMAP connection"); do { rc = cmap_initialize(&local_handle); if(rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while(retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %s", cs_strerror(rc)); local_handle = 0; } } if(cmap_handle == 0) { cmap_handle = local_handle; } while(name == NULL && cmap_handle != 0) { uint32_t id = 0; char *key = NULL; key = g_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &id); crm_trace("Checking %u vs %u from %s", nodeid, id, key); g_free(key); if(rc != CS_OK) { break; } if(nodeid == id) { crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, name); if(name == NULL) { key = g_strdup_printf("nodelist.node.%d.ring0_addr", lpc); rc = cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, name); if(node_name_is_valid(key, name) == FALSE) { free(name); name = NULL; } g_free(key); } if(name == NULL) { key = g_strdup_printf("nodelist.node.%d.name", lpc); rc = cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s %d", key, name, rc); if(node_name_is_valid(key, name) == FALSE) { free(name); name = NULL; } g_free(key); } break; } lpc++; } if(name == NULL) { retries = 0; crm_trace("Initializing CFG connection"); do { rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks); if(rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while(retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to the Corosync CFG API, error %d", cs_strerror(rc)); cfg_handle = 0; } } if(name == NULL && cfg_handle != 0) { int numaddrs; char buf[INET6_ADDRSTRLEN]; socklen_t addrlen; struct sockaddr_storage *ss; corosync_cfg_node_address_t addrs[INTERFACE_MAX]; rc = corosync_cfg_get_node_addrs(cfg_handle, nodeid, INTERFACE_MAX, &numaddrs, addrs); if (rc == CS_OK) { ss = (struct sockaddr_storage *)addrs[0].address; if (ss->ss_family == AF_INET6) { addrlen = sizeof(struct sockaddr_in6); } else { addrlen = sizeof(struct sockaddr_in); } if (getnameinfo((struct sockaddr *)addrs[0].address, addrlen, buf, sizeof(buf), NULL, 0, 0) == 0) { crm_notice("Inferred node name '%s' for nodeid %u from DNS", buf, nodeid); if(node_name_is_valid("DNS", buf)) { name = strdup(buf); } } } else { crm_debug("Unable to get node address for nodeid %u: %s", nodeid, cs_strerror(rc)); } corosync_cfg_finalize(cfg_handle); } if(local_handle) { cmap_finalize(local_handle); } if(name == NULL) { crm_err("Unable to get node name for nodeid %u", nodeid); } return name; } enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } static char *ais_cluster_name = NULL; gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if (ais_cluster_name) { *cname = strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; int retries = 0; int rc = CS_OK; int buf_len = sizeof(cs_ipc_header_response_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } ais_msg = calloc(1, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if (node) { if (node->uname) { ais_msg->host.size = strlen(node->uname); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size); } ais_msg->host.id = node->id; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if (ais_msg->size < CRM_BZ2_THRESHOLD) { failback: ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_trace("Compressing message payload"); /* coverity[returned_null] Ignore */ compressed = malloc( len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); free(uncompressed); if (rc != BZ_OK) { crm_err("Compression failed: %d", rc); free(compressed); goto failback; } ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed ? " compressed" : "", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; buf = realloc(buf, buf_len); do { if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; transport = "cpg"; CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { crm_warn("Connection overloaded, cannot send messages"); goto bail; } else if (rc2 != CS_OK) { crm_warn("Could not determin the connection state: %s (%d)", ais_error2text(rc2), rc2); goto bail; } } } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); bail: if (rc != CS_OK) { crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_trace("Message %d: sent", ais_msg->id); } free(buf); free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode * msg, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = dump_xml_unformatted(msg); rc = send_ais_text(crm_class_cluster, data, local, node, dest); free(data); return rc; } void terminate_cs_connection(void) { crm_notice("Disconnecting from Corosync"); if(pcmk_cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); cpg_finalize(pcmk_cpg_handle); pcmk_cpg_handle = 0; } else { crm_info("No CPG connection"); } if(pcmk_quorum_handle) { crm_trace("Disconnecting quorum"); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } else { crm_info("No Quorum connection"); } } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; static gboolean ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (int kind, const char *from, const char *data)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); goto done; } if (msg->header.id != crm_class_members) { /* Is this even needed anymore? */ crm_get_peer(msg->sender.id, msg->sender.uname); } if (msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); - reap_crm_member(id); + reap_crm_member(id, NULL); goto done; } crm_trace("Payload: %s", data); if (dispatch != NULL) { dispatch(msg->header.id, msg->sender.uname, data); } done: free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; static int pcmk_cpg_dispatch(gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return -1; } return 0; } static void pcmk_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message *) msg; if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if (ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; gboolean found = FALSE; static int counter = 0; for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); crm_info("Left[%d.%d] %s.%d ", counter, i, groupName->value, left_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); } for (i = 0; i < joined_list_entries; i++) { crm_info("Joined[%d.%d] %s.%d ", counter, i, groupName->value, joined_list[i].nodeid); } for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); crm_info("Member[%d.%d] %s.%d ", counter, i, groupName->value, member_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); if(pcmk_nodeid == member_list[i].nodeid) { found = TRUE; } } if(!found) { crm_err("We're not part of CPG group %s anymore!", groupName->value); /* Possibly re-call cpg_join() */ } counter++; } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; static gboolean init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char *data), void (*destroy) (gpointer), uint32_t * nodeid) { int rc = -1; int fd = 0; int retries = 0; crm_node_t *peer = NULL; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = destroy, }; strncpy(pcmk_cpg_group.value, crm_system_name, 128); pcmk_cpg_group.length = strlen(crm_system_name) + 1; cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } rc = cpg_fd_get(pcmk_cpg_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %d\n", rc); goto bail; } mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, dispatch, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(pcmk_cpg_handle); return FALSE; } peer = crm_get_peer(pcmk_nodeid, pcmk_uname); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); return TRUE; } static int pcmk_quorum_dispatch(gpointer user_data) { int rc = 0; rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); return -1; } return 0; } static void corosync_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data) { int *seq = user_data; crm_node_t *node = value; if (node->last_seen != *seq && node->state && crm_str_eq(CRM_NODE_LOST, node->state, TRUE) == FALSE) { crm_notice("Node %d/%s was not seen in the previous transition", node->id, node->uname); crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0); } } static void corosync_mark_node_unseen(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; node->last_seen = 0; } static void pcmk_quorum_notification(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) { int i; static gboolean init_phase = TRUE; if (quorate != crm_have_quorum) { crm_notice("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "acquired" : "lost", (long unsigned int)view_list_entries); crm_have_quorum = quorate; } else { crm_info("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "retained" : "still lost", (long unsigned int)view_list_entries); } if(view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; g_hash_table_foreach(crm_peer_cache, corosync_mark_node_unseen, NULL); for (i = 0; i < view_list_entries; i++) { uint32_t id = view_list[i]; char *name = NULL; crm_node_t *node = NULL; crm_debug("Member[%d] %d ", i, id); node = crm_get_peer(id, NULL); if(node->uname == NULL) { crm_info("Obtaining name for new node %u", id); name = corosync_node_name(0, id); node = crm_get_peer(id, name); } crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, ring_id); free(name); } crm_trace("Reaping unseen nodes..."); g_hash_table_foreach(crm_peer_cache, corosync_mark_unseen_peer_dead, &ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, quorate); } } quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = pcmk_quorum_notification, }; gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { int rc = -1; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d\n", rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured\n"); goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d\n", rc); goto bail; } crm_notice("Quorum %s", quorate ? "acquired" : "lost"); quorum_app_callback = dispatch; crm_have_quorum = quorate; rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d\n", rc); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %d\n", rc); goto bail; } mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); corosync_initialize_nodelist(NULL, FALSE, NULL); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); return FALSE; } return TRUE; } gboolean init_cs_connection(crm_cluster_t *cluster) { int retries = 0; while (retries < 5) { int rc = init_cs_connection_once(cluster); retries++; switch (rc) { case CS_OK: return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: sleep(retries); break; default: return FALSE; } } crm_err("Could not connect to corosync after %d retries", retries); return FALSE; } gboolean init_cs_connection_once(crm_cluster_t *cluster) { enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ if(stack != pcmk_cluster_corosync) { crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; } if (init_cpg_connection(cluster->cs_dispatch, cluster->destroy, &pcmk_nodeid) == FALSE) { return FALSE; } pcmk_uname = get_local_node_name(); crm_info("Connection to '%s': established", name_for_cluster_type(stack)); CRM_ASSERT(pcmk_uname != NULL); pcmk_uname_len = strlen(pcmk_uname); if (pcmk_nodeid != 0) { /* Ensure the local node always exists */ crm_get_peer(pcmk_nodeid, pcmk_uname); } cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); cluster->uname = strdup(pcmk_uname); cluster->nodeid = pcmk_nodeid; return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } enum cluster_type_e find_corosync_variant(void) { int rc = CS_OK; cmap_handle_t handle; /* There can be only one (possibility if confdb isn't around) */ rc = cmap_initialize(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API. Error %d", rc); return pcmk_cluster_unknown; } cmap_finalize(handle); return pcmk_cluster_corosync; } gboolean crm_is_corosync_peer_active(const crm_node_t * node) { if (node == NULL) { crm_trace("NULL"); return FALSE; } else if(safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if((node->processes & crm_proc_cpg) == 0) { crm_trace("%s: processes=%.16x", node->uname, node->processes); return FALSE; } return TRUE; } gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode *xml_parent) { int lpc = 0; int rc = CS_OK; int retries = 0; gboolean any = FALSE; cmap_handle_t cmap_handle; do { rc = cmap_initialize(&cmap_handle); if(rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while(retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } crm_trace("Initializing corosync nodelist"); for(lpc = 0; ; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = g_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); g_free(key); if(rc != CS_OK) { break; } name = corosync_node_name(cmap_handle, nodeid); if(nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); crm_get_peer(nodeid, name); } if(nodeid > 0 && name != NULL) { any = TRUE; if(xml_parent) { xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE); crm_xml_add_int(node, XML_ATTR_ID, nodeid); crm_xml_add(node, XML_ATTR_UNAME, name); if(force_member) { crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER); } } } free(name); } cmap_finalize(cmap_handle); return any; } diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c index 2340f81451..b557c394f7 100644 --- a/lib/cluster/legacy.c +++ b/lib/cluster/legacy.c @@ -1,1451 +1,1451 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #if SUPPORT_COROSYNC # include # include # include # include cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; #endif #if HAVE_CMAP # include #endif #if SUPPORT_CMAN # include cman_handle_t pcmk_cman_handle = NULL; #endif static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; int ais_dispatch(gpointer user_data); #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } char * get_ais_data(const AIS_Message * msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (msg->is_compressed == FALSE) { crm_trace("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, (char *)msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_COROSYNC int ais_fd_sync = -1; int ais_fd_async = -1; /* never send messages via this channel */ void *ais_ipc_ctx = NULL; hdb_handle_t ais_ipc_handle = 0; static char *ais_cluster_name = NULL; gboolean get_ais_nodeid(uint32_t * id, char **uname) { struct iovec iov; int retries = 0; int rc = CS_OK; cs_ipc_header_response_t header; struct crm_ais_nodeid_resp_s answer; header.error = CS_OK; header.id = crm_class_nodeid; header.size = sizeof(cs_ipc_header_response_t); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(uname != NULL, return FALSE); iov.iov_base = &header; iov.iov_len = header.size; retry: errno = 0; rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, &answer, sizeof(answer)); if (rc == CS_OK) { CRM_CHECK(answer.header.size == sizeof(struct crm_ais_nodeid_resp_s), crm_err("Odd message: id=%d, size=%d, error=%d", answer.header.id, answer.header.size, answer.header.error)); CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id)); } if ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } if (rc != CS_OK) { crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } else if (answer.header.error != CS_OK) { crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); *id = answer.id; *uname = strdup(answer.uname); ais_cluster_name = strdup(answer.cname); return TRUE; } gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if (ais_cluster_name) { *cname = strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; enum cluster_type_e cluster_type = get_cluster_type(); int retries = 0; int rc = CS_OK; int buf_len = sizeof(cs_ipc_header_response_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; cs_ipc_header_response_t *header = NULL; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } ais_msg = calloc(1, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if (node) { if (node->uname) { ais_msg->host.size = strlen(node->uname); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size); } ais_msg->host.id = node->id; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if (ais_msg->size < CRM_BZ2_THRESHOLD) { failback: ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_trace("Compressing message payload"); compressed = malloc( len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); free(uncompressed); if (rc != BZ_OK) { crm_err("Compression failed: %d", rc); free(compressed); goto failback; } ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed ? " compressed" : "", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; buf = realloc(buf, buf_len); do { if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; switch (cluster_type) { case pcmk_cluster_corosync: CRM_ASSERT(FALSE/*Not supported here*/); break; case pcmk_cluster_classic_ais: rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); header = (cs_ipc_header_response_t *) buf; if (rc == CS_OK) { CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", header->id, header->size, class, header->error)); CRM_ASSERT(buf_len >= header->size); CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id)); CRM_CHECK(header->error == CS_OK, rc = header->error); } break; case pcmk_cluster_cman: transport = "cpg"; CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { crm_warn("Connection overloaded, cannot send messages"); goto bail; } else if (rc2 != CS_OK) { crm_warn("Could not determin the connection state: %s (%d)", ais_error2text(rc2), rc2); goto bail; } } break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: case pcmk_cluster_heartbeat: CRM_ASSERT(is_openais_cluster()); break; } } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); bail: if (rc != CS_OK) { crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_trace("Message %d: sent", ais_msg->id); } free(buf); free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode * msg, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; if (is_classic_ais_cluster()) { if (ais_fd_async < 0) { crm_err("Not connected to AIS: %d", ais_fd_async); return FALSE; } } data = dump_xml_unformatted(msg); rc = send_ais_text(crm_class_cluster, data, local, node, dest); free(data); return rc; } void terminate_cs_connection(void) { crm_notice("Disconnecting from Corosync"); if (is_classic_ais_cluster()) { if(ais_ipc_handle) { crm_trace("Disconnecting plugin"); coroipcc_service_disconnect(ais_ipc_handle); ais_ipc_handle = 0; } else { crm_info("No plugin connection"); } } else { if(pcmk_cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); cpg_finalize(pcmk_cpg_handle); pcmk_cpg_handle = 0; } else { crm_info("No CPG connection"); } } # if SUPPORT_CMAN if (is_cman_cluster()) { if(pcmk_cman_handle) { crm_trace("Disconnecting cman"); cman_stop_notification(pcmk_cman_handle); cman_finish(pcmk_cman_handle); } else { crm_info("No cman connection"); } } # endif ais_fd_async = -1; ais_fd_sync = -1; } static crm_node_t * crm_update_ais_node(xmlNode * member, long long seq) { const char *id_s = crm_element_value(member, "id"); const char *addr = crm_element_value(member, "addr"); const char *uname = crm_element_value(member, "uname"); const char *state = crm_element_value(member, "state"); const char *born_s = crm_element_value(member, "born"); const char *seen_s = crm_element_value(member, "seen"); const char *votes_s = crm_element_value(member, "votes"); const char *procs_s = crm_element_value(member, "processes"); int votes = crm_int_helper(votes_s, NULL); unsigned int id = crm_int_helper(id_s, NULL); unsigned int procs = crm_int_helper(procs_s, NULL); /* TODO: These values will contain garbage if version < 0.7.1 */ uint64_t born = crm_int_helper(born_s, NULL); uint64_t seen = crm_int_helper(seen_s, NULL); return crm_update_peer(__FUNCTION__, id, born, seen, votes, procs, uname, uname, addr, state); } static gboolean ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (int kind, const char *from, const char *data)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); goto done; } if (msg->header.id != crm_class_members) { crm_get_peer(msg->sender.id, msg->sender.uname); } if (msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); - reap_crm_member(id); + reap_crm_member(id, NULL); goto done; } else if (is_classic_ais_cluster()) { if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { xmlNode *node = NULL; const char *value = NULL; gboolean quorate = FALSE; xml = string2xml(data); if (xml == NULL) { crm_err("Invalid membership update: %s", data); goto badmsg; } value = crm_element_value(xml, "quorate"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg); if (crm_is_true(value)) { quorate = TRUE; } value = crm_element_value(xml, "id"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg); crm_peer_seq = crm_int_helper(value, NULL); if (quorate != crm_have_quorum) { crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost"); crm_have_quorum = quorate; } else { crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost"); } for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { crm_update_ais_node(node, crm_peer_seq); } } } crm_trace("Payload: %s", data); if (dispatch != NULL) { dispatch(msg->header.id, msg->sender.uname, data); } done: free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } int ais_dispatch(gpointer user_data) { int rc = CS_OK; gboolean good = TRUE; gboolean(*dispatch) (int kind, const char *from, const char *data) = user_data; do { char *buffer = NULL; rc = coroipcc_dispatch_get(ais_ipc_handle, (void **)&buffer, 0); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { return 0; } if (rc != CS_OK) { crm_perror(LOG_ERR, "Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); return -1; } if (buffer == NULL) { /* NULL is a legal "no message afterall" value */ return 0; } good = ais_dispatch_message((AIS_Message *) buffer, dispatch); coroipcc_dispatch_put(ais_ipc_handle); } while (good && ais_ipc_handle); if(good) { return 0; } return -1; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; crm_exit(1); } # if SUPPORT_CMAN static int pcmk_cman_dispatch(gpointer user_data) { int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to cman failed: %d", rc); return FALSE; } return TRUE; } # define MAX_NODES 256 static void cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) { int rc = 0, lpc = 0, node_count = 0; cman_cluster_t cluster; static cman_node_t cman_nodes[MAX_NODES]; gboolean(*dispatch) (unsigned long long, gboolean) = privdata; switch (reason) { case CMAN_REASON_STATECHANGE: memset(&cluster, 0, sizeof(cluster)); rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); return; } crm_peer_seq = cluster.ci_generation; if (arg != crm_have_quorum) { crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg ? "acquired" : "lost"); crm_have_quorum = arg; } else { crm_info("Membership %llu: quorum %s", crm_peer_seq, arg ? "retained" : "still lost"); } rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc < 0) { crm_err("Couldn't query cman node list: %d %d", rc, errno); return; } for (lpc = 0; lpc < node_count; lpc++) { if (cman_nodes[lpc].cn_nodeid == 0) { /* Never allow node ID 0 to be considered a member #315711 */ cman_nodes[lpc].cn_member = 0; } crm_update_peer(__FUNCTION__, cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_incarnation, cman_nodes[lpc].cn_member ? crm_peer_seq : 0, 0, 0, cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_name, NULL, cman_nodes[lpc].cn_member ? CRM_NODE_MEMBER : CRM_NODE_LOST); } if (dispatch) { dispatch(crm_peer_seq, crm_have_quorum); } break; case CMAN_REASON_TRY_SHUTDOWN: /* Always reply with a negative - pacemaker needs to be stopped first */ crm_info("CMAN wants to shut down: %s", arg ? "forced" : "optional"); cman_replyto_shutdown(pcmk_cman_handle, 0); break; case CMAN_REASON_CONFIG_UPDATE: /* Ignore */ break; } } # endif gboolean init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { # if SUPPORT_CMAN int rc = -1, fd = -1; cman_cluster_t cluster; struct mainloop_fd_callbacks cman_fd_callbacks = { .dispatch = pcmk_cman_dispatch, .destroy = destroy, }; crm_info("Configuring Pacemaker to obtain quorum from cman"); memset(&cluster, 0, sizeof(cluster)); pcmk_cman_handle = cman_init(dispatch); if (pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) { crm_err("Couldn't connect to cman"); goto cman_bail; } rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); goto cman_bail; } ais_cluster_name = strdup(cluster.ci_name); rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); if (rc < 0) { crm_err("Couldn't register for cman notifications: %d %d", rc, errno); goto cman_bail; } /* Get the current membership state */ cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE, cman_is_quorate(pcmk_cman_handle)); fd = cman_get_fd(pcmk_cman_handle); mainloop_add_fd("cman", G_PRIORITY_MEDIUM, fd, dispatch, &cman_fd_callbacks); cman_bail: if (rc < 0) { cman_finish(pcmk_cman_handle); return FALSE; } # else crm_err("cman qorum is not supported in this build"); crm_exit(100); # endif return TRUE; } # ifdef SUPPORT_COROSYNC gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; static int pcmk_cpg_dispatch(gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return -1; } return 0; } static void pcmk_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message *) msg; if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if (ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); crm_debug("Member[%d] %d ", i, member_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); } for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); crm_debug("Left[%d] %d ", i, left_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); } } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; # endif static gboolean init_cpg_connection(crm_cluster_t *cluster) { # ifdef SUPPORT_COROSYNC int rc = -1; int fd = 0; int retries = 0; crm_node_t *peer = NULL; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cluster->destroy, }; strcpy(pcmk_cpg_group.value, crm_system_name); pcmk_cpg_group.length = strlen(crm_system_name) + 1; cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)&cluster->nodeid)); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } rc = cpg_fd_get(pcmk_cpg_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %d\n", rc); goto bail; } mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster->cs_dispatch, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(pcmk_cpg_handle); return FALSE; } peer = crm_get_peer(cluster->nodeid, pcmk_uname); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); # else crm_err("The Corosync CPG API is not supported in this build"); crm_exit(100); # endif return TRUE; } gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { crm_err("The Corosync quorum API is not supported in this build"); crm_exit(100); return TRUE; } static gboolean init_cs_connection_classic(crm_cluster_t *cluster) { int rc; int pid = 0; char *pid_s = NULL; struct utsname name; struct mainloop_fd_callbacks ais_fd_callbacks = { .dispatch = ais_dispatch, .destroy = cluster->destroy, }; crm_info("Creating connection to our Corosync plugin"); rc = coroipcc_service_connect(COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, &ais_ipc_handle); if (ais_ipc_handle) { coroipcc_fd_get(ais_ipc_handle, &ais_fd_async); } else { crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, strerror(errno), errno); return FALSE; } if (ais_fd_async <= 0 && rc == CS_OK) { crm_err("No context created, but connection reported 'ok'"); rc = CS_ERR_LIBRARY; } if (rc != CS_OK) { crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, ais_error2text(rc), rc); } if (rc != CS_OK) { return FALSE; } if (ais_fd_callbacks.destroy == NULL) { ais_fd_callbacks.destroy = ais_destroy; } mainloop_add_fd("corosync-plugin", G_PRIORITY_MEDIUM, ais_fd_async, cluster->cs_dispatch, &ais_fd_callbacks); crm_info("AIS connection established"); pid = getpid(); pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); if (uname(&name) < 0) { crm_perror(LOG_ERR, "Could not determin the current host"); crm_exit(100); } get_ais_nodeid(&pcmk_nodeid, &pcmk_uname); if (safe_str_neq(name.nodename, pcmk_uname)) { crm_crit("Node name mismatch! Corosync supplied %s, our lookup returned %s", pcmk_uname, name.nodename); crm_notice ("Node name mismatches usually occur when assigned automatically by DHCP servers"); crm_notice("If this node was part of the cluster with a different name," " you will need to remove the old entry with crm_node --remove"); } return TRUE; } static int pcmk_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg && is_classic_ais_cluster()) { xmlNode *node = NULL; for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { int id = 0; int children = 0; const char *uname = crm_element_value(node, "uname"); crm_element_value_int(node, "id", &id); crm_element_value_int(node, "processes", &children); if (id == 0) { crm_log_xml_err(msg, "Bad Update"); } else { crm_node_t *peer = crm_get_peer(id, uname); crm_update_peer_proc(__FUNCTION__, peer, children, NULL); } } } free_xml(msg); return 0; } static void pcmk_mcp_destroy(gpointer user_data) { void (*callback)(gpointer data) = user_data; if(callback) { callback(NULL); } } gboolean init_cs_connection(crm_cluster_t *cluster) { int retries = 0; static struct ipc_client_callbacks mcp_callbacks = { .dispatch = pcmk_mcp_dispatch, .destroy = pcmk_mcp_destroy }; while (retries < 5) { int rc = init_cs_connection_once(cluster); retries++; switch (rc) { case CS_OK: if (getenv("HA_mcp")) { xmlNode *poke = create_xml_node(NULL, "poke"); mainloop_io_t *ipc = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_MEDIUM, 0, cluster->destroy, &mcp_callbacks); crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL); free_xml(poke); } return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: sleep(retries); break; default: return FALSE; } } crm_err("Retry count exceeded: %d", retries); return FALSE; } char *classic_node_name(uint32_t nodeid) { int rc = CS_OK; int retries = 0; char *name = NULL; corosync_cfg_handle_t cfg_handle = 0; static corosync_cfg_callbacks_t cfg_callbacks = {}; if(nodeid == 0 /* AKA. CMAN_NODEID_US */) { nodeid = pcmk_nodeid; } if(name == NULL) { retries = 0; crm_trace("Initializing CFG connection"); do { rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks); if(rc != CS_OK) { retries++; crm_debug("API connection setup failed: %d. Retrying in %ds", rc, retries); sleep(retries); } } while(retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to the Corosync CFG API, error %d", rc); cfg_handle = 0; } } if(name == NULL && cfg_handle != 0) { int numaddrs; char buf[INET6_ADDRSTRLEN]; socklen_t addrlen; struct sockaddr_storage *ss; corosync_cfg_node_address_t addrs[INTERFACE_MAX]; rc = corosync_cfg_get_node_addrs(cfg_handle, nodeid, INTERFACE_MAX, &numaddrs, addrs); if (rc == CS_OK) { ss = (struct sockaddr_storage *)addrs[0].address; if (ss->ss_family == AF_INET6) { addrlen = sizeof(struct sockaddr_in6); } else { addrlen = sizeof(struct sockaddr_in); } if (getnameinfo((struct sockaddr *)addrs[0].address, addrlen, buf, sizeof(buf), NULL, 0, 0) == 0) { crm_notice("Inferred node name '%s' for nodeid %u from DNS", buf, nodeid); if(node_name_is_valid("DNS", buf)) { name = strdup(buf); } } } else { crm_debug("Unable to get node address for nodeid %u: %d", nodeid, rc); } corosync_cfg_finalize(cfg_handle); } if(name == NULL) { crm_debug("Unable to get node name for nodeid %u", nodeid); } return name; } char *cman_node_name(uint32_t nodeid) { char *name = NULL; # if SUPPORT_CMAN cman_node_t us; cman_handle_t cman; cman = cman_init(NULL); if (cman != NULL && cman_is_active(cman)) { us.cn_name[0] = 0; cman_get_node(cman, nodeid, &us); name = strdup(us.cn_name); crm_info("Using CMAN node name %s for %u", name, nodeid); } cman_finish(cman); # endif if(name == NULL) { crm_debug("Unable to get node name for nodeid %u", nodeid); } return name; } extern int set_cluster_type(enum cluster_type_e type); gboolean init_cs_connection_once(crm_cluster_t *cluster) { enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ switch (stack) { case pcmk_cluster_classic_ais: if (init_cs_connection_classic(cluster) == FALSE) { return FALSE; } break; case pcmk_cluster_cman: if (init_cpg_connection(cluster) == FALSE) { return FALSE; } pcmk_uname = cman_node_name(0 /* CMAN_NODEID_US */); break; case pcmk_cluster_heartbeat: crm_info("Could not find an active corosync based cluster"); return FALSE; break; default: crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; break; } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); CRM_ASSERT(pcmk_uname != NULL); pcmk_uname_len = strlen(pcmk_uname); pcmk_nodeid = cluster->nodeid; if (pcmk_nodeid != 0) { /* Ensure the local node always exists */ crm_get_peer(pcmk_nodeid, pcmk_uname); } cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); cluster->uname = strdup(pcmk_uname); return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if (repaired) { crm_err ("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif static int get_config_opt(confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = NULL; char buffer[256]; if (*value) { free(*value); *value = NULL; } if (object_handle > 0) { if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); free(env_key); if (*value) { crm_info("Found '%s' in ENV for option: %s", *value, key); *value = strdup(env_value); return 0; } if (fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if (rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if (top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if (rc != CS_OK) { crm_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } enum cluster_type_e find_corosync_variant(void) { confdb_handle_t config; enum cluster_type_e found = pcmk_cluster_unknown; int rc; char *value = NULL; confdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; static confdb_callbacks_t callbacks = { }; rc = confdb_initialize(&config, &callbacks); if (rc != CS_OK) { crm_debug("Could not initialize Cluster Configuration Database API instance error %d", rc); return found; } top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); while (local_handle) { get_config_opt(config, local_handle, "name", &value, NULL); if (safe_str_eq("pacemaker", value)) { found = pcmk_cluster_classic_ais; get_config_opt(config, local_handle, "ver", &value, "0"); crm_trace("Found Pacemaker plugin version: %s", value); break; } local_handle = config_find_next(config, "service", top_handle); } if (found == pcmk_cluster_unknown) { top_handle = config_find_init(config); local_handle = config_find_next(config, "quorum", top_handle); get_config_opt(config, local_handle, "provider", &value, NULL); if (safe_str_eq("quorum_cman", value)) { crm_trace("Found CMAN quorum provider"); found = pcmk_cluster_cman; } } free(value); confdb_finalize(config); return found; } gboolean crm_is_corosync_peer_active(const crm_node_t * node) { enum crm_proc_flag proc = crm_proc_none; if (node == NULL) { crm_trace("NULL"); return FALSE; } else if(safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if(is_cman_cluster() && (node->processes & crm_proc_cpg)) { /* If we can still talk to our peer process on that node, * then its also part of the corosync membership */ crm_trace("%s: processes=%.8x", node->uname, node->processes); return TRUE; } else if(is_classic_ais_cluster()) { if(node->processes < crm_proc_none) { crm_debug("%s: unknown process list, assuming active for now", node->uname); return TRUE; } else if(is_set(node->processes, crm_proc_none)) { crm_debug("%s: all processes are inactive", node->uname); return FALSE; } else if(is_not_set(node->processes, crm_proc_plugin)) { crm_trace("%s: processes=%.8x", node->uname, node->processes); return FALSE; } } proc = text2proc(crm_system_name); if(proc > crm_proc_none && (node->processes & proc) == 0) { crm_trace("%s: proc %.8x not in %.8x", node->uname, proc, node->processes); return FALSE; } return TRUE; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 847f569706..7216d69414 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,423 +1,443 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include GHashTable *crm_peer_id_cache = NULL; GHashTable *crm_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; gboolean crm_is_peer_active(const crm_node_t * node) { #if SUPPORT_COROSYNC if(is_openais_cluster()) { return crm_is_corosync_peer_active(node); } #endif #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { return crm_is_heartbeat_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; - if (search != NULL && node->id != search->id) { + if (search == NULL) { + return FALSE; + + } else if(search->id && node->id != search->id) { + return FALSE; + + } else if(search->id == 0 && safe_str_neq(node->uname, search->uname)) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_notice("Removing %s/%u from the membership list", node->uname, node->id); return TRUE; } return FALSE; } guint -reap_crm_member(uint32_t id) +reap_crm_member(uint32_t id, const char *name) { int matches = 0; - crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); + crm_node_t *node = NULL; + + if(crm_peer_cache == NULL || crm_peer_id_cache == NULL) { + crm_trace("Nothing to do, cache not initialized"); + return 0; + } + + if (name) { + node = g_hash_table_lookup(crm_peer_cache, name); + } + + if (node == NULL && id > 0) { + node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); + } if (node == NULL) { - crm_info("Peer %u is unknown", id); + crm_info("Peer %u/%s cannot be purged: does not exist", id, name); + return 0; + } - } else if (crm_is_peer_active(node)) { - crm_warn("Peer %u/%s is still active", id, node->uname); + if (crm_is_peer_active(node)) { + crm_warn("Peer %u/%s cannot be purged: still active", id, name); } else { if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { - crm_notice("Removed dead peer %u from the uuid cache", id); + crm_notice("Purged dead peer %u/%s from the uuid cache", id, name); - } else { - crm_warn("Peer %u/%s was not removed", id, node->uname); + } else if(id) { + crm_warn("Peer %u/%s was not found in the ID cache", id, name); } matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); - - crm_notice("Removed %d dead peers with id=%u from the membership list", matches, id); + crm_notice("Purged %d dead peers with id=%u from the membership cache", matches, id); } return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; if(crm_peer_cache) { g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); } return count; } void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u", node->id); free(node->addr); free(node->uname); free(node->state); free(node->uuid); free(node); } void crm_peer_init(void) { static gboolean initialized = FALSE; if (initialized) { return; } initialized = TRUE; crm_peer_destroy(); if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node); } if (crm_peer_id_cache == NULL) { crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_peer_id_cache != NULL) { g_hash_table_destroy(crm_peer_id_cache); crm_peer_id_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (node == NULL && uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity * * TO-DO: Replace the old uname instead? */ node = NULL; } } if (node == NULL) { crm_debug("Creating entry for node %s/%u", uname, id); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); } if (id > 0 && node->id != id) { node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); } if (uname && node->uname == NULL) { node->uname = strdup(uname); crm_info("Node %u is now known as %s", id, uname); g_hash_table_replace(crm_peer_cache, node->uname, node); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if (node && node->uname && node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if(uuid) { node->uuid = strdup(uuid); crm_info("Node %u has uuid %s", id, node->uuid); } else { crm_warn("Cannot obtain a UUID for node %d/%s", id, node->uname); } } return node; } crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { #if SUPPORT_PLUGIN gboolean addr_changed = FALSE; gboolean votes_changed = FALSE; #endif crm_node_t *node = NULL; id = get_corosync_id(id, uuid); node = crm_get_peer(id, uname); CRM_ASSERT(node != NULL); if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ node->uuid = get_corosync_uuid(id, uname); } else if (uuid != NULL) { node->uuid = strdup(uuid); } } if (children > 0) { crm_update_peer_proc(source, node, children, state); } if (state != NULL) { crm_update_peer_state(source, node, state, seen); } #if SUPPORT_HEARTBEAT if (born != 0) { node->born = born; } #endif #if SUPPORT_PLUGIN /* These were only used by the plugin */ if (born != 0) { node->born = born; } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; free(node->addr); node->addr = strdup(addr); } } if (addr_changed || votes_changed) { crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x", source, node->uname, node->id, node->state, node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes); } #endif return node; } void crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return); last = node->processes; if(status == NULL) { node->processes = flag; if(node->processes != last) { changed = TRUE; } } else if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) == 0) { set_bit(node->processes, flag); changed = TRUE; } #if SUPPORT_PLUGIN } else if (safe_str_eq(status, CRM_NODE_MEMBER)) { if (flag > 0 && node->processes != flag) { node->processes = flag; changed = TRUE; } #endif } else if (node->processes & flag) { clear_bit(node->processes, flag); changed = TRUE; } if (changed) { if(status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%d] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%d] - %s is now %s", source, node->uname, node->id, peer2text(flag), status); } if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } else { crm_trace("%s: Node %s[%d] - %s is unchanged (%s)", source, node->uname, node->id, peer2text(flag), status); } } void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); last = node->expected; if (expected != NULL && safe_str_neq(node->expected, expected)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%d] - expected state is now %s", source, node->uname, node->id, expected); free(last); } else { crm_trace("%s: Node %s[%d] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } void crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state); return); last = node->state; if (state != NULL && safe_str_neq(node->state, state)) { node->state = strdup(state); changed = TRUE; } if (membership != 0 && safe_str_eq(node->state, CRM_NODE_MEMBER)) { node->last_seen = membership; } if (changed) { crm_notice("%s: Node %s[%u] - state is now %s", source, node->uname, node->id, state); if (crm_status_callback) { crm_status_callback(crm_status_nstate, node, last); } free(last); } else { crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id, state); } } int crm_terminate_member(int nodeid, const char *uname, void * unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } diff --git a/tools/crm_node.c b/tools/crm_node.c index aef1fbc3dd..fe03e6e5d1 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -1,798 +1,817 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include /* for basename() */ #include #include #include #include #include int command = 0; int ccm_fd = 0; gboolean do_quiet = FALSE; char *target_uuid = NULL; char *target_uname = NULL; const char *standby_value = NULL; const char *standby_scope = NULL; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\tEssential output only"}, {"-spacer-", 1, 0, '-', "\nStack:"}, #if SUPPORT_CMAN {"cman", 0, 0, 'c', "\tOnly try connecting to a cman-based cluster"}, #endif #if SUPPORT_COROSYNC {"openais", 0, 0, 'A', "\tOnly try connecting to an OpenAIS-based cluster"}, #endif #ifdef SUPPORT_HEARTBEAT {"heartbeat", 0, 0, 'H', "Only try connecting to a Heartbeat-based cluster"}, #endif {"-spacer-", 1, 0, '-', "\nCommands:"}, {"name", 0, 0, 'n', "\tDisplay the name used by the cluster for this node"}, {"name-for-id", 1, 0, 'N', "\tDisplay the name used by the cluster for the node with the specified id"}, {"epoch", 0, 0, 'e', "\tDisplay the epoch during which this node joined the cluster"}, {"quorum", 0, 0, 'q', "\tDisplay a 1 if our partition has quorum, 0 if not"}, {"list", 0, 0, 'l', "\tDisplay all known members (past and present) of this cluster (Not available for heartbeat clusters)"}, {"partition", 0, 0, 'p', "Display the members of this partition"}, {"cluster-id", 0, 0, 'i', "Display this node's cluster id"}, {"remove", 1, 0, 'R', "(Advanced, AIS-Only) Remove the (stopped) node with the specified nodeid from the cluster"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"force", 0, 0, 'f'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ #if SUPPORT_HEARTBEAT # include # include # include # define UUID_LEN 16 oc_ev_t *ccm_token = NULL; static void *ccm_library = NULL; void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); static gboolean read_local_hb_uuid(void) { cl_uuid_t uuid; char *buffer = NULL; long start = 0, read_len = 0; FILE *input = fopen(UUID_FILE, "r"); if (input == NULL) { crm_info("Could not open UUID file %s\n", UUID_FILE); return FALSE; } /* see how big the file is */ start = ftell(input); fseek(input, 0L, SEEK_END); if (UUID_LEN != ftell(input)) { fprintf(stderr, "%s must contain exactly %d bytes\n", UUID_FILE, UUID_LEN); abort(); } fseek(input, 0L, start); if (start != ftell(input)) { fprintf(stderr, "fseek not behaving: %ld vs. %ld\n", start, ftell(input)); crm_exit(2); } buffer = malloc(50); read_len = fread(uuid.uuid, 1, UUID_LEN, input); fclose(input); if (read_len != UUID_LEN) { fprintf(stderr, "Expected and read bytes differ: %d vs. %ld\n", UUID_LEN, read_len); crm_exit(3); } else if (buffer != NULL) { cl_uuid_unparse(&uuid, buffer); fprintf(stdout, "%s\n", buffer); return TRUE; } else { fprintf(stderr, "No buffer to unparse\n"); crm_exit(4); } free(buffer); return FALSE; } static void ccm_age_callback(oc_ed_t event, void *cookie, size_t size, const void *data) { int lpc; int node_list_size; const oc_ev_membership_t *oc = (const oc_ev_membership_t *)data; int (*ccm_api_callback_done) (void *cookie) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_callback_done", 1); node_list_size = oc->m_n_member; if (command == 'q') { crm_debug("Processing \"%s\" event.", event == OC_EV_MS_NEW_MEMBERSHIP ? "NEW MEMBERSHIP" : event == OC_EV_MS_NOT_PRIMARY ? "NOT PRIMARY" : event == OC_EV_MS_PRIMARY_RESTORED ? "PRIMARY RESTORED" : event == OC_EV_MS_EVICTED ? "EVICTED" : "NO QUORUM MEMBERSHIP"); if (ccm_have_quorum(event)) { fprintf(stdout, "1\n"); } else { fprintf(stdout, "0\n"); } } else if (command == 'e') { crm_debug("Searching %d members for our birth", oc->m_n_member); } for (lpc = 0; lpc < node_list_size; lpc++) { if (command == 'p') { fprintf(stdout, "%s ", oc->m_array[oc->m_memb_idx + lpc].node_uname); } else if (command == 'e') { int (*ccm_api_is_my_nodeid) (const oc_ev_t * token, const oc_node_t * node) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_is_my_nodeid", 1); if ((*ccm_api_is_my_nodeid) (ccm_token, &(oc->m_array[lpc]))) { crm_debug("MATCH: nodeid=%d, uname=%s, born=%d", oc->m_array[oc->m_memb_idx + lpc].node_id, oc->m_array[oc->m_memb_idx + lpc].node_uname, oc->m_array[oc->m_memb_idx + lpc].node_born_on); fprintf(stdout, "%d\n", oc->m_array[oc->m_memb_idx + lpc].node_born_on); } } } (*ccm_api_callback_done) (cookie); if (command == 'p') { fprintf(stdout, "\n"); } fflush(stdout); crm_exit(0); } static gboolean ccm_age_connect(int *ccm_fd) { gboolean did_fail = FALSE; int ret = 0; int (*ccm_api_register) (oc_ev_t ** token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register", 1); int (*ccm_api_set_callback) (const oc_ev_t * token, oc_ev_class_t class, oc_ev_callback_t * fn, oc_ev_callback_t ** prev_fn) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback", 1); void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special", 1); int (*ccm_api_activate) (const oc_ev_t * token, int *fd) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate", 1); crm_debug("Registering with CCM"); ret = (*ccm_api_register) (&ccm_token); if (ret != 0) { crm_info("CCM registration failed: %d", ret); did_fail = TRUE; } if (did_fail == FALSE) { crm_debug("Setting up CCM callbacks"); ret = (*ccm_api_set_callback) (ccm_token, OC_EV_MEMB_CLASS, ccm_age_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set: %d", ret); did_fail = TRUE; } } if (did_fail == FALSE) { (*ccm_api_special) (ccm_token, OC_EV_MEMB_CLASS, 0 /*don't care */ ); crm_debug("Activating CCM token"); ret = (*ccm_api_activate) (ccm_token, ccm_fd); if (ret != 0) { crm_warn("CCM Activation failed: %d", ret); did_fail = TRUE; } } return !did_fail; } static gboolean try_heartbeat(int command, enum cluster_type_e stack) { crm_debug("Attempting to process %c command", command); if (command == 'i') { if (read_local_hb_uuid()) { crm_exit(0); } } else if (ccm_age_connect(&ccm_fd)) { int rc = 0; fd_set rset; int (*ccm_api_handle_event) (const oc_ev_t * token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_handle_event", 1); while (1) { sleep(1); FD_ZERO(&rset); FD_SET(ccm_fd, &rset); errno = 0; rc = select(ccm_fd + 1, &rset, NULL, NULL, NULL); if (rc > 0 && (*ccm_api_handle_event) (ccm_token) != 0) { crm_err("oc_ev_handle_event failed"); return FALSE; } else if (rc < 0 && errno != EINTR) { crm_perror(LOG_ERR, "select failed: %d", rc); return FALSE; } } } return FALSE; } #endif #if SUPPORT_CMAN # include # define MAX_NODES 256 static gboolean try_cman(int command, enum cluster_type_e stack) { int rc = -1, lpc = 0, node_count = 0; cman_node_t node; cman_cluster_t cluster; cman_handle_t cman_handle = NULL; cman_node_t cman_nodes[MAX_NODES]; memset(&cluster, 0, sizeof(cluster)); cman_handle = cman_init(NULL); if (cman_handle == NULL || cman_is_active(cman_handle) == FALSE) { crm_info("Couldn't connect to cman"); return FALSE; } switch (command) { case 'R': fprintf(stderr, "Node removal not supported for cman based clusters\n"); crm_exit(-EPROTONOSUPPORT); break; case 'e': /* Age makes no sense (yet?) in a cman cluster */ fprintf(stdout, "1\n"); break; case 'q': fprintf(stdout, "%d\n", cman_is_quorate(cman_handle)); break; case 'l': case 'p': rc = cman_get_nodes(cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc != 0) { fprintf(stderr, "Couldn't query cman node list: %d %d", rc, errno); goto cman_bail; } for (lpc = 0; lpc < node_count; lpc++) { if (command == 'l') { printf("%s ", cman_nodes[lpc].cn_name); } else if (cman_nodes[lpc].cn_nodeid != 0 && cman_nodes[lpc].cn_member) { /* Never allow node ID 0 to be considered a member #315711 */ printf("%s ", cman_nodes[lpc].cn_name); } } printf("\n"); break; case 'i': rc = cman_get_node(cman_handle, CMAN_NODEID_US, &node); if (rc != 0) { fprintf(stderr, "Couldn't query cman node id: %d %d", rc, errno); goto cman_bail; } fprintf(stdout, "%u\n", node.cn_nodeid); break; default: fprintf(stderr, "Unknown option '%c'\n", command); crm_help('?', EX_USAGE); } cman_finish(cman_handle); crm_exit(0); cman_bail: cman_finish(cman_handle); return crm_exit(EX_USAGE); } #endif #if HAVE_CONFDB static void ais_membership_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; crm_exit(1); } static gint member_sort(gconstpointer a, gconstpointer b) { const crm_node_t *node_a = a; const crm_node_t *node_b = b; return strcmp(node_a->uname, node_b->uname); } static void crm_add_member(gpointer key, gpointer value, gpointer user_data) { GList **list = user_data; crm_node_t *node = value; if (node->uname != NULL) { *list = g_list_insert_sorted(*list, node, member_sort); } } static gboolean ais_membership_dispatch(int kind, const char *from, const char *data) { switch (kind) { case crm_class_members: case crm_class_notify: case crm_class_quorum: break; default: return TRUE; break; } if (command == 'q') { if (crm_have_quorum) { fprintf(stdout, "1\n"); } else { fprintf(stdout, "0\n"); } } else if (command == 'l') { GList *nodes = NULL; GListPtr lpc = NULL; g_hash_table_foreach(crm_peer_cache, crm_add_member, &nodes); for (lpc = nodes; lpc != NULL; lpc = lpc->next) { crm_node_t *node = (crm_node_t *) lpc->data; fprintf(stdout, "%u %s %s\n", node->id, node->uname, node->state); } fprintf(stdout, "\n"); } else if (command == 'p') { GList *nodes = NULL; GListPtr lpc = NULL; g_hash_table_foreach(crm_peer_cache, crm_add_member, &nodes); for (lpc = nodes; lpc != NULL; lpc = lpc->next) { crm_node_t *node = (crm_node_t *) lpc->data; if (node->uname && safe_str_eq(node->state, CRM_NODE_MEMBER)) { fprintf(stdout, "%s ", node->uname); } } fprintf(stdout, "\n"); } crm_exit(0); return TRUE; } #endif #ifdef SUPPORT_CS_QUORUM # include # include static int node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { xmlNode *node = NULL; crm_log_xml_trace(msg, "message"); for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { const char *uname = crm_element_value(node, "uname"); if (command == 'l') { int id = 0; crm_element_value_int(node, "id", &id); fprintf(stdout, "%u %s\n", id, uname); } else if (command == 'p') { fprintf(stdout, "%s ", uname); } } free_xml(msg); if (command == 'p') { fprintf(stdout, "\n"); } crm_exit(0); } return 0; } static void node_mcp_destroy(gpointer user_data) { crm_exit(1); } static int -crmd_remove_node_cache(int id) +crmd_remove_node_cache(const char *id) { + int n = 0; int rc = -1; + char *name = NULL; char *admin_uuid = NULL; crm_ipc_t *conn = crm_ipc_new(CRM_SYSTEM_CRMD, 0); xmlNode *cmd = NULL; xmlNode *hello = NULL; xmlNode *msg_data = NULL; + char *endptr = NULL; if (!conn) { goto rm_node_cleanup; } if (!crm_ipc_connect(conn)) { goto rm_node_cleanup; } admin_uuid = calloc(1, 11); snprintf(admin_uuid, 10, "%d", getpid()); admin_uuid[10] = '\0'; hello = create_hello_message(admin_uuid, "crm_node", "0", "1"); rc = crm_ipc_send(conn, hello, 0, 0, NULL); if (rc < 0) { goto rm_node_cleanup; } + errno = 0; + n = strtol(id, &endptr, 10); + if(errno != 0 || endptr == id || *endptr != '\0') { + /* Argument was not a nodeid */ + n = 0; + name = strdup(id); + } else { + name = get_node_name(n); + } + + crm_trace("Removing %s aka. %s from the membership cache", name, id); + msg_data = create_xml_node(NULL, XML_TAG_OPTIONS); - crm_xml_add_int(msg_data, XML_ATTR_ID, id); + if(n) { + crm_xml_add_int(msg_data, XML_ATTR_ID, n); + } + crm_xml_add(msg_data, XML_ATTR_UNAME, name); + cmd = create_request(CRM_OP_RM_NODE_CACHE, msg_data, NULL, CRM_SYSTEM_CRMD, "crm_node", admin_uuid); rc = crm_ipc_send(conn, cmd, 0, 0, NULL); rm_node_cleanup: if (conn) { crm_ipc_close(conn); crm_ipc_destroy(conn); } free_xml(cmd); free_xml(hello); free(admin_uuid); return rc > 0 ? 0 : rc; } static gboolean try_corosync(int command, enum cluster_type_e stack) { int rc = 0; int quorate = 0; uint32_t quorum_type = 0; unsigned int nodeid = 0; cpg_handle_t c_handle = 0; quorum_handle_t q_handle = 0; mainloop_io_t *ipc = NULL; GMainLoop *amainloop = NULL; struct ipc_client_callbacks node_callbacks = { .dispatch = node_mcp_dispatch, .destroy = node_mcp_destroy }; switch (command) { case 'R': - if (crmd_remove_node_cache(atoi(target_uname))) { + if (crmd_remove_node_cache(target_uname)) { crm_err("Failed to connect to crmd to remove node id %s", target_uname); } break; case 'e': /* Age makes no sense (yet) in an AIS cluster */ fprintf(stdout, "1\n"); crm_exit(0); case 'q': /* Go direct to the Quorum API */ rc = quorum_initialize(&q_handle, NULL, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d\n", rc); return FALSE; } rc = quorum_getquorate(q_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d\n", rc); return FALSE; } if (quorate) { fprintf(stdout, "1\n"); } else { fprintf(stdout, "0\n"); } quorum_finalize(q_handle); crm_exit(0); case 'i': /* Go direct to the CPG API */ rc = cpg_initialize(&c_handle, NULL); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); return FALSE; } rc = cpg_local_get(c_handle, &nodeid); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); return FALSE; } fprintf(stdout, "%u\n", nodeid); cpg_finalize(c_handle); crm_exit(0); case 'l': case 'p': /* Go to pacemakerd */ amainloop = g_main_new(FALSE); ipc = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, NULL, &node_callbacks); if(ipc != NULL) { xmlNode *poke = create_xml_node(NULL, "poke"); crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL); free_xml(poke); g_main_run(amainloop); } break; } return FALSE; } #endif #if HAVE_CONFDB static gboolean try_openais(int command, enum cluster_type_e stack) { static crm_cluster_t cluster; cluster.destroy = ais_membership_destroy; cluster.cs_dispatch = ais_membership_dispatch; if (init_cs_connection_once(&cluster)) { GMainLoop *amainloop = NULL; switch (command) { case 'R': send_ais_text(crm_class_rmpeer, target_uname, TRUE, NULL, crm_msg_ais); crm_exit(0); case 'e': /* Age makes no sense (yet) in an AIS cluster */ fprintf(stdout, "1\n"); crm_exit(0); case 'q': send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); break; case 'l': case 'p': crm_info("Requesting the list of configured nodes"); send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais); break; case 'i': printf("%u\n", cluster.nodeid); crm_exit(0); default: fprintf(stderr, "Unknown option '%c'\n", command); crm_help('?', EX_USAGE); } amainloop = g_main_new(FALSE); g_main_run(amainloop); } return FALSE; } #endif int set_cluster_type(enum cluster_type_e type); int main(int argc, char **argv) { int flag = 0; int argerr = 0; uint32_t nodeid = 0; gboolean force_flag = FALSE; gboolean dangerous_cmd = FALSE; enum cluster_type_e try_stack = pcmk_cluster_unknown; int option_index = 0; crm_peer_init(); crm_log_cli_init("crm_node"); crm_set_options(NULL, "command [options]", long_options, "Tool for displaying low-level node information"); while (flag >= 0) { flag = crm_get_option(argc, argv, &option_index); switch (flag) { case -1: break; case 'V': crm_bump_log_level(argc, argv); break; case '$': case '?': crm_help(flag, EX_OK); break; case 'Q': do_quiet = TRUE; break; case 'H': set_cluster_type(pcmk_cluster_heartbeat); break; case 'A': set_cluster_type(pcmk_cluster_classic_ais); break; case 'C': set_cluster_type(pcmk_cluster_corosync); break; case 'c': set_cluster_type(pcmk_cluster_cman); break; case 'f': force_flag = TRUE; break; case 'R': dangerous_cmd = TRUE; command = flag; target_uname = optarg; break; case 'N': command = flag; nodeid = crm_parse_int(optarg, NULL); break; case 'p': case 'e': case 'q': case 'i': case 'l': case 'n': command = flag; break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } if(command == 'n') { fprintf(stdout, "%s\n", get_local_node_name()); crm_exit(0); } else if(command == 'N') { fprintf(stdout, "%s\n", get_node_name(nodeid)); crm_exit(0); } if (dangerous_cmd && force_flag == FALSE) { fprintf(stderr, "The supplied command is considered dangerous." " To prevent accidental destruction of the cluster," " the --force flag is required in order to proceed.\n"); fflush(stderr); crm_exit(EX_USAGE); } try_stack = get_cluster_type(); crm_debug("Attempting to process -%c command for cluster type: %s", command, name_for_cluster_type(try_stack)); #if SUPPORT_CMAN if (try_stack == pcmk_cluster_cman) { try_cman(command, try_stack); } #endif #ifdef SUPPORT_CS_QUORUM if (try_stack == pcmk_cluster_corosync) { try_corosync(command, try_stack); } #endif #if HAVE_CONFDB /* Only an option if we're using the plugins */ if (try_stack == pcmk_cluster_classic_ais) { try_openais(command, try_stack); } #endif #if SUPPORT_HEARTBEAT if (try_stack == pcmk_cluster_heartbeat) { try_heartbeat(command, try_stack); } #endif return (1); }