diff --git a/crmd/join_client.c b/crmd/join_client.c index aebe452f53..286cd92e0b 100644 --- a/crmd/join_client.c +++ b/crmd/join_client.c @@ -1,281 +1,281 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include int reannounce_count = 0; void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); /* A_CL_JOIN_QUERY */ /* is there a DC out there? 
*/ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); sleep(1); /* give the CCM time to propogate to the DC */ update_dc(NULL); /* Unset any existing value so that the result is not discarded */ crm_debug("Querying for a DC"); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } /* A_CL_JOIN_ANNOUNCE */ /* this is kind of a workaround for the fact that we may not be around * or are otherwise unable to reply when the DC sends out A_WELCOME_ALL */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* Once we hear from the DC, we can stop the timer * * This timer was started either on startup or when a node * left the CCM list */ /* dont announce if we're in one of these states */ if (cur_state != S_PENDING) { crm_warn("Do not announce ourselves in state %s", fsa_state2string(cur_state)); return; } if (AM_I_OPERATIONAL) { /* send as a broadcast */ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_debug("Announcing availability"); update_dc(NULL); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } else { /* Delay announce until we have finished local startup */ crm_warn("Delaying announce until local startup is complete"); return; } } static int query_call_id = 0; /* A_CL_JOIN_REQUEST */ /* aka. 
accept the welcome offer */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); const char *join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); #if 0 if (we are sick) { log error; /* save the request for later? */ return; } #endif crm_trace("Accepting join offer from %s: join-%s", welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID)); /* we only ever want the last one */ if (query_call_id > 0) { /* Calling remove_cib_op_callback() would result in a memory leak of the data field */ crm_trace("Cancelling previous join query: %d", query_call_id); query_call_id = 0; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding offer from %s (expected %s)", welcome_from, fsa_our_dc); return; } CRM_LOG_ASSERT(input != NULL); query_call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children); fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback); crm_trace("Registered join query callback: %d", query_call_id); register_fsa_action(A_DC_TIMER_STOP); } void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *join_id = user_data; xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); CRM_LOG_ASSERT(join_id != NULL); if (query_call_id != call_id) { - crm_trace("Query %d superceeded", call_id); + crm_trace("Query %d superseded", call_id); goto done; } query_call_id = 0; if(rc != pcmk_ok || output == NULL) { crm_err("Could not retrieve version details for join-%s: %s (%d)", join_id, pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } else if (fsa_our_dc == NULL) { crm_debug("Membership is in flux, not continuing join-%s", join_id); } else { xmlNode 
*reply = NULL; crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc); copy_in_properties(generation, output); reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add(reply, F_CRM_JOIN_ID, join_id); send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); } done: free_xml(generation); free(join_id); } /* A_CL_JOIN_RESULT */ /* aka. this is notification that we have (or have not) been accepted */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *tmp1 = NULL; gboolean was_nack = TRUE; static gboolean first_join = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); int join_id = -1; const char *op = crm_element_value(input->msg, F_CRM_TASK); const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK); const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) { crm_trace("Ignoring op=%s message", op); return; } /* calculate if it was an ack or a nack */ if (crm_is_true(ack_nack)) { was_nack = FALSE; } crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id); if (was_nack) { crm_err("Join (join-%d) with leader %s failed (NACK'd): Shutting down", join_id, welcome_from); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding %s from %s (expected %s)", op, welcome_from, fsa_our_dc); return; } /* send our status section to the DC */ crm_debug("Confirming join join-%d: %s", join_id, crm_element_value(input->msg, F_CRM_TASK)); tmp1 = do_lrm_query(TRUE, fsa_our_uname); if (tmp1 != NULL) { xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, 
fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id); crm_debug("join-%d: Join complete." " Sending local LRM status to %s", join_id, fsa_our_dc); if (first_join) { first_join = FALSE; /* * Clear any previous transient node attribute and lrm operations * * Corosync has a nasty habit of not being able to tell if a * node is returning or didn't leave in the first place. * This confuses Pacemaker because it never gets a "node up" * event which is normally used to clean up the status section. * * Do not remove the resources though, they'll be cleaned up in * do_dc_join_ack(). Removing them here creates a race * condition if the crmd is being recovered. * Instead of a list of active resources from the lrmd * we may end up with a blank status section. * If we are _NOT_ lucky, we will probe for the "wrong" instance * of anonymous clones and end up with multiple active * instances on the machine. */ erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0); /* Just in case attrd was still around too */ if (is_not_set(fsa_input_register, R_SHUTDOWN)) { update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE); update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE); } } send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); if (AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__); update_attrd(NULL, NULL, NULL, NULL, FALSE); } free_xml(tmp1); } else { crm_err("Could not send our LRM state to the DC"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } diff --git a/crmd/messages.c b/crmd/messages.c index bc3bad3f28..d38f2a3f2b 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -1,971 +1,971 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 
2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); void handle_response(xmlNode * stored_msg); enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause); enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg); #define ROUTER_RESULT(x) crm_trace("Router result: %s", x) /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? 
cur_data->data : NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ crm_info("Resetting the current action list"); fsa_dump_actions(fsa_actions, "Drop"); fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; CRM_CHECK(raised_from != NULL, raised_from = ""); if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } if (input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", raised_from, fsa_cause2string(cause), data, old_len); if (old_len > 0) { fsa_dump_queue(LOG_TRACE); prepend = FALSE; } if (data == NULL) { fsa_actions |= with_actions; fsa_dump_actions(with_actions, "Restored"); return 0; } /* Store everything in the new event and reset fsa_actions */ with_actions |= fsa_actions; fsa_actions = A_NOTHING; } last_data_id++; crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data", raised_from, prepend ? "prepended" : "appended", last_data_id, fsa_input2string(input), fsa_cause2string(cause), data ? 
"with" : "without"); fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_trace("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); crmd_exit(pcmk_err_generic); break; } crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if (prepend) { crm_trace("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_trace("Queue len: %d", g_list_length(fsa_message_queue)); /* fsa_dump_queue(LOG_DEBUG_2); */ if (old_len == g_list_length(fsa_message_queue)) { crm_err("Couldnt add message to the queue"); } if (fsa_source && input != I_WAIT_FOR_EVENT) { crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } return last_data_id; } void 
fsa_dump_queue(int log_level) { int offset = 0; GListPtr lpc = NULL; for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) { fsa_data_t *data = (fsa_data_t *) lpc->data; do_crm_log_unlikely(log_level, "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, data->data, data->data_type, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { ha_msg_input_t *copy = NULL; xmlNodePtr data = NULL; if (orig != NULL) { crm_trace("Copy msg"); data = copy_xml(orig->msg); } else { crm_trace("No message to copy"); } copy = new_ha_msg_input(data); if (orig && orig->msg != NULL) { CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); } return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); crmd_exit(pcmk_err_generic); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char 
*caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input, cause); /* done or process later? 
*/ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { int dest = 1; int is_for_dc = 0; int is_for_dcib = 0; int is_for_te = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *msg_error = NULL; crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); if (msg == NULL) { msg_error = "Cannot route empty message"; } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) { /* quietly ignore */ processing_complete = TRUE; } else if (safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if (sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if (msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_xml_warn(msg, "bad msg"); } if (processing_complete) { return TRUE; } processing_complete = TRUE; is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if (host_to == NULL || strlen(host_to) == 0) { if (is_for_dc || is_for_te) { is_local = 0; } else if (is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if (safe_str_eq(fsa_our_uname, host_to)) { is_local = 
1; } if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC && is_for_te) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else if (AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay to DC"); send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); } } else if (is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else { crm_node_t *node_to = NULL; #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); if (dest == crm_msg_none || dest > crm_msg_stonith_ng) { dest = crm_msg_crmd; } } #endif if (host_to) { node_to = crm_find_peer(0, host_to); if (node_to == NULL) { crm_err("Cannot route message to unknown node %s", host_to); return TRUE; } } ROUTER_RESULT("Message result: External relay"); send_cluster_message(host_to ? 
node_to : NULL, dest, msg, TRUE); } return processing_complete; } static gboolean process_hello_message(xmlNode * hello, char **client_name, char **major_version, char **minor_version) { const char *local_client_name; const char *local_major_version; const char *local_minor_version; *client_name = NULL; *major_version = NULL; *minor_version = NULL; if (hello == NULL) { return FALSE; } local_client_name = crm_element_value(hello, "client_name"); local_major_version = crm_element_value(hello, "major_version"); local_minor_version = crm_element_value(hello, "minor_version"); if (local_client_name == NULL || strlen(local_client_name) == 0) { crm_err("Hello message was not valid (field %s not found)", "client name"); return FALSE; } else if (local_major_version == NULL || strlen(local_major_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "major version"); return FALSE; } else if (local_minor_version == NULL || strlen(local_minor_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "minor version"); return FALSE; } *client_name = strdup(local_client_name); *major_version = strdup(local_major_version); *minor_version = strdup(local_minor_version); crm_trace("Hello message ok"); return TRUE; } gboolean crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session) { char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; gboolean auth_result = FALSE; xmlNode *xml = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); const char *uuid = curr_client ? 
curr_client->id : proxy_session; if (uuid == NULL) { crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); return FALSE; } else if (safe_str_neq(CRM_OP_HELLO, op)) { return TRUE; } xml = get_message_xml(client_msg, F_CRM_DATA); auth_result = process_hello_message(xml, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if (client_name == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), uuid); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_trace("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } } if (auth_result == TRUE) { crm_trace("Accepted client %s", client_name); if (curr_client) { curr_client->userdata = strdup(client_name); } crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } else { crm_warn("Rejected client logon request"); if (curr_client) { qb_ipcs_disconnect(curr_client->ipcs); } } free(minor_version); free(major_version); free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(xmlNode * msg, enum crmd_fsa_cause cause) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, F_CRM_MSG_TYPE); if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) { return handle_request(msg, cause); } else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) { handle_response(msg); return I_NULL; } crm_err("Unknown message type: %s", type); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; const char *uname = NULL; gboolean is_remote_node = FALSE; xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); if (xml_rsc) { rsc = ID(xml_rsc); } uname = crm_element_value(stored_msg, 
XML_LRM_ATTR_TARGET); if (crm_element_value(stored_msg, XML_LRM_ATTR_ROUTER_NODE)) { is_remote_node = TRUE; } if (rsc) { char *attr = NULL; crm_info("Removing failcount for %s", rsc); attr = crm_concat("fail-count", rsc, '-'); update_attrd(uname, attr, NULL, NULL, is_remote_node); free(attr); attr = crm_concat("last-failure", rsc, '-'); update_attrd(uname, attr, NULL, NULL, is_remote_node); free(attr); lrm_clear_last_failure(rsc, uname); } else { crm_log_xml_warn(stored_msg, "invalid failcount op"); } return I_NULL; } enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); /* Optimize this for the DC - it has the most to do */ if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); return I_NULL; } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting ourselves down (DC)"); return I_STOP; } else if (dc_match) { crm_err("We didnt ask to be shut down, yet our" " TE is telling us too." 
" Better get out now!"); return I_TERMINATE; } else if (fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); return I_ELECTION; } } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { throttle_update(stored_msg); return I_NULL; } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if (fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; #if 0 } else if (AM_I_DC) { /* This is the old way of doing things but what is gained? 
*/ return I_ELECTION; #endif } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0 || strcmp(op, CRM_OP_LRM_FAIL) == 0 || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*return I_SHUTDOWN; */ return I_NULL; /*========== (NOT_DC)-Only Actions ==========*/ } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (dc_match || fsa_our_dc == NULL) { if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { crm_err("We didn't ask to be shut down, yet our" " DC is telling us too."); set_bit(fsa_input_register, R_STAYDOWN); return I_STOP; } crm_info("Shutting down"); return I_STOP; } else { crm_warn("Discarding %s op from %s", op, host_from); } } else if (strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); /* Ok, so technically not so interesting, but CTS needs to see this */ crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg, ping); if(msg) { relay_message(msg, TRUE); } free_xml(ping); free_xml(msg); } else if (strcmp(op, 
CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; const char *name = NULL; crm_element_value_int(stored_msg, XML_ATTR_ID, &id); name = crm_element_value(stored_msg, XML_ATTR_UNAME); if(cause == C_IPC_MESSAGE) { msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { crm_err("Could not instruct peers to remove references to node %s/%u", name, id); } else { crm_notice("Instructing peers to remove references to node %s/%u", name, id); } free_xml(msg); } else { reap_crm_member(id, name); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } void handle_response(xmlNode * stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { - /* Check if the PE answer been superceeded by a subsequent request? */ + /* Check if the PE answer been superseded by a subsequent request? */ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (safe_str_eq(msg_ref, fsa_pe_ref)) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); crm_trace("Completed: %s...", fsa_pe_ref); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? 
"DC" : "CRMd"); } } enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; time_t now = time(NULL); const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state)); crm_log_xml_trace(stored_msg, "message"); now_s = crm_itoa(now); update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); gboolean send_msg_via_ipc(xmlNode * msg, const char *sys) { gboolean send_ok = TRUE; crm_client_t *client_channel = crm_client_get_by_id(sys); if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ send_ok = crm_ipcs_send(client_channel, 0, msg, crm_ipc_server_event); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); process_te_message(msg, data); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; fsa_input.msg = msg; fsa_input.xml = get_message_xml(msg, F_CRM_DATA); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __FUNCTION__; fsa_data.data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE); #endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, 
I_MESSAGE, &fsa_data); } else if (sys != NULL && crmd_is_proxy_session(sys)) { crmd_proxy_send(sys, msg); } else { crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } return send_ok; } ha_msg_input_t * new_ha_msg_input(xmlNode * orig) { ha_msg_input_t *input_copy = NULL; input_copy = calloc(1, sizeof(ha_msg_input_t)); input_copy->msg = orig; input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA); return input_copy; } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } diff --git a/crmd/pengine.c b/crmd/pengine.c index dde1b13692..ab426be72e 100644 --- a/crmd/pengine.c +++ b/crmd/pengine.c @@ -1,301 +1,301 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *pe_subsystem = NULL; void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); static void save_cib_contents(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { int len = 15; char *filename = NULL; len += strlen(id); len += strlen(PE_STATE_DIR); filename = calloc(1, len); CRM_CHECK(filename != NULL, return); sprintf(filename, PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save CIB contents after PE crash to %s", filename); } else { crm_notice("Saved CIB contents after PE crash to %s", filename); } free(filename); } free(id); } static void pe_ipc_destroy(gpointer user_data) { if (is_set(fsa_input_register, pe_subsystem->flag_required)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Connection to the Policy Engine failed (pid=%d, uuid=%s)", pe_subsystem->pid, uuid_str); /* *The PE died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. 
* */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents); } else { if (is_heartbeat_cluster()) { stop_subsystem(pe_subsystem, FALSE); } crm_info("Connection to the Policy Engine released"); } clear_bit(fsa_input_register, pe_subsystem->flag_connected); pe_subsystem->pid = -1; pe_subsystem->source = NULL; pe_subsystem->client = NULL; mainloop_set_trigger(fsa_source); return; } static int pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { route_message(C_IPC_MESSAGE, msg); } free_xml(msg); return 0; } /* A_PE_START, A_PE_STOP, A_TE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct crm_subsystem_s *this_subsys = pe_subsystem; long long stop_actions = A_PE_STOP; long long start_actions = A_PE_START; static struct ipc_client_callbacks pe_callbacks = { .dispatch = pe_ipc_dispatch, .destroy = pe_ipc_destroy }; if (action & stop_actions) { clear_bit(fsa_input_register, pe_subsystem->flag_required); mainloop_del_ipc_client(pe_subsystem->source); pe_subsystem->source = NULL; clear_bit(fsa_input_register, pe_subsystem->flag_connected); } if ((action & start_actions) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) { if (cur_state != S_STOPPING) { set_bit(fsa_input_register, pe_subsystem->flag_required); pe_subsystem->source = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, G_PRIORITY_DEFAULT, 5 * 1024 * 1024 /* 5Mb */ , NULL, &pe_callbacks); if (pe_subsystem->source == NULL) { crm_warn("Setup of client connection failed, not adding channel to mainloop"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } /* if (is_openais_cluster()) { */ /* pe_subsystem->pid = pe_subsystem->ipc->farside_pid; */ /* } */ set_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Ignoring 
request to start %s while shutting down", this_subsys->name); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not DC: No need to invoke the PE (anymore): %s", fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the PE"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the PE to connect"); crmd_fsa_stall(FALSE); register_fsa_action(A_PE_START); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("No need to invoke the PE in state %s", fsa_state2string(cur_state)); return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke the PE without a consistent copy of the CIB!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); /* Make sure any queued calculations are discarded */ free(fsa_pe_ref); fsa_pe_ref = NULL; fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback); } void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int sent; xmlNode *cmd = NULL; if (rc != pcmk_ok) { crm_err("Cant retrieve the CIB: %s (call %d)", pcmk_strerror(rc), call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if (call_id != fsa_pe_query) { - crm_trace("Skipping superceeded CIB query: %d (current=%d)", call_id, fsa_pe_query); + crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } 
else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the PE anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state)); return; } else if (last_peer_update != 0) { crm_debug("Re-asking for the CIB: peer update %d still pending", last_peer_update); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_err("Invoking PE in state: %s", fsa_state2string(fsa_state)); return; } CRM_LOG_ASSERT(output != NULL); /* refresh our remote-node cache when the pengine is invoked */ crm_remote_peer_cache_refresh(output); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); free(fsa_pe_ref); fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE); sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem->source), cmd, 0, 0, NULL); if (sent <= 0) { crm_err("Could not contact the pengine: %d", sent); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } crm_debug("Invoking the PE: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); free_xml(cmd); } diff --git a/lib/cluster/election.c b/lib/cluster/election.c index 1bb378b8e6..9706b5ffe7 100644 --- a/lib/cluster/election.c +++ b/lib/cluster/election.c @@ -1,527 +1,527 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #define STORM_INTERVAL 2 /* in seconds */ #define STORM_MULTIPLIER 5 /* multiplied by the number of nodes */ struct election_s { enum election_result state; guint count; char *name; char *uname; GSourceFunc cb; GHashTable *voted; mainloop_timer_t *timeout; /* When to stop if not everyone casts a vote */ }; static void election_complete(election_t *e) { crm_info("Election %s complete", e->name); e->state = election_won; if(e->cb) { e->cb(e); } election_reset(e); } static gboolean election_timer_cb(gpointer user_data) { election_t *e = user_data; crm_info("Election %s %p timed out", e->name, e); election_complete(e); return FALSE; } enum election_result election_state(election_t *e) { if(e) { return e->state; } return election_error; } election_t * election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb) { static guint count = 0; election_t *e = calloc(1, sizeof(election_t)); if(e != NULL) { if(name) { e->name = g_strdup_printf("election-%s", name); } else { e->name = g_strdup_printf("election-%u", count++); } e->cb = cb; e->uname = strdup(uname); e->timeout = mainloop_timer_add(e->name, period_ms, FALSE, election_timer_cb, e); crm_trace("Created %s %p", e->name, e); } return e; } void election_remove(election_t *e, const char *uname) { if(e && uname && e->voted) { g_hash_table_remove(e->voted, uname); } } void election_reset(election_t *e) { crm_trace("Resetting election %s", e->name); if(e) { 
mainloop_timer_stop(e->timeout); } if (e && e->voted) { crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted)); g_hash_table_destroy(e->voted); e->voted = NULL; } } void election_fini(election_t *e) { if(e) { election_reset(e); crm_trace("Destroying %s", e->name); mainloop_timer_del(e->timeout); free(e->uname); free(e->name); free(e); } } static void election_timeout_start(election_t *e) { if(e) { mainloop_timer_start(e->timeout); } } void election_timeout_stop(election_t *e) { if(e) { mainloop_timer_stop(e->timeout); } } void election_timeout_set_period(election_t *e, guint period) { if(e) { mainloop_timer_set_period(e->timeout, period); } else { crm_err("No election defined"); } } static int crm_uptime(struct timeval *output) { static time_t expires = 0; static struct rusage info; time_t tm_now = time(NULL); if (expires < tm_now) { int rc = 0; info.ru_utime.tv_sec = 0; info.ru_utime.tv_usec = 0; rc = getrusage(RUSAGE_SELF, &info); output->tv_sec = 0; output->tv_usec = 0; if (rc < 0) { crm_perror(LOG_ERR, "Could not calculate the current uptime"); expires = 0; return -1; } crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec, (long)info.ru_utime.tv_usec); } expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */ output->tv_sec = info.ru_utime.tv_sec; output->tv_usec = info.ru_utime.tv_usec; return 1; } static int crm_compare_age(struct timeval your_age) { struct timeval our_age; crm_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */ if (our_age.tv_sec > your_age.tv_sec) { crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return 1; } else if (our_age.tv_sec < your_age.tv_sec) { crm_debug("Loose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return -1; } else if (our_age.tv_usec > your_age.tv_usec) { crm_debug("Win: %ld.%ld vs %ld.%ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, 
(long)your_age.tv_usec); return 1; } else if (our_age.tv_usec < your_age.tv_usec) { crm_debug("Loose: %ld.%ld vs %ld.%ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return -1; } return 0; } void election_vote(election_t *e) { struct timeval age; xmlNode *vote = NULL; crm_node_t *our_node; if(e == NULL) { crm_trace("Not voting in election: not initialized"); return; } our_node = crm_get_peer(0, e->uname); if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { crm_trace("Cannot vote yet: %p", our_node); return; } e->state = election_in_progress; vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); e->count++; crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid); crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count); crm_uptime(&age); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_S, age.tv_sec); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_US, age.tv_usec); send_cluster_message(NULL, crm_msg_crmd, vote, TRUE); free_xml(vote); crm_debug("Started election %d", e->count); if (e->voted) { g_hash_table_destroy(e->voted); e->voted = NULL; } election_timeout_start(e); return; } bool election_check(election_t *e) { int voted_size = 0; int num_members = crm_active_peers(); if(e == NULL) { crm_trace("not initialized"); return FALSE; } if (e->voted) { voted_size = g_hash_table_size(e->voted); } /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if (voted_size >= num_members) { /* we won and everyone has voted */ election_timeout_stop(e); if (voted_size > num_members) { GHashTableIter gIter; const crm_node_t *node; char *key = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { if (crm_is_peer_active(node)) { crm_err("member: %s proc=%.32x", node->uname, node->processes); } } g_hash_table_iter_init(&gIter, e->voted); while 
(g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) { crm_err("voted: %s", key); } } election_complete(e); return TRUE; } else { crm_debug("Still waiting on %d non-votes (%d total)", num_members - voted_size, num_members); } return FALSE; } #define loss_dampen 2 /* in seconds */ /* A_ELECTION_COUNT */ enum election_result election_count_vote(election_t *e, xmlNode *vote, bool can_win) { int age = 0; int election_id = -1; int log_level = LOG_INFO; gboolean use_born_on = FALSE; gboolean done = FALSE; gboolean we_loose = FALSE; const char *op = NULL; const char *from = NULL; const char *reason = "unknown"; const char *election_owner = NULL; crm_node_t *our_node = NULL, *your_node = NULL; static int election_wins = 0; xmlNode *novote = NULL; time_t tm_now = time(NULL); static time_t expires = 0; static time_t last_election_loss = 0; /* if the membership copy is NULL we REALLY shouldnt be voting * the question is how we managed to get here. */ CRM_CHECK(vote != NULL, return election_error); if(e == NULL) { crm_info("Not voting in election: not initialized"); return election_lost; } else if(crm_peer_cache == NULL) { crm_info("Not voting in election: no peer cache"); return election_lost; } op = crm_element_value(vote, F_CRM_TASK); from = crm_element_value(vote, F_CRM_HOST_FROM); election_owner = crm_element_value(vote, F_CRM_ELECTION_OWNER); crm_element_value_int(vote, F_CRM_ELECTION_ID, &election_id); your_node = crm_get_peer(0, from); our_node = crm_get_peer(0, e->uname); if (e->voted == NULL) { crm_debug("Created voted hash"); e->voted = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } if (is_heartbeat_cluster()) { use_born_on = TRUE; } else if (is_classic_ais_cluster()) { use_born_on = TRUE; } if(can_win == FALSE) { reason = "Not eligible"; we_loose = TRUE; } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_loose = TRUE; } else if 
(election_id != e->count && crm_str_eq(our_node->uuid, election_owner, TRUE)) { log_level = LOG_TRACE; - reason = "Superceeded"; + reason = "Superseded"; done = TRUE; } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; done = TRUE; } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(from); CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE)); /* update the list of nodes that have voted */ g_hash_table_replace(e->voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else { struct timeval your_age; const char *your_version = crm_element_value(vote, F_CRM_VERSION); int tv_sec = 0; int tv_usec = 0; crm_element_value_int(vote, F_CRM_ELECTION_AGE_S, &tv_sec); crm_element_value_int(vote, F_CRM_ELECTION_AGE_US, &tv_usec); your_age.tv_sec = tv_sec; your_age.tv_usec = tv_usec; age = crm_compare_age(your_age); if (crm_str_eq(from, e->uname, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(from); CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE)); /* update ourselves in the list of nodes that have voted */ g_hash_table_replace(e->voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) { reason = "Version"; we_loose = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) { reason = "Version"; } else if (age < 0) { reason = "Uptime"; we_loose = TRUE; } else if (age > 0) { reason = "Uptime"; /* TODO: Check for y(our) born < 0 */ } else if (use_born_on && your_node->born < our_node->born) { reason = "Born"; we_loose = TRUE; } else if (use_born_on && your_node->born > our_node->born) { reason = "Born"; } else if (e->uname == NULL) { reason = "Unknown host name"; we_loose = TRUE; } else if (strcasecmp(e->uname, from) > 0) { reason = "Host 
name"; we_loose = TRUE; } else { reason = "Host name"; CRM_ASSERT(strcasecmp(e->uname, from) < 0); /* cant happen... * } else if(strcasecmp(e->uname, from) == 0) { * */ } } if (expires < tm_now) { election_wins = 0; expires = tm_now + STORM_INTERVAL; } else if (done == FALSE && we_loose == FALSE) { int peers = 1 + g_hash_table_size(crm_peer_cache); /* If every node has to vote down every other node, thats N*(N-1) total elections * Allow some leway before _really_ complaining */ election_wins++; if (election_wins > (peers * peers)) { crm_warn("Election storm detected: %d elections in %d seconds", election_wins, STORM_INTERVAL); election_wins = 0; expires = tm_now + STORM_INTERVAL; crm_write_blackbox(0, NULL); } } if (done) { do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)", election_id, e->count, election_owner, op, from, reason); return e->state; } else if(we_loose == FALSE) { do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)", election_id, election_owner, op, from, reason); if (last_election_loss == 0 || tm_now - last_election_loss > (time_t) loss_dampen) { last_election_loss = 0; election_timeout_stop(e); /* Start a new election by voting down this, and other, peers */ e->state = election_start; return e->state; } crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)", election_id, loss_dampen, ctime(&last_election_loss)); } novote = create_request(CRM_OP_NOVOTE, NULL, from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)", election_id, election_owner, op, from, reason); election_timeout_stop(e); crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner); crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id); send_cluster_message(your_node, crm_msg_crmd, novote, TRUE); free_xml(novote); last_election_loss = tm_now; e->state = election_lost; return e->state; } diff --git a/lib/transition/utils.c 
b/lib/transition/utils.c index c1b546778b..745cc9e411 100644 --- a/lib/transition/utils.c +++ b/lib/transition/utils.c @@ -1,290 +1,290 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include /* #include */ /* */ extern crm_graph_functions_t *graph_fns; static gboolean pseudo_action_dummy(crm_graph_t * graph, crm_action_t * action) { static int fail = -1; if (fail < 0) { char *fail_s = getenv("PE_fail"); if (fail_s) { fail = crm_int_helper(fail_s, NULL); } else { fail = 0; } } crm_trace("Dummy event handler: action %d executed", action->id); if (action->id == fail) { crm_err("Dummy event handler: pretending action %d failed", action->id); action->failed = TRUE; graph->abort_priority = INFINITY; } action->confirmed = TRUE; update_graph(graph, action); return TRUE; } crm_graph_functions_t default_fns = { pseudo_action_dummy, pseudo_action_dummy, pseudo_action_dummy, pseudo_action_dummy }; void set_default_graph_functions(void) { graph_fns = &default_fns; } void set_graph_functions(crm_graph_functions_t * fns) { crm_info("Setting custom graph functions"); graph_fns = fns; CRM_ASSERT(graph_fns != NULL); CRM_ASSERT(graph_fns->rsc != NULL); CRM_ASSERT(graph_fns->crmd != NULL); CRM_ASSERT(graph_fns->pseudo != NULL); 
CRM_ASSERT(graph_fns->stonith != NULL); } const char * transition_status(enum transition_status state) { switch (state) { case transition_active: return "active"; case transition_pending: return "pending"; case transition_complete: return "complete"; case transition_stopped: return "stopped"; case transition_terminated: return "terminated"; case transition_action_failed: return "failed (action)"; case transition_failed: return "failed"; } return "unknown"; } const char * actiontype2text(action_type_e type) { switch (type) { case action_type_pseudo: return "pseudo"; case action_type_rsc: return "rsc"; case action_type_crm: return "crm"; } return ""; } static crm_action_t * find_action(crm_graph_t * graph, int id) { GListPtr sIter = NULL; if (graph == NULL) { return NULL; } for (sIter = graph->synapses; sIter != NULL; sIter = sIter->next) { GListPtr aIter = NULL; synapse_t *synapse = (synapse_t *) sIter->data; for (aIter = synapse->actions; aIter != NULL; aIter = aIter->next) { crm_action_t *action = (crm_action_t *) aIter->data; if (action->id == id) { return action; } } } return NULL; } static void print_synapse(unsigned int log_level, crm_graph_t * graph, synapse_t * synapse) { GListPtr lpc = NULL; char *pending = NULL; const char *state = "Pending"; if (synapse->failed) { state = "Failed"; } else if (synapse->confirmed) { state = "Completed"; } else if (synapse->executed) { state = "In-flight"; } else if (synapse->ready) { state = "Ready"; } if (synapse->executed == FALSE) { for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *input = (crm_action_t *) lpc->data; const char *id_string = crm_element_value(input->xml, XML_ATTR_ID); if (input->failed) { pending = add_list_element(pending, id_string); } else if (input->confirmed) { /* Confirmed, skip */ } else if (find_action(graph, input->id)) { /* In-flight or pending */ pending = add_list_element(pending, id_string); } } } for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { 
crm_action_t *action = (crm_action_t *) lpc->data; const char *key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *host = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); char *desc = g_strdup_printf("%s %s op %s", state, actiontype2text(action->type), key); do_crm_log(log_level, "[Action %4d]: %-50s on %s (priority: %d, waiting: %s)", action->id, desc, host ? host : "N/A", synapse->priority, pending ? pending : "none"); g_free(desc); } if (synapse->executed == FALSE) { for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *input = (crm_action_t *) lpc->data; const char *key = crm_element_value(input->xml, XML_LRM_ATTR_TASK_KEY); const char *host = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); if (find_action(graph, input->id) == NULL) { if (host == NULL) { do_crm_log(log_level, " * [Input %2d]: Unresolved dependancy %s op %s", input->id, actiontype2text(input->type), key); } else { do_crm_log(log_level, " * [Input %2d]: Unresolved dependancy %s op %s on %s", input->id, actiontype2text(input->type), key, host); } } } } free(pending); } void print_action(int log_level, const char *prefix, crm_action_t * action) { print_synapse(log_level, NULL, action->synapse); } void print_graph(unsigned int log_level, crm_graph_t * graph) { GListPtr lpc = NULL; if (graph == NULL || graph->num_actions == 0) { if (log_level > LOG_DEBUG) { crm_debug("Empty transition graph"); } return; } do_crm_log(log_level, "Graph %d with %d actions:" " batch-limit=%d jobs, network-delay=%dms", graph->id, graph->num_actions, graph->num_synapses, graph->batch_limit, graph->network_delay); for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { synapse_t *synapse = (synapse_t *) lpc->data; print_synapse(log_level, graph, synapse); } } static const char * abort2text(enum transition_action abort_action) { switch (abort_action) { case tg_done: return "done"; case tg_stop: return "stop"; case tg_restart: return "restart"; case tg_shutdown: return 
"shutdown"; } return "unknown"; } bool update_abort_priority(crm_graph_t * graph, int priority, enum transition_action action, const char *abort_reason) { bool change = FALSE; if (graph == NULL) { return change; } if (graph->abort_priority < priority) { crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority); graph->abort_priority = priority; if (graph->abort_reason != NULL) { - crm_debug("'%s' abort superceeded by %s", graph->abort_reason, abort_reason); + crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason); } graph->abort_reason = abort_reason; change = TRUE; } if (graph->completion_action < action) { - crm_debug("Abort action %s superceeded by %s: %s", + crm_debug("Abort action %s superseded by %s: %s", abort2text(graph->completion_action), abort2text(action), abort_reason); graph->completion_action = action; change = TRUE; } return change; } diff --git a/pengine/ptest.c b/pengine/ptest.c index 5fd56af92b..4d20af06ee 100644 --- a/pengine/ptest.c +++ b/pengine/ptest.c @@ -1,510 +1,510 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

/*
 * NOTE(review): every #include below has lost its header name in this copy
 * (probably stripped during extraction); restore them from the upstream file.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if HAVE_LIBXML2
# include
#endif

/*
 * Command-line switches, set by main() while it parses options.
 * NOTE(review): nothing visible here ever sets use_stdin to TRUE (the only
 * assignment is commented out in the 'X' case), so the "stdin" input branch
 * in main() looks unreachable - confirm.
 */
gboolean use_stdin = FALSE;
gboolean do_simulation = FALSE;
gboolean inhibit_exit = FALSE;
gboolean all_actions = FALSE;

/* Implemented outside this file; main() calls do_calculations() to fill in data_set. */
extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now);
extern void cleanup_calculations(pe_working_set_t * data_set);

/* Optional date string taken from the 'd' option; main() turns it into a fake "now". */
char *use_date = NULL;

/* Output stream for the DOT file; stays NULL unless a dot file was requested and opened. */
FILE *dot_strm = NULL;

#define DOT_PREFIX "PE_DOT: "
/* #define DOT_PREFIX "" */

/*
 * Emit one line of DOT output.
 * With dot_strm open, the formatted text and a trailing newline are written to
 * it; otherwise the text is logged through crm_debug() with DOT_PREFIX in front.
 * NOTE(review): the expansion is a bare if/else rather than do { } while (0),
 * so it is only safe when used as a standalone statement (as all visible
 * callers do).
 */
#define dot_write(fmt...) if(dot_strm != NULL) { \
        fprintf(dot_strm, fmt); \
        fprintf(dot_strm, "\n"); \
    } else { \
        crm_debug(DOT_PREFIX""fmt); \
    }

/*
 * Write the opening line of the DOT digraph.
 * The graph/node/edge attribute lines that used to follow are all commented out.
 */
static void
init_dotfile(void)
{
    dot_write(" digraph \"g\" {");
/* dot_write(" size = \"30,30\""); */
/* dot_write(" graph ["); */
/* dot_write(" fontsize = \"12\""); */
/* dot_write(" fontname = \"Times-Roman\""); */
/* dot_write(" fontcolor = \"black\""); */
/* dot_write(" bb = \"0,0,398.922306,478.927856\""); */
/* dot_write(" color = \"black\""); */
/* dot_write(" ]"); */
/* dot_write(" node ["); */
/* dot_write(" fontsize = \"12\""); */
/* dot_write(" fontname = \"Times-Roman\""); */
/* dot_write(" fontcolor = \"black\""); */
/* dot_write(" shape = \"ellipse\""); */
/* dot_write(" color = \"black\""); */
/* dot_write(" ]"); */
/* dot_write(" edge ["); */
/* dot_write(" fontsize = \"12\""); */
/* dot_write(" fontname = \"Times-Roman\""); */
/* dot_write(" fontcolor = \"black\""); */
/* dot_write(" color = \"black\""); */
/* dot_write(" ]"); */
}

/*
 * Build the label used for an action in the DOT output.
 *
 *  - action has a node:   crm_concat(uuid, node uname, ' ')
 *  - pseudo action:       a strdup() copy of the uuid
 *  - anything else:       crm_concat(uuid, "", ' ')
 *
 * When the action's task equals RSC_CANCEL the result is rebuilt as
 * crm_concat("Cancel", <previous name>, ' ') and the previous string is freed.
 *
 * Returns a string the caller releases with free() (main() does so).
 * NOTE(review): crm_concat() presumably joins its two strings with the given
 * separator character - confirm against its definition.
 * NOTE(review): the empty host string in the last branch may be an extraction
 * artefact (a "<...>" placeholder would have been stripped) - confirm upstream.
 */
static char *
create_action_name(action_t * action)
{
    char *action_name = NULL;
    const char *action_host = NULL;

    if (action->node) {
        action_host = action->node->details->uname;
        action_name = crm_concat(action->uuid, action_host, ' ');

    } else if (is_set(action->flags, pe_action_pseudo)) {
        action_name = strdup(action->uuid);

    } else {
        action_host = "";
        action_name = crm_concat(action->uuid, action_host, ' ');
    }
    if (safe_str_eq(action->task, RSC_CANCEL)) {
        char *tmp_action_name = action_name;

        action_name = crm_concat("Cancel", tmp_action_name, ' ');
        free(tmp_action_name);
    }

    return action_name;
}

/* Set by the 'L' option: read the configuration from the running cluster. */
gboolean USE_LIVE_CIB = FALSE;

/*
 * Option table handed to crm_set_options() in main().
 * NOTE(review): the fields look like {long name, takes-argument, flag,
 * short option, help text} - confirm against struct crm_option.
 * NOTE(review): main() also has cases for 'w' and 'd', which have no entry here.
 */
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
    /* Top-level Options */
    {"help", 0, 0, '?', "This text"},
    {"version", 0, 0, '$', "Version information" },
    {"verbose", 0, 0, 'V', "Increase debug output\n"},

    {"simulate", 0, 0, 'S', "Simulate the transition's execution to find invalid graphs\n"},
    {"show-scores", 0, 0, 's', "Display resource allocation scores"},
    {"show-utilization", 0, 0, 'U', "Display utilization information"},
    {"all-actions", 0, 0, 'a', "Display all possible actions - even ones not part of the transition graph"},

    {"live-check", 0, 0, 'L', "Connect to the CIB and use the current contents as input"},
    {"xml-text", 1, 0, 'X', "Retrieve XML from the supplied string"},
    {"xml-file", 1, 0, 'x', "Retrieve XML from the named file"},
    /* {"xml-pipe", 0, 0, 'p', "Retrieve XML from stdin\n"}, */

    {"save-input", 1, 0, 'I', "\tSave the input to the named file"},
    {"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"},
    {"save-dotfile",1, 0, 'D', "Save the transition graph (DOT format) to the named file\n"},

    {0, 0, 0, 0}
};
/* *INDENT-ON* */

/*
 * Entry point of the "ptest" tool.
 *
 * Steps, in order:
 *   1. route glib allocations through malloc/realloc/free/calloc;
 *   2. initialise logging and register the option table;
 *   3. parse the command line;
 *   4. load the configuration XML from the live CIB, a file, stdin or a string;
 *   5. make sure a status section exists, then run cli_config_update() and
 *      validate_xml() on the input;
 *   6. optionally save the input XML, and optionally set a fake "now";
 *   7. run do_calculations() and dump data_set.graph to stdout or a file;
 *   8. optionally write the actions and their orderings as a DOT graph;
 *   9. optionally unpack and run the resulting graph as a simulation;
 *  10. clean up.
 *
 * Return values visible in this function:
 *   0                             all_good is still TRUE at the end
 *   graph_rc                      otherwise (all_good is only cleared after a
 *                                 simulation that did not complete)
 *   3                             live CIB sign-on failed or returned no copy
 *   4                             an input source was chosen but gave no XML
 *   -ENOKEY                       cli_config_update() returned FALSE
 *   -pcmk_err_schema_validation   validate_xml() did not return TRUE
 * NOTE(review): the negative values are returned straight from main() and so
 * become truncated exit statuses - confirm that callers expect this.
 */
int
main(int argc, char **argv)
{
    GListPtr lpc = NULL;
    gboolean process = TRUE;    /* never changed below, so the calculation always runs */
    gboolean all_good = TRUE;
    enum transition_status graph_rc = -1;
    crm_graph_t *transition = NULL;
    crm_time_t *a_date = NULL;
    cib_t *cib_conn = NULL;

    xmlNode *cib_object = NULL;
    int argerr = 0;
    int flag;

    char *msg_buffer = NULL;
    gboolean optional = FALSE;
    pe_working_set_t data_set;

    const char *source = NULL;
    const char *xml_file = NULL;
    const char *dot_file = NULL;
    const char *graph_file = NULL;
    const char *input_file = NULL;
    const char *input_xml = NULL;

    /* disable glib's fancy allocators that can't be free'd */
    GMemVTable vtable;

    vtable.malloc = malloc;
    vtable.realloc = realloc;
    vtable.free = free;
    vtable.calloc = calloc;
    vtable.try_malloc = malloc;
    vtable.try_realloc = realloc;

    g_mem_set_vtable(&vtable);

    crm_log_cli_init("ptest");
    /*
     * The two lines marked '-' and '+' below are the removed and added sides of
     * the patch hunk this text was taken from (a spelling fix in the help text).
     */
    crm_set_options(NULL, "[-?Vv] -[Xxp] {other options}", long_options,
                    "Calculate the cluster's response to the supplied cluster state\n"
-                    "\nSuperceeded by crm_simulate and likely to be removed in a future release\n\n");
+                    "\nSuperseded by crm_simulate and likely to be removed in a future release\n\n");

    /* Option parsing: each recognised flag only records a setting for later. */
    while (1) {
        int option_index = 0;

        flag = crm_get_option(argc, argv, &option_index);
        if (flag == -1)
            break;

        switch (flag) {
            case 'S':
                do_simulation = TRUE;
                break;
            case 'a':
                all_actions = TRUE;
                break;
            case 'w':
                /* NOTE(review): no 'w' entry is visible in long_options */
                inhibit_exit = TRUE;
                break;
            case 'X':
                /*use_stdin = TRUE; */
                input_xml = optarg;
                break;
            case 's':
                show_scores = TRUE;
                break;
            case 'U':
                show_utilization = TRUE;
                break;
            case 'x':
                xml_file = optarg;
                break;
            case 'd':
                /* NOTE(review): no 'd' entry is visible in long_options */
                use_date = optarg;
                break;
            case 'D':
                dot_file = optarg;
                break;
            case 'G':
                graph_file = optarg;
                break;
            case 'I':
                input_file = optarg;
                break;
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'L':
                USE_LIVE_CIB = TRUE;
                break;
            case '$':
            case '?':
                crm_help(flag, 0);
                break;
            default:
                fprintf(stderr, "Option -%c is not yet supported\n", flag);
                ++argerr;
                break;
        }
    }

    /* Left-over positional arguments are only echoed, not treated as errors. */
    if (optind < argc) {
        printf("non-option ARGV-elements: ");
        while (optind < argc) {
            printf("%s ", argv[optind++]);
        }
        printf("\n");
    }

    if (optind > argc) {
        ++argerr;
    }

    if (argerr) {
        crm_err("%d errors in option parsing", argerr);
        crm_help('?', 1);
    }

    /* Pick the input source: live CIB first, then file, stdin, literal string. */
    if (USE_LIVE_CIB) {
        int rc = pcmk_ok;

        source = "live cib";
        cib_conn = cib_new();
        rc = cib_conn->cmds->signon(cib_conn, "ptest", cib_command);

        if (rc == pcmk_ok) {
            crm_info("Reading XML from: live cluster");
            cib_object = get_cib_copy(cib_conn);

        } else {
            fprintf(stderr, "Live CIB query failed: %s\n", pcmk_strerror(rc));
            return 3;
        }
        if (cib_object == NULL) {
            fprintf(stderr, "Live CIB query failed: empty result\n");
            return 3;
        }
        /* NOTE(review): cib_conn is not signed off or released anywhere in this function */

    } else if (xml_file != NULL) {
        source = xml_file;
        cib_object = filename2xml(xml_file);

    } else if (use_stdin) {
        source = "stdin";
        cib_object = filename2xml(NULL);

    } else if (input_xml) {
        source = "input string";
        cib_object = string2xml(input_xml);
    }

    if (cib_object == NULL && source) {
        fprintf(stderr, "Could not parse configuration input from: %s\n", source);
        return 4;

    } else if (cib_object == NULL) {
        fprintf(stderr, "No configuration specified\n");
        /* NOTE(review): the code below dereferences cib_object, so crm_help()
         * presumably does not return here - confirm */
        crm_help('?', 1);
    }

    /* Add an empty status section when the input has none. */
    if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) {
        create_xml_node(cib_object, XML_CIB_TAG_STATUS);
    }

    if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
        free_xml(cib_object);
        return -ENOKEY;
    }

    if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
        free_xml(cib_object);
        return -pcmk_err_schema_validation;
    }

    /* Optionally save the (possibly updated) input XML; failures are only logged. */
    if (input_file != NULL) {
        FILE *input_strm = fopen(input_file, "w");

        if (input_strm == NULL) {
            crm_perror(LOG_ERR, "Could not open %s for writing", input_file);
        } else {
            msg_buffer = dump_xml_formatted(cib_object);
            if (fprintf(input_strm, "%s\n", msg_buffer) < 0) {
                crm_perror(LOG_ERR, "Write to %s failed", input_file);
            }
            fflush(input_strm);
            fclose(input_strm);
            free(msg_buffer);
        }
    }

    /* A supplied date replaces "now" for the calculation; it is logged twice at warning level. */
    if (use_date != NULL) {
        a_date = crm_time_new(use_date);
        crm_time_log(LOG_WARNING, "Set fake 'now' to", a_date,
                     crm_time_log_date | crm_time_log_timeofday);
        crm_time_log(LOG_WARNING, "Set fake 'now' to (localtime)", a_date,
                     crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone);
        /* NOTE(review): a_date is not released in this function - confirm who owns it */
    }

    set_working_set_defaults(&data_set);
    if (process) {
        if (show_scores && show_utilization) {
            fprintf(stdout, "Allocation scores and utilization information:\n");
        } else if (show_scores) {
            fprintf(stdout, "Allocation scores:\n");
        } else if (show_utilization) {
            fprintf(stdout, "Utilization information:\n");
        }
        do_calculations(&data_set, cib_object, a_date);
    }

    /* Dump the transition graph: "-" means stdout, any other name is a file. */
    msg_buffer = dump_xml_formatted(data_set.graph);
    if (safe_str_eq(graph_file, "-")) {
        fprintf(stdout, "%s\n", msg_buffer);
        fflush(stdout);
    } else if (graph_file != NULL) {
        FILE *graph_strm = fopen(graph_file, "w");

        if (graph_strm == NULL) {
            crm_perror(LOG_ERR, "Could not open %s for writing", graph_file);
        } else {
            if (fprintf(graph_strm, "%s\n\n", msg_buffer) < 0) {
                crm_perror(LOG_ERR, "Write to %s failed", graph_file);
            }
            fflush(graph_strm);
            fclose(graph_strm);
        }
    }
    free(msg_buffer);

    if (dot_file != NULL) {
        dot_strm = fopen(dot_file, "w");
        if (dot_strm == NULL) {
            crm_perror(LOG_ERR, "Could not open %s for writing", dot_file);
        }
    }

    /* Without an open DOT stream the whole DOT section is skipped, so the
     * crm_debug() branch of dot_write is not reached from this function. */
    if (dot_strm == NULL) {
        goto simulate;
    }

    init_dotfile();
    /*
     * First pass: one DOT node per action.
     *   already dumped        bold,   green
     *   unmanaged resource    dashed, purple  (skipped unless all_actions)
     *   optional              dashed, blue    (skipped unless all_actions)
     *   anything else         dashed, red     (CRM_CHECK expects it not runnable)
     * Pseudo actions get an orange font.
     */
    for (lpc = data_set.actions; lpc != NULL; lpc = lpc->next) {
        action_t *action = (action_t *) lpc->data;
        /* NOTE(review): "filled" is overwritten with "dashed" before any use */
        const char *style = "filled";
        const char *font = "black";
        const char *color = "black";
        /* NOTE(review): fill is never assigned, so no fillcolor is ever written */
        const char *fill = NULL;
        char *action_name = create_action_name(action);

        crm_trace("Action %d: %p", action->id, action);

        if (is_set(action->flags, pe_action_pseudo)) {
            font = "orange";
        }

        style = "dashed";
        if (is_set(action->flags, pe_action_dumped)) {
            style = "bold";
            color = "green";

        } else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) {
            color = "purple";
            if (all_actions == FALSE) {
                goto dont_write;
            }

        } else if (is_set(action->flags, pe_action_optional)) {
            color = "blue";
            if (all_actions == FALSE) {
                goto dont_write;
            }

        } else {
            color = "red";
            CRM_CHECK(is_set(action->flags, pe_action_runnable) == FALSE,;
                );
        }

        /* Every action that gets a node is flagged as dumped; the edge pass
         * below reads this flag. */
        set_bit(action->flags, pe_action_dumped);
        dot_write("\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\" %s%s]",
                  action_name, style, color, font, fill ? "fillcolor=" : "", fill ? fill : "");
  dont_write:
        free(action_name);
    }

    /*
     * Second pass: one DOT edge per ordering (before -> action).
     * An edge is written when all_actions is set or the ordering is not
     * optional; dumped links are drawn bold, all others dashed.
     */
    for (lpc = data_set.actions; lpc != NULL; lpc = lpc->next) {
        action_t *action = (action_t *) lpc->data;

        GListPtr lpc2 = NULL;

        for (lpc2 = action->actions_before; lpc2 != NULL; lpc2 = lpc2->next) {
            action_wrapper_t *before = (action_wrapper_t *) lpc2->data;

            char *before_name = NULL;
            char *after_name = NULL;
            const char *style = "dashed";

            optional = TRUE;
            if (before->state == pe_link_dumped) {
                optional = FALSE;
                style = "bold";
            } else if (is_set(action->flags, pe_action_pseudo)
                       && (before->type & pe_order_stonith_stop)) {
                continue;
            } else if (before->state == pe_link_dup) {
                continue;
            } else if (before->type == pe_order_none) {
                continue;
            } else if (is_set(before->action->flags, pe_action_dumped)
                       && is_set(action->flags, pe_action_dumped)) {
                /* both ends were written as nodes in the first pass */
                optional = FALSE;
            }

            if (all_actions || optional == FALSE) {
                before_name = create_action_name(before->action);
                after_name = create_action_name(action);
                dot_write("\"%s\" -> \"%s\" [ style = %s]", before_name, after_name, style);
                free(before_name);
                free(after_name);
            }
        }
    }

    dot_write("}");
    if (dot_strm != NULL) {
        fflush(dot_strm);
        fclose(dot_strm);
    }

  simulate:

    if (do_simulation == FALSE) {
        goto cleanup;
    }

    /* Simulation: keep calling run_graph() for as long as it reports transition_active. */
    transition = unpack_graph(data_set.graph, "ptest");
    print_graph(LOG_DEBUG, transition);
    do {
        graph_rc = run_graph(transition);

    } while (graph_rc == transition_active);

    if (graph_rc != transition_complete) {
        crm_crit("Transition failed: %s", transition_status(graph_rc));
        print_graph(LOG_ERR, transition);
    }
    destroy_graph(transition);
    CRM_CHECK(graph_rc == transition_complete, all_good = FALSE;
              crm_err("An invalid transition was produced"));

  cleanup:
    /* NOTE(review): the extern declared near the top of this file is
     * cleanup_calculations(); the declaration of cleanup_alloc_calculations()
     * is not visible here - confirm */
    cleanup_alloc_calculations(&data_set);
    crm_log_deinit();

    /* required for MallocDebug.app */
    if (inhibit_exit) {
        GMainLoop *mainloop = g_main_new(FALSE);

        g_main_run(mainloop);
    }

    if (all_good) {
        return 0;
    }
    return graph_rc;
}
diff --git a/tools/crm_master b/tools/crm_master
index cd96877c45..7cce64dba3 100755
--- a/tools/crm_master
+++
b/tools/crm_master @@ -1,56 +1,56 @@
#!/bin/bash
#
# crm_master - convenience wrapper around crm_attribute.
#
# Translates its own options into a crm_attribute call that operates on the
# attribute "master-$OCF_RESOURCE_INSTANCE" of the target node.
#   -N/--node, -U/--uname   override the target node (default: `crm_node -n`)
#   -r/--resource           set OCF_RESOURCE_INSTANCE when it is not inherited
#                           from the environment
# Every other recognised option is passed through to crm_attribute unchanged.
# Exits 1 on a usage error, an unknown option or a missing resource ID.

# Default target is whatever crm_node reports for this node.
target=`crm_node -n`

# "verbose" and "query" are in the long-option list because the --help text
# below advertises them; without them getopt rejects --verbose and --query.
# (--delete needs no entry: getopt resolves it as an unambiguous abbreviation
# of --delete-attr.)
TEMP=`getopt -o qDGQVN:U:v:i:l:r: --long version,help,verbose,query,resource:,node:,uname:,attr-value:,id:,update:,delete-attr,get-value,attr-id:,lifetime:,quiet \
	-n 'crm_master' -- "$@"`

# $? here is the exit status of getopt inside the command substitution.
# '-?' is quoted so the shell cannot expand it as a glob against the current directory.
if [ $? != 0 ] ; then echo "crm_master - A convenience wrapper for crm_attribute"; echo ""; crm_attribute '-?'; exit 1 ; fi

# Note the quotes around `$TEMP': they are essential!
eval set -- "$TEMP"

while true ; do
	case "$1" in
	    # Options that choose the node.
	    -N|--node|-U|--uname) target="$2"; shift; shift;;
	    # Options with a value: forwarded together with that value.
	    -v|--attr-value|--update|-i|--id|--attr-id|-l|--lifetime) options="$options $1 $2"; shift; shift;;
	    # Flags without a value: forwarded as they are.
	    -Q|-q|--quiet|-D|--delete-attr|-G|--query|--get-value|-V|--verbose) options="$options $1"; shift;;
	    -r|--resource) OCF_RESOURCE_INSTANCE=$2; shift; shift;;
	    --version) crm_attribute --version; exit 0;;
	    --help)
		echo "crm_master - A convenience wrapper for crm_attribute";
		echo "";
		echo "Set, update or delete a resource's promotion score";
		echo "";
		echo "This program should normally only be invoked from inside an OCF resource agent"
		echo "";
		echo "Usage: crm_master command [options]";
		echo "Options:"
		echo " --help This text"
		echo " --version Version information"
		echo " -V, --verbose Increase debug output"
		echo " -q, --quiet Print only the value on stdout"
		echo ""
		echo "Commands:"
		echo " -G, --query Query the current value of the attribute/option"
		echo " -v, --update=value Update the value of the attribute/option"
		echo " -D, --delete Delete the attribute/option"
		echo ""
		echo "Additional Options:"
		echo " -N, --node=value Set an attribute for the named node (instead of the current one)."
		echo " -l, --lifetime=value Until when should the setting take effect."
		echo " Valid values: reboot, forever"
		echo " -i, --id=value (Advanced) The ID used to identify the attribute"
		exit 0;;
	    --) shift ; break ;;
	    *) echo "Unknown option: $1. See --help for details."
		exit 1;;
	esac
done

# The attribute name is derived from the resource ID, so it is mandatory.
if [ -z "$OCF_RESOURCE_INSTANCE" ]; then
    echo "This program should normally only be invoked from inside an OCF resource agent"
-    echo "To set the prmotion/master score from the command line, please specify a resource ID with -r"
+    echo "To set the promotion/master score from the command line, please specify a resource ID with -r"
    exit 1
fi

# $options is left unquoted on purpose: it holds several words that must be
# split back into separate arguments. The node and attribute names are quoted
# so they always arrive as single arguments.
crm_attribute -N "$target" -n "master-$OCF_RESOURCE_INSTANCE" $options