diff --git a/crmd/messages.c b/crmd/messages.c old mode 100755 new mode 100644 index 5dfa529398..0e985ab3b0 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -1,991 +1,1019 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); void handle_response(xmlNode *stored_msg); enum crmd_fsa_input handle_request(xmlNode *stored_msg); enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg); ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t *orig); gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data); #ifdef MSG_LOG # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x); \ crm_log_xml(LOG_MSG, "router.log", msg); #else # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x) #endif /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t *cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if(fsa_actions != A_NOTHING) { register_fsa_input_adv( cur_data?cur_data->fsa_cause:C_FSA_INTERNAL, I_NULL, cur_data?cur_data->data:NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv( cause, input, new_data, A_NOTHING, TRUE, raised_from); } int register_fsa_input_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; last_data_id++; CRM_CHECK(raised_from != NULL, raised_from = ""); crm_debug_2("%s %s FSA input %d (%s) (cause=%s) %s data", raised_from, prepend?"prepended":"appended",last_data_id, fsa_input2string(input), fsa_cause2string(cause), data?"with":"without"); if(input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: cause=%s", fsa_cause2string(cause)); if(old_len > 0) { crm_warn("%s stalled the FSA with pending inputs", raised_from); fsa_dump_queue(LOG_DEBUG); } if(data == NULL) { set_bit_inplace(fsa_actions, with_actions); with_actions = A_NOTHING; return 0; } crm_err("%s stalled the FSA with data - this may be broken", raised_from); } if(input == I_NULL && with_actions == A_NOTHING /* && data == NULL */){ /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if(with_actions != A_NOTHING) { crm_debug_3("Adding actions %.16llx to input", with_actions); } if(data != NULL) { switch(cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_debug_3("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t*)data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_debug_3("Copying %s data from %s as lrm_op_t", fsa_cause2string(cause), raised_from); fsa_data->data = copy_lrm_op((lrm_op_t*)data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); exit(1); break; } crm_debug_4("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if(prepend) { crm_debug_2("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_debug_2("Queue len: %d", g_list_length(fsa_message_queue)); fsa_dump_queue(LOG_DEBUG_2); if(old_len == g_list_length(fsa_message_queue)){ crm_err("Couldnt add message to the queue"); } if(fsa_source) { crm_debug_3("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } return last_data_id; } void fsa_dump_queue(int log_level) { if(log_level < (int)crm_log_level) { return; } slist_iter( data, fsa_data_t, fsa_message_queue, lpc, do_crm_log(log_level, "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)", lpc, data->id, fsa_input2string(data->fsa_input), data->origin, fsa_cause2string(data->fsa_cause)); ); } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t *orig) { ha_msg_input_t *copy = NULL; xmlNodePtr data = NULL; if(orig != NULL) { crm_debug_4("Copy msg"); data = copy_xml(orig->msg); } else { crm_debug_3("No message to copy"); } copy = new_ha_msg_input(data); if(orig && orig->msg != NULL) { CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); } return copy; } void delete_fsa_input(fsa_data_t *fsa_data) { lrm_op_t *op = NULL; xmlNode *foo = NULL; if(fsa_data == NULL) { return; } crm_debug_4("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if(fsa_data->data != NULL) { switch(fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrm_op_t*)fsa_data->data; free_lrm_op(op); break; case fsa_dt_none: if(fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); exit(1); } break; } crm_debug_4("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } crm_free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t* message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_debug_2("Processing input %d", message->id); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv( fsa_data_t *fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if(fsa_data == NULL) { do_crm_log(LOG_ERR, "%s: No FSA data available", caller); } else if(fsa_data->data == NULL) { do_crm_log(LOG_ERR, "%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if(fsa_data->data_type != a_type) { do_crm_log(LOG_CRIT, "%s: Message data was the wrong type! %d vs. requested=%d." " Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode *input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if(relay_message(input, cause==C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input); /* done or process later? */ switch(result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if(result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode *msg, gboolean originated_locally) { int dest = 1; int is_for_dc = 0; int is_for_dcib = 0; int is_for_te = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from= crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *msg_error = NULL; crm_debug_3("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); if(msg == NULL) { msg_error = "Cannot route empty message"; } else if(safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))){ /* quietly ignore */ processing_complete = TRUE; } else if(safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if(sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if(msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_xml(LOG_WARNING, "bad msg", msg); } if(processing_complete) { return TRUE; } processing_complete = TRUE; is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if(host_to == NULL || strlen(host_to) == 0) { if(is_for_dc || is_for_te) { is_local = 0; } else if(is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if(safe_str_eq(fsa_our_uname, host_to)) { is_local=1; } if(is_for_dc || is_for_dcib || is_for_te) { if(AM_I_DC && is_for_te) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else if(AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ #if SUPPORT_AIS if(is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay to DC"); send_cluster_message(host_to, dest, msg, TRUE); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); } } else if(is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else { #if SUPPORT_AIS if(is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay"); send_cluster_message(host_to, dest, msg, TRUE); } return processing_complete; } gboolean crmd_authorize_message(xmlNode *client_msg, crmd_client_t *curr_client) { /* check the best case first */ const char *sys_from = crm_element_value(client_msg, F_CRM_SYS_FROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; const char *filtered_from; gpointer table_key = NULL; gboolean auth_result = FALSE; struct crm_subsystem_s *the_subsystem = NULL; gboolean can_reply = FALSE; /* no-one has registered with this id */ xmlNode *xml = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); if (safe_str_neq(CRM_OP_HELLO, op)) { if(sys_from == NULL) { crm_warn("Message [%s] was had no value for %s... discarding", crm_element_value(client_msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM); return FALSE; } filtered_from = sys_from; /* The CIB can have two names on the DC */ if(strcasecmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup (ipc_clients, filtered_from) != NULL) { can_reply = TRUE; /* reply can be routed */ } crm_debug_2("Message reply can%s be routed from %s.", can_reply?"":" not", sys_from); if(can_reply == FALSE) { crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); } return can_reply; } crm_debug_3("received client join msg"); crm_log_xml(LOG_MSG, "join", client_msg); xml = get_message_xml(client_msg, F_CRM_DATA); auth_result = process_hello_message( xml, &uuid, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if(client_name == NULL || uuid == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), crm_str(uuid)); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_debug_3("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } crm_free(major_version); crm_free(minor_version); } if (safe_str_eq(CRM_SYSTEM_PENGINE, client_name)) { the_subsystem = pe_subsystem; } else if (safe_str_eq(CRM_SYSTEM_TENGINE, client_name)) { the_subsystem = te_subsystem; } /* TODO: Is this code required anymore?? */ if (auth_result == TRUE && the_subsystem != NULL) { /* if we already have one of those clients * only applies to te, pe etc. not admin clients */ crm_err("Checking if %s is required/already connected", client_name); table_key = (gpointer)crm_strdup(client_name); if(is_set(fsa_input_register, the_subsystem->flag_connected)) { auth_result = FALSE; crm_free(table_key); table_key = NULL; crm_warn("Bit\t%.16llx set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_err("Client %s is already connected", client_name); } else if(FALSE == is_set(fsa_input_register, the_subsystem->flag_required)) { crm_warn("Bit\t%.16llx not set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_warn("Client %s joined but we dont need it", client_name); stop_subsystem(the_subsystem, TRUE); } else { the_subsystem->ipc = curr_client->client_channel; set_bit_inplace(fsa_input_register, the_subsystem->flag_connected); } } else { table_key = (gpointer)generate_hash_key(client_name, uuid); } if (auth_result == TRUE) { crm_debug_2("Accepted client %s", crm_str(table_key)); curr_client->table_key = table_key; curr_client->sub_sys = crm_strdup(client_name); curr_client->uuid = crm_strdup(uuid); g_hash_table_insert (ipc_clients, table_key, curr_client->client_channel); send_hello_message(curr_client->client_channel, "n/a", CRM_SYSTEM_CRMD, "0", "1"); crm_debug_3("Updated client list with %s", crm_str(table_key)); crm_debug_3("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); if(the_subsystem != NULL) { CRM_CHECK(the_subsystem->client == NULL, process_client_disconnect(the_subsystem->client)); the_subsystem->client = curr_client; } } else { crm_free(table_key); crm_warn("Rejected client logon request"); curr_client->client_channel->ch_status = IPC_DISC_PENDING; } if(uuid != NULL) crm_free(uuid); if(minor_version != NULL) crm_free(minor_version); if(major_version != NULL) crm_free(major_version); if(client_name != NULL) crm_free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(xmlNode *msg) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, F_CRM_MSG_TYPE); if(crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) { return handle_request(msg); } else if(crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) { handle_response(msg); return I_NULL; } crm_err("Unknown message type: %s", type); return I_NULL; } +static enum crmd_fsa_input +handle_failcount_op(xmlNode *stored_msg) +{ + const char *rsc = NULL; + xmlNode *xml_rsc = get_xpath_object("//"XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); + + rsc = ID(xml_rsc); + crm_log_xml_info(stored_msg, "failcount op"); + + if(rsc) { + char *attr = NULL; + crm_info("Removing failcount for %s", rsc); + + attr = crm_concat("fail-count", rsc, '-'); + update_attrd(NULL, attr, NULL); + crm_free(attr); + + attr = crm_concat("last-failure", rsc, '-'); + update_attrd(NULL, attr, NULL); + crm_free(attr); + } + + return I_NULL; +} + enum crmd_fsa_input handle_request(xmlNode *stored_msg) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); /* Optimize this for the DC - it has the most to do */ if(op == NULL) { crm_log_xml(LOG_ERR, "Bad message", stored_msg); return I_NULL; } /*========== DC-Only Actions ==========*/ if(AM_I_DC) { if(strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if(strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if(strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; + } else if(strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { + return handle_failcount_op(stored_msg); + } else if(strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting ourselves down (DC)"); return I_STOP; } else if(dc_match) { crm_err("We didnt ask to be shut down, yet our" " TE is telling us too." " Better get out now!"); return I_TERMINATE; } else if(fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); return I_ELECTION; } } else if(strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if(strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT|A_ELECTION_CHECK, FALSE, __FUNCTION__); } else if(strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT|A_ELECTION_CHECK, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if(fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; #if 0 } else if(AM_I_DC) { /* This is the old way of doing things but what is gained? */ return I_ELECTION; #endif } } else if(strcmp(op, CRM_OP_JOIN_OFFER) == 0) { crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_OFFER; } else if(strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_RESULT; } else if(strcmp(op, CRM_OP_LRM_DELETE) == 0 || strcmp(op, CRM_OP_LRM_FAIL) == 0 || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if(strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if(strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*return I_SHUTDOWN; */ return I_NULL; /*========== (NOT_DC)-Only Actions ==========*/ } else if(AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(dc_match || fsa_our_dc == NULL) { if(is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { crm_err("We didnt ask to be shut down, yet our" " DC is telling us too."); set_bit_inplace(fsa_input_register, R_STAYDOWN); return I_STOP; } crm_info("Shutting down"); return I_STOP; } else { crm_warn("Discarding %s op from %s", op, host_from); } } else if(strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); xmlNode *ping = createPingAnswerFragment(sys_to, "ok"); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); crm_info("Current ping state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg, ping); relay_message(msg, TRUE); free_xml(ping); free_xml(msg); /* probably better to do this via signals on the * local node */ } else if(strcmp(op, CRM_OP_DEBUG_UP) == 0) { alter_debug(DEBUG_INC); crm_info("Debug set to %d", get_crm_log_level()); } else if(strcmp(op, CRM_OP_DEBUG_DOWN) == 0) { alter_debug(DEBUG_DEC); crm_info("Debug set to %d", get_crm_log_level()); } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC?"the DC":"non-DC node"); crm_log_xml(LOG_ERR, "Unexpected", stored_msg); } return I_NULL; } void handle_response(xmlNode *stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); if(op == NULL) { crm_log_xml(LOG_ERR, "Bad message", stored_msg); } else if(AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { /* Check if the PE answer been superceeded by a subsequent request? */ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); if(msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if(safe_str_eq(msg_ref, fsa_pe_ref)) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); crm_debug_2("Completed: %s...", fsa_pe_ref); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if(strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC?"DC":"CRMd"); } } enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; time_t now = time(NULL); xmlNode *node_state = NULL; const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if(host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state)); crm_log_xml(LOG_MSG, "message", stored_msg); node_state = create_node_state( host_from, NULL, NULL, NULL, NULL, CRMD_STATE_INACTIVE, FALSE, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, node_state, cib_quorum_override); crm_log_xml_debug_2(node_state, "Shutdown update"); free_xml(node_state); now_s = crm_itoa(now); update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s); crm_free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode *msg, xmlNode *xml_data); gboolean send_msg_via_ipc(xmlNode *msg, const char *sys) { gboolean send_ok = TRUE; IPC_Channel *client_channel; client_channel = (IPC_Channel*)g_hash_table_lookup(ipc_clients, sys); if(crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { crm_debug_3("Sending message via channel %s.", sys); send_ok = send_ipc_message(client_channel, msg); } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); process_te_message(msg, data); } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; fsa_input.msg = msg; fsa_input.xml = get_message_xml(msg, F_CRM_DATA); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __FUNCTION__; fsa_data.data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_debug_2("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE); #endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); } else { crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } return send_ok; } void msg_queue_helper(void) { #if SUPPORT_HEARTBEAT IPC_Channel *ipc = NULL; if(fsa_cluster_conn != NULL) { ipc = fsa_cluster_conn->llc_ops->ipcchan( fsa_cluster_conn); } if(ipc != NULL) { ipc->ops->resume_io(ipc); } /* g_hash_table_foreach_remove(ipc_clients, ipc_queue_helper, NULL); */ #endif } gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *ipc_client = value; if(ipc_client->client_channel != NULL) { ipc_client->client_channel->ops->is_message_pending(ipc_client->client_channel); } return FALSE; } diff --git a/include/crm/crm.h b/include/crm/crm.h index 99bc697751..b7d21337c5 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,331 +1,332 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM__H #define CRM__H #include #include #include #undef MIN #undef MAX #include #include #include #define CRM_FEATURE_SET "3.0.1" #define MINIMUM_SCHEMA_VERSION "pacemaker-1.0" #define LATEST_SCHEMA_VERSION "pacemaker-"CRM_DTD_VERSION #define EOS '\0' #define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) #ifndef __GNUC__ # define __builtin_expect(expr, result) (expr) #endif /* Some handy macros used by the Linux kernel */ #define __likely(expr) __builtin_expect(expr, 1) #define __unlikely(expr) __builtin_expect(expr, 0) #define CRM_DEPRECATED_SINCE_2_0_1 0 #define CRM_DEPRECATED_SINCE_2_0_2 0 #define CRM_DEPRECATED_SINCE_2_0_3 0 #define CRM_DEPRECATED_SINCE_2_0_4 0 #define CRM_DEPRECATED_SINCE_2_0_5 0 #define CRM_DEPRECATED_SINCE_2_0_6 1 #define CRM_DEPRECATED_SINCE_2_0_7 1 #define CRM_DEPRECATED_SINCE_2_0_8 1 #define CRM_DEPRECATED_SINCE_2_1_0 1 #define CRM_META "CRM_meta" #define CRM_ASSERT(expr) do { \ if((expr) == FALSE) { \ crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, #expr, TRUE, FALSE); \ } \ } while(0) #define CRM_DEV_ASSERT(expr) do { \ if((expr) == FALSE) { \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, FALSE, TRUE); \ } \ } while(0) #define CRM_CHECK(expr, failure_action) do { \ if((expr) == FALSE) { \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, crm_log_level>LOG_INFO?TRUE:FALSE, TRUE); \ failure_action; \ } \ } while(0) #define CRM_CHECK_AND_STORE(expr, failure_action) do { \ if((expr) == FALSE) { \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, TRUE, TRUE); \ failure_action; \ } \ } while(0) extern const char *crm_system_name; /* Clean these up at some point, some probably should be runtime options */ #define SOCKET_LEN 1024 #define APPNAME_LEN 256 #define MAX_IPC_FAIL 5 #define MAX_IPC_DELAY 120 #define MSG_LOG 1 #define DOT_FSA_ACTIONS 1 #define DOT_ALL_FSA_INPUTS 1 /* #define FSA_TRACE 1 */ #define INFINITY_S "INFINITY" #define MINUS_INFINITY_S "-INFINITY" #define INFINITY 1000000 /* Sub-systems */ #define CRM_SYSTEM_DC "dc" #define CRM_SYSTEM_DCIB "dcib" /* The master CIB */ #define CRM_SYSTEM_CIB "cib" #define CRM_SYSTEM_CRMD "crmd" #define CRM_SYSTEM_LRMD "lrmd" #define CRM_SYSTEM_PENGINE "pengine" #define CRM_SYSTEM_TENGINE "tengine" #define CRM_SYSTEM_STONITHD "stonithd" /* Valid operations */ #define CRM_OP_NOOP "noop" #define CRM_OP_JOIN_ANNOUNCE "join_announce" #define CRM_OP_JOIN_OFFER "join_offer" #define CRM_OP_JOIN_REQUEST "join_request" #define CRM_OP_JOIN_ACKNAK "join_ack_nack" #define CRM_OP_JOIN_CONFIRM "join_confirm" #define CRM_OP_DIE "die_no_respawn" #define CRM_OP_RETRIVE_CIB "retrieve_cib" #define CRM_OP_PING "ping" #define CRM_OP_VOTE "vote" #define CRM_OP_NOVOTE "no-vote" #define CRM_OP_HELLO "hello" #define CRM_OP_HBEAT "dc_beat" #define CRM_OP_PECALC "pe_calc" #define CRM_OP_ABORT "abort" #define CRM_OP_QUIT "quit" #define CRM_OP_LOCAL_SHUTDOWN "start_shutdown" #define CRM_OP_SHUTDOWN_REQ "req_shutdown" #define CRM_OP_SHUTDOWN "do_shutdown" #define CRM_OP_FENCE "stonith" #define CRM_OP_EVENTCC "event_cc" #define CRM_OP_TEABORT "te_abort" #define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */ #define CRM_OP_TE_HALT "te_halt" #define CRM_OP_TECOMPLETE "te_complete" #define CRM_OP_TETIMEOUT "te_timeout" #define CRM_OP_TRANSITION "transition" #define CRM_OP_REGISTER "register" #define CRM_OP_DEBUG_UP "debug_inc" #define CRM_OP_DEBUG_DOWN "debug_dec" #define CRM_OP_INVOKE_LRM "lrm_invoke" #define CRM_OP_LRM_REFRESH "lrm_refresh" #define CRM_OP_LRM_QUERY "lrm_query" #define CRM_OP_LRM_DELETE "lrm_delete" #define CRM_OP_LRM_FAIL "lrm_fail" #define CRM_OP_PROBED "probe_complete" #define CRM_OP_REPROBE "probe_again" +#define CRM_OP_CLEAR_FAILCOUNT "clear_failcount" #define CRMD_STATE_ACTIVE "member" #define CRMD_STATE_INACTIVE "down" #define CRMD_JOINSTATE_DOWN CRMD_STATE_INACTIVE #define CRMD_JOINSTATE_PENDING "pending" #define CRMD_JOINSTATE_MEMBER CRMD_STATE_ACTIVE #define CRMD_JOINSTATE_NACK "banned" #define CRMD_ACTION_DELETE "delete" #define CRMD_ACTION_CANCEL "cancel" #define CRMD_ACTION_MIGRATE "migrate_to" #define CRMD_ACTION_MIGRATED "migrate_from" #define CRMD_ACTION_START "start" #define CRMD_ACTION_STARTED "running" #define CRMD_ACTION_STOP "stop" #define CRMD_ACTION_STOPPED "stopped" #define CRMD_ACTION_PROMOTE "promote" #define CRMD_ACTION_PROMOTED "promoted" #define CRMD_ACTION_DEMOTE "demote" #define CRMD_ACTION_DEMOTED "demoted" #define CRMD_ACTION_NOTIFY "notify" #define CRMD_ACTION_NOTIFIED "notified" #define CRMD_ACTION_STATUS "monitor" /* short names */ #define RSC_DELETE CRMD_ACTION_DELETE #define RSC_CANCEL CRMD_ACTION_CANCEL #define RSC_MIGRATE CRMD_ACTION_MIGRATE #define RSC_MIGRATED CRMD_ACTION_MIGRATED #define RSC_START CRMD_ACTION_START #define RSC_STARTED CRMD_ACTION_STARTED #define RSC_STOP CRMD_ACTION_STOP #define RSC_STOPPED CRMD_ACTION_STOPPED #define RSC_PROMOTE CRMD_ACTION_PROMOTE #define RSC_PROMOTED CRMD_ACTION_PROMOTED #define RSC_DEMOTE CRMD_ACTION_DEMOTE #define RSC_DEMOTED CRMD_ACTION_DEMOTED #define RSC_NOTIFY CRMD_ACTION_NOTIFY #define RSC_NOTIFIED CRMD_ACTION_NOTIFIED #define RSC_STATUS CRMD_ACTION_STATUS typedef GList* GListPtr; #define slist_destroy(child_type, child, parent, a) \ { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ while(__crm_iter_head != NULL) { \ child = (child_type *) __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ g_list_free(parent); \ } #define slist_iter(child, child_type, parent, counter, a) \ { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ int counter = 0; \ for(; __crm_iter_head != NULL; counter++) { \ child = (child_type *) __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ } #define LOG_DEBUG_2 LOG_DEBUG+1 #define LOG_DEBUG_3 LOG_DEBUG+2 #define LOG_DEBUG_4 LOG_DEBUG+3 #define LOG_DEBUG_5 LOG_DEBUG+4 #define LOG_DEBUG_6 LOG_DEBUG+5 #define LOG_MSG LOG_DEBUG_3 /* * Throughout the macros below, note the leading, pre-comma, space in the * various ' , ##args' occurences to aid portability across versions of 'gcc'. * http://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html#Variadic-Macros */ #define do_crm_log(level, fmt, args...) do { \ if(__unlikely(crm_log_level < (level))) { \ continue; \ } else if(__likely((level) < LOG_DEBUG_2)) { \ cl_log(level, "%s: " fmt, __PRETTY_FUNCTION__ , ##args); \ } else { \ cl_log(LOG_DEBUG, "debug%d: %s: " fmt, \ level-LOG_INFO, __PRETTY_FUNCTION__ , ##args); \ } \ } while(0) #define do_crm_log_unlikely(level, fmt, args...) do { \ if(__likely(crm_log_level < (level))) { \ continue; \ } else if((level) < LOG_DEBUG_2) { \ cl_log(level, "%s: " fmt, __PRETTY_FUNCTION__ , ##args); \ } else { \ cl_log(LOG_DEBUG, "debug%d: %s: " fmt, \ level-LOG_INFO, __PRETTY_FUNCTION__ , ##args); \ } \ } while(0) #define do_crm_log_always(level, fmt, args...) cl_log(level, "%s: " fmt, __PRETTY_FUNCTION__ , ##args) #define crm_crit(fmt, args...) do_crm_log_always(LOG_CRIT, fmt , ##args) #define crm_err(fmt, args...) do_crm_log(LOG_ERR, fmt , ##args) #define crm_warn(fmt, args...) do_crm_log(LOG_WARNING, fmt , ##args) #define crm_notice(fmt, args...) do_crm_log(LOG_NOTICE, fmt , ##args) #define crm_info(fmt, args...) do_crm_log(LOG_INFO, fmt , ##args) #define crm_debug(fmt, args...) do_crm_log_unlikely(LOG_DEBUG, fmt , ##args) #define crm_debug_2(fmt, args...) do_crm_log_unlikely(LOG_DEBUG_2, fmt , ##args) #define crm_debug_3(fmt, args...) do_crm_log_unlikely(LOG_DEBUG_3, fmt , ##args) #define crm_debug_4(fmt, args...) do_crm_log_unlikely(LOG_DEBUG_4, fmt , ##args) #define crm_debug_5(fmt, args...) do_crm_log_unlikely(LOG_DEBUG_5, fmt , ##args) #define crm_debug_6(fmt, args...) do_crm_log_unlikely(LOG_DEBUG_6, fmt , ##args) #define crm_perror(level, fmt, args...) do { \ const char *err = strerror(errno); \ fprintf(stderr, fmt ": %s (%d)\n", ##args, err, errno); \ do_crm_log(level, fmt ": %s (%d)", ##args, err, errno); \ } while(0) #include #define crm_log_xml(level, text, xml) if(crm_log_level >= (level)) { \ print_xml_formatted(level, __PRETTY_FUNCTION__, xml, text); \ } #define crm_log_xml_crit(xml, text) crm_log_xml(LOG_CRIT, text, xml) #define crm_log_xml_err(xml, text) crm_log_xml(LOG_ERR, text, xml) #define crm_log_xml_warn(xml, text) crm_log_xml(LOG_WARNING, text, xml) #define crm_log_xml_notice(xml, text) crm_log_xml(LOG_NOTICE, text, xml) #define crm_log_xml_info(xml, text) crm_log_xml(LOG_INFO, text, xml) #define crm_log_xml_debug(xml, text) crm_log_xml(LOG_DEBUG, text, xml) #define crm_log_xml_debug_2(xml, text) crm_log_xml(LOG_DEBUG_2, text, xml) #define crm_log_xml_debug_3(xml, text) crm_log_xml(LOG_DEBUG_3, text, xml) #define crm_log_xml_debug_4(xml, text) crm_log_xml(LOG_DEBUG_4, text, xml) #define crm_log_xml_debug_5(xml, text) crm_log_xml(LOG_DEBUG_5, text, xml) #define crm_str(x) (const char*)(x?x:"") #define crm_malloc0(malloc_obj, length) do { \ malloc_obj = malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ memset(malloc_obj, 0, length); \ } while(0) #define crm_malloc(malloc_obj, length) do { \ malloc_obj = malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ } while(0) #define crm_realloc(realloc_obj, length) do { \ realloc_obj = realloc(realloc_obj, length); \ CRM_ASSERT(realloc_obj != NULL); \ } while(0) #define crm_free(free_obj) do { free(free_obj); free_obj=NULL; } while(0) #define crm_msg_del(msg) do { if(msg != NULL) { ha_msg_del(msg); msg = NULL; } } while(0) #define crm_strdup(str) crm_strdup_fn(str, __FILE__, __PRETTY_FUNCTION__, __LINE__) #endif diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 72dbef23ac..c546607a01 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,1743 +1,1752 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit_inplace(data_set->flags, flag); \ } else { \ clear_bit_inplace(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op( resource_t *rsc, node_t *node, xmlNode *xml_op, enum action_fail_response *failed, pe_working_set_t *data_set); static void pe_fence_node(pe_working_set_t *data_set, node_t *node, const char *reason) { CRM_CHECK(node, return); if(node->details->unclean == FALSE) { if(is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } } node->details->unclean = TRUE; } gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); crm_info("Startup probes: %s", is_set(data_set->flags, pe_flag_startup_probes)?"enabled":"disabled (dangerous)"); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled)?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything)?"true":"false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if(is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if(safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE){ crm_config_err("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if(do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_debug_2("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans)?"stopped":"ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_debug_2("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans)?"stopped":"ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop)?"true":"false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_debug_2("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode)?"true":"false"); if(is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_debug_2("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default)?"":"not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_debug_2("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal)?"always fatal":"handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_info("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_debug_2("Placement strategy: %s", data_set->placement_strategy); return TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } if(pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->utilization = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes( data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); if(is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; xmlNode * lrm_rsc = NULL; xmlNode * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Beginning unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; add_node_attrs(attrs, this_node, TRUE, data_set); if(crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "because the cluster does not have quorum"); } if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t *data_set, xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "because it is partially and/or un-expectedly down"); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t *data_set, xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(terminate)) { do_terminate = TRUE; } else if(terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if(t != '0' && isdigit(t)) { do_terminate = TRUE; } } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; if(do_terminate) { pe_fence_node(data_set, this_node, "because termination was requested"); this_node->details->shutdown = TRUE; } } else if(join_state == exp_state /* == NULL */) { crm_info("Node %s is coming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else if(safe_str_eq(join_state, CRMD_JOINSTATE_PENDING)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else if(safe_str_eq(join_state, CRMD_JOINSTATE_NACK)) { crm_warn("Node %s is not part of the cluster", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else { crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); pe_fence_node(data_set, this_node, "because it is un-expectedly down"); } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); #if 0 /* While a nice optimization, it causes the cluster to block until the node * comes back online. Which is a serious problem if the cluster software * is not configured to start at boot or stonith is configured to merely * stop the node instead of restart it. * Easily triggered by setting terminate=true for the DC */ } else if(do_terminate) { crm_info("Node %s is %s after forced termination", this_node->details->uname, crm_is_true(ccm_state)?"coming up":"going down"); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); if(crm_is_true(ccm_state) == FALSE) { this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } #endif } else if(this_node->details->expected_up) { /* mark it unclean */ pe_fence_node(data_set, this_node, "because it is un-expectedly down"); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is down", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( xmlNode * node_state, node_t *this_node, pe_working_set_t *data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if(shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing( data_set, node_state, this_node); } else { online = determine_online_status_fencing( data_set, node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->pending?"pending": this_node->details->standby?"standby":"online"); } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while(--lpc > 0) { switch(last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': crm_malloc0(zero, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc+1] = '0'; zero[lpc+2] = 0; return zero; } } return NULL; } char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len-1; while(complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; crm_malloc0(last_rsc_id, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len+1] = 0; complete = TRUE; crm_free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); break; } } return last_rsc_id; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); common_unpack(xml_rsc, &rsc, NULL, data_set); set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t *rsc, int sub_id, pe_working_set_t *data_set); static resource_t *find_clone(pe_working_set_t *data_set, node_t *node, resource_t *parent, const char *rsc_id) { int len = 0; resource_t *rsc = NULL; char *base = clone_zero(rsc_id); char *alt_rsc_id = crm_strdup(rsc_id); CRM_ASSERT(parent != NULL); CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master); if(base) { len = strlen(base); } if(len > 0) { base[len-1] = 0; } crm_debug_3("Looking for %s on %s in %s %d", rsc_id, node->details->uname, parent->id, is_set(parent->flags, pe_rsc_unique)); if(is_set(parent->flags, pe_rsc_unique)) { crm_debug_3("Looking for %s", rsc_id); rsc = parent->fns->find_rsc(parent, rsc_id, FALSE, FALSE, NULL, TRUE); } else { rsc = parent->fns->find_rsc(parent, base, FALSE, TRUE, node, TRUE); if(rsc != NULL && rsc->running_on) { rsc = NULL; crm_debug_3("Looking for an existing orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname); /* There is already an instance of this _anonymous_ clone active on "node". * * If there is a partially active orphan (only applies to clone groups) on * the same node, use that. * Otherwise create a new (orphaned) instance at "orphan_check:". */ slist_iter(child, resource_t, parent->children, lpc, node_t *loc = child->fns->location(child, NULL, TRUE); if(loc && loc->details == node->details) { resource_t *tmp = child->fns->find_rsc(child, base, FALSE, TRUE, NULL, TRUE); if(tmp && tmp->running_on == NULL) { rsc = tmp; break; } } ); goto orphan_check; } while(rsc == NULL) { crm_debug_3("Trying %s", alt_rsc_id); rsc = parent->fns->find_rsc(parent, alt_rsc_id, FALSE, FALSE, NULL, TRUE); if(rsc == NULL) { break; } else if(rsc->running_on == NULL) { break; } alt_rsc_id = increment_clone(alt_rsc_id); rsc = NULL; } } orphan_check: if(rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); crm_debug("Created orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname); rsc = top->fns->find_rsc(top, base, FALSE, TRUE, NULL, TRUE); CRM_ASSERT(rsc != NULL); } crm_free(rsc->clone_name); rsc->clone_name = NULL; if(safe_str_neq(rsc_id, rsc->id)) { crm_info("Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan)?" (ORPHAN)":""); rsc->clone_name = crm_strdup(rsc_id); } crm_free(alt_rsc_id); crm_free(base); return rsc; } static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, xmlNode *rsc_entry) { resource_t *rsc = NULL; resource_t *clone_parent = NULL; char *alt_rsc_id = crm_strdup(rsc_id); crm_debug_2("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(alt_rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); clone_parent = uber_parent(clone0); crm_free(tmp); crm_debug_2("%s not found: %s", alt_rsc_id, clone_parent?clone_parent->id:"orphan"); } else { clone_parent = uber_parent(rsc); } if(clone_parent && clone_parent->variant > pe_group) { rsc = find_clone(data_set, node, clone_parent, rsc_id); CRM_ASSERT(rsc != NULL); } crm_free(alt_rsc_id); return rsc; } static resource_t * process_orphan_resource(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, xmlNode *migrate_op, pe_working_set_t *data_set) { if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); on_fail = action_fail_recover; from = pe_find_node_id(data_set->nodes, uuid); if(from != NULL) { process_rsc_state(rsc, from, on_fail, NULL, data_set); } else { crm_log_xml_err(migrate_op, "Bad Op"); } } crm_debug_2("Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch(on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ pe_fence_node(data_set, node, "to recover from resource failure(s)"); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__",data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_migrate_failure: /* anything extra? */ break; } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if(is_set(rsc->flags, pe_rsc_orphan)) { if(is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if(on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if(rsc->clone_name) { crm_debug_2("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); crm_free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); slist_iter(stop, action_t, possible_matches, lpc, stop->optional = TRUE; ); crm_free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t *node, resource_t *rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t *data_set) { const char *task = NULL; const char *status = NULL; crm_debug_3("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; if(node->details->online == FALSE) { crm_debug_4("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; } else if(start_index < stop_index) { crm_debug_4("Skipping %s/%s: not active", rsc->id, node->details->uname); break; } else if(lpc <= start_index) { crm_debug_4("Skipping %s/%s: old", id, node->details->uname); continue; } interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0) { crm_debug_4("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(status, "-1")) { crm_debug_4("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_debug_3("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); ); } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = lpc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { *start_index = lpc; } else if(*start_index <= *stop_index && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { *start_index = lpc; } } ); } static void unpack_lrm_rsc_state( node_t *node, xmlNode * rsc_entry, pe_working_set_t *data_set) { int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_debug_3("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if(rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &on_fail, data_set); ); /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if(get_target_role(rsc, &req_role)) { if(rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if(req_role > rsc->next_role) { crm_info("%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if(saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t *node, xmlNode * lrm_rsc_list, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return FALSE); crm_debug_3("Unpacking resources on %s", node->details->uname); xml_child_iter_filter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, unpack_lrm_rsc_state(node, rsc_entry, data_set); ); return TRUE; } static void set_active(resource_t *rsc) { resource_t *top = uber_parent(rsc); if(top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, enum action_fail_response *on_fail, pe_working_set_t *data_set) { const char *id = NULL; const char *key = NULL; const char *task = NULL; const char *magic = NULL; const char *task_id = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_digest = NULL; const char *op_version = NULL; int interval = 0; int task_status_i = -2; int actual_rc_i = 0; int target_rc = -1; action_t *action = NULL; node_t *effective_node = NULL; resource_t *failed = NULL; gboolean expired = FALSE; gboolean is_probe = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_digest = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE); if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if(rsc->failure_timeout > 0) { int last_run = 0; if(crm_element_value_int(xml_op, "last-run", &last_run) == 0) { /* int last_change = crm_element_value_int(xml_op, "last_rc_change"); */ time_t now = get_timet_now(data_set); if(now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if(key) { int dummy = 0; char *dummy_string = NULL; decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); crm_free(dummy_string); } if(task_status_i == LRM_OP_DONE && target_rc >= 0) { if(target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; crm_debug("%s on %s returned %d (%s) instead of the expected value: %d (%s)", id, node->details->uname, actual_rc_i, execra_code2string(actual_rc_i), target_rc, execra_code2string(target_rc)); } } else if(task_status_i == LRM_OP_ERROR) { /* let us decide that */ task_status_i = LRM_OP_DONE; } if(task_status_i == LRM_OP_NOTSUPPORTED) { actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE; } + if(task_status_i != actual_rc_i && get_failcount(node, rsc, NULL, data_set) == 0) { + action_t *clear_op = NULL; + clear_op = custom_action( + rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), + CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); + + add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); + crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); + } + if(expired && actual_rc_i != EXECRA_NOT_RUNNING && actual_rc_i != EXECRA_RUNNING_MASTER && actual_rc_i != EXECRA_OK) { crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; - } - + } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch(actual_rc_i) { case EXECRA_NOT_RUNNING: if(is_probe || target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_neq(task, CRMD_ACTION_STOP)) { task_status_i = LRM_OP_ERROR; } break; case EXECRA_RUNNING_MASTER: if(is_probe) { task_status_i = LRM_OP_DONE; crm_notice("Operation %s found resource %s active in master mode on %s", id, rsc->id, node->details->uname); } else if(target_rc == actual_rc_i) { /* nothing to do */ } else if(target_rc >= 0) { task_status_i = LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if(safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { task_status_i = LRM_OP_ERROR; if(rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = LRM_OP_ERROR; break; case EXECRA_UNIMPLEMENT_FEATURE: if(interval > 0) { task_status_i = LRM_OP_NOTSUPPORTED; break; } /* else: fall through */ case EXECRA_INSUFFICIENT_PRIV: case EXECRA_NOT_INSTALLED: case EXECRA_INVALID_PARAM: effective_node = node; /* fall through */ case EXECRA_NOT_CONFIGURED: failed = rsc; if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(failed); } do_crm_log(actual_rc_i==EXECRA_NOT_INSTALLED?LOG_NOTICE:LOG_ERR, "Hard error - %s failed with rc=%d: Preventing %s from re-starting %s %s", id, actual_rc_i, failed->id, effective_node?"on":"anywhere", effective_node?effective_node->details->uname:"in the cluster"); resource_location(failed, effective_node, -INFINITY, "hard-error", data_set); if(is_probe) { /* treat these like stops */ task = CRMD_ACTION_STOP; task_status_i = LRM_OP_DONE; crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); } break; case EXECRA_OK: if(is_probe && target_rc == 7) { task_status_i = LRM_OP_DONE; crm_notice("Operation %s found resource %s active on %s", id, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if(target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = LRM_OP_ERROR; } break; default: if(task_status_i == LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_ERROR; } } if(task_status_i == LRM_OP_ERROR || task_status_i == LRM_OP_TIMEOUT || task_status_i == LRM_OP_NOTSUPPORTED) { action = custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set); if(expired) { crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } else if(action->on_fail == action_fail_ignore) { crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_DONE; } } switch(task_status_i) { case LRM_OP_PENDING: if(safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(actual_rc_i == EXECRA_NOT_RUNNING) { /* nothing to do */ } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ switch(*on_fail) { case action_fail_block: case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: crm_debug_2("%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_ignore: case action_fail_recover: case action_migrate_failure: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); set_active(rsc); } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s (%d)", id, node->details->uname, execra_code2string(actual_rc_i), actual_rc_i); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); if(*on_fail < action->on_fail) { *on_fail = action->on_fail; } if(safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location( rsc, node, -INFINITY, "__stop_fail__", data_set); } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if(compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", id, node->details->uname); resource_location( rsc, node, -INFINITY, "__legacy_start__", data_set); } if(rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean?"true":"false", fail2text(action->on_fail), role2text(action->fail_role)); if(action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if(action->fail_role == RSC_ROLE_STOPPED) { crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } pe_free_action(action); action = NULL; break; case LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; } done: crm_debug_3("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter) { int stop_index = -1; int start_index = -1; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if(active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { crm_debug_4("Skipping %s: not active", ID(rsc_entry)); break; } else if(lpc < start_index) { crm_debug_4("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); ); g_list_free(sorted_op_list); return op_list; } GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); const char *uname = NULL; node_t *this_node = NULL; xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, uname = crm_element_value(node_state, XML_ATTR_UNAME); if(node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( tmp, lrm_rsc, XML_LRM_TAG_RESOURCE, const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if(rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); ); } ); return output; } diff --git a/pengine/graph.c b/pengine/graph.c index fa9ef94ca5..ea576b5a0b 100644 --- a/pengine/graph.c +++ b/pengine/graph.c @@ -1,772 +1,775 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include gboolean update_action(action_t *action); gboolean update_action_states(GListPtr actions) { crm_debug_2("Updating %d actions", g_list_length(actions)); slist_iter( action, action_t, actions, lpc, update_action(action); ); return TRUE; } static gboolean any_possible(resource_t *rsc, const char *task) { if(rsc && rsc->children) { slist_iter(child, resource_t, rsc->children, lpc, if(any_possible(child, task)) { return TRUE; } ); } else if(rsc) { slist_iter(op, action_t, rsc->actions, lpc, if(task && safe_str_neq(op->task, task)) { continue; } if(op->runnable) { return TRUE; } ); } return FALSE; } static action_t *first_required(resource_t *rsc, const char *task) { if(rsc && rsc->children) { slist_iter(child, resource_t, rsc->children, lpc, action_t *op = first_required(child, task); if(op) { return op; } ); } else if(rsc) { slist_iter(op, action_t, rsc->actions, lpc, if(task && safe_str_neq(op->task, task)) { continue; } if(op->optional == FALSE) { return op; } ); } return NULL; } gboolean update_action(action_t *action) { int local_type = 0; int default_log_level = LOG_DEBUG_3; int log_level = default_log_level; gboolean changed = FALSE; do_crm_log_unlikely(log_level, "Processing action %s: %s %s %s", action->uuid, action->optional?"optional":"required", action->runnable?"runnable":"unrunnable", action->pseudo?"pseudo":action->task); slist_iter( other, action_wrapper_t, action->actions_before, lpc, gboolean other_changed = FALSE; node_t *node = other->action->node; resource_t *other_rsc = other->action->rsc; enum rsc_role_e other_role = RSC_ROLE_UNKNOWN; unsigned long long other_flags = 0; const char *other_id = "None"; if(other_rsc) { other_id = other_rsc->id; other_flags = other_rsc->flags; other_role = other_rsc->fns->state(other_rsc, TRUE); } if(other->type & pe_order_test) { log_level = LOG_NOTICE; do_crm_log_unlikely(log_level, "Processing action %s: %s %s %s", action->uuid, action->optional?"optional":"required", action->runnable?"runnable":"unrunnable", action->pseudo?"pseudo":action->task); } else { log_level = default_log_level; } do_crm_log_unlikely(log_level, " Checking action %s: %s %s %s (flags=0x%.6x)", other->action->uuid, other->action->optional?"optional":"required", other->action->runnable?"runnable":"unrunnable", other->action->pseudo?"pseudo":other->action->task, other->type); local_type = other->type; if((local_type & pe_order_demote_stop) && other->action->pseudo == FALSE && other_role > RSC_ROLE_SLAVE && node != NULL && node->details->online) { local_type |= pe_order_implies_left; do_crm_log_unlikely(log_level,"Upgrading demote->stop constraint to implies_left"); } if((local_type & pe_order_demote) && other->action->pseudo == FALSE && other_role > RSC_ROLE_SLAVE && node != NULL && node->details->online) { local_type |= pe_order_runnable_left; do_crm_log_unlikely(log_level,"Upgrading restart constraint to runnable_left"); } if((local_type & pe_order_complex_right) && (local_type ^ pe_order_complex_right) != pe_order_optional) { if(action->optional && other->action->optional == FALSE) { local_type |= pe_order_implies_right; do_crm_log_unlikely(log_level,"Upgrading complex constraint to implies_right"); } else if(action->runnable && any_possible(other->action->rsc, RSC_START) == FALSE) { action_t *first = first_required(action->rsc, RSC_START); if(first && first->runnable) { do_crm_log_unlikely( log_level-1, " * Marking action %s manditory because of %s (complex right)", first->uuid, other->action->uuid); action->runnable = FALSE; first->runnable = FALSE; update_action(first); changed = TRUE; } } } if((local_type & pe_order_complex_left) && action->optional == FALSE && other->action->optional && (local_type ^ pe_order_complex_left) != pe_order_optional) { local_type |= pe_order_implies_left; do_crm_log_unlikely(log_level,"Upgrading complex constraint to implies_left"); } if((local_type & pe_order_shutdown) && action->optional && other->action->optional == FALSE && is_set(other_flags, pe_rsc_shutdown)) { action->optional = FALSE; changed = TRUE; do_crm_log_unlikely(log_level-1, " * Marking action %s manditory because of %s (complex)", action->uuid, other->action->uuid); } if((local_type & pe_order_restart) && other_role > RSC_ROLE_STOPPED) { if(other_rsc && other_rsc->variant == pe_native) { local_type |= pe_order_implies_left; do_crm_log_unlikely(log_level,"Upgrading restart constraint to implies_left"); } if(other->action->optional && other->action->runnable && action->runnable == FALSE) { do_crm_log_unlikely(log_level-1, " * Marking action %s manditory because %s is unrunnable", other->action->uuid, action->uuid); other->action->optional = FALSE; if(other_rsc) { set_bit(other_rsc->flags, pe_rsc_shutdown); } other_changed = TRUE; } } if((local_type & pe_order_runnable_left) && other->action->runnable == FALSE) { if(other->action->implied_by_stonith) { do_crm_log_unlikely(log_level, "Ignoring un-runnable - implied_by_stonith"); } else if(action->runnable == FALSE) { do_crm_log_unlikely(log_level+1, "Already un-runnable"); } else { action->runnable = FALSE; do_crm_log_unlikely(log_level-1, " * Marking action %s un-runnable because of %s", action->uuid, other->action->uuid); changed = TRUE; } } if((local_type & pe_order_runnable_right) && action->runnable == FALSE) { if(action->pseudo) { do_crm_log_unlikely(log_level, "Ignoring un-runnable - pseudo"); } else if(other->action->runnable == FALSE) { do_crm_log_unlikely(log_level+1, "Already un-runnable"); } else { other->action->runnable = FALSE; do_crm_log_unlikely(log_level-1, " * Marking action %s un-runnable because of %s", other->action->uuid, action->uuid); other_changed = TRUE; } } if(local_type & pe_order_implies_left) { if(other->action->optional == FALSE) { /* nothing to do */ do_crm_log_unlikely(log_level+1, " Ignoring implies left - redundant"); } else if(safe_str_eq(other->action->task, RSC_STOP) && other_role == RSC_ROLE_STOPPED) { do_crm_log_unlikely(log_level-1, " Ignoring implies left - %s already stopped", other_id); } else if((local_type & pe_order_demote) && other_role < RSC_ROLE_MASTER) { do_crm_log_unlikely(log_level-1, " Ignoring implies left - %s already demoted", other_id); } else if(action->optional == FALSE) { other->action->optional = FALSE; do_crm_log_unlikely(log_level-1, " * (implies left) Marking action %s mandatory because of %s", other->action->uuid, action->uuid); other_changed = TRUE; } else { do_crm_log_unlikely(log_level, " Ignoring implies left"); } } if(local_type & pe_order_implies_left_printed) { if(other->action->optional == TRUE && other->action->print_always == FALSE) { if(action->optional == FALSE || (other->action->pseudo && action->print_always)) { other_changed = TRUE; other->action->print_always = TRUE; do_crm_log_unlikely(log_level-1, " * (implies left) Ensuring action %s is included because of %s", other->action->uuid, action->uuid); } } } if(local_type & pe_order_implies_right) { if(action->optional == FALSE) { /* nothing to do */ do_crm_log_unlikely(log_level+1, " Ignoring implies right - redundant"); } else if(other->action->optional == FALSE) { action->optional = FALSE; do_crm_log_unlikely(log_level-1, " * (implies right) Marking action %s mandatory because of %s", action->uuid, other->action->uuid); changed = TRUE; } else { do_crm_log_unlikely(log_level, " Ignoring implies right"); } } if(local_type & pe_order_implies_right_printed) { if(action->optional == TRUE && action->print_always == FALSE) { if(other->action->optional == FALSE || (action->pseudo && other->action->print_always)) { changed = TRUE; action->print_always = TRUE; do_crm_log_unlikely(log_level-1, " * (implies right) Ensuring action %s is included because of %s", action->uuid, other->action->uuid); } } } if(other_changed) { do_crm_log_unlikely(log_level, "%s changed, processing after list", other->action->uuid); update_action(other->action); slist_iter( before_other, action_wrapper_t, other->action->actions_after, lpc2, do_crm_log_unlikely(log_level, "%s changed, processing %s", other->action->uuid, before_other->action->uuid); update_action(before_other->action); ); slist_iter( before_other, action_wrapper_t, other->action->actions_before, lpc2, do_crm_log_unlikely(log_level, "%s changed, processing %s", other->action->uuid, before_other->action->uuid); update_action(before_other->action); ); } ); if(changed) { update_action(action); do_crm_log_unlikely(log_level, "%s changed, processing after list", action->uuid); slist_iter( other, action_wrapper_t, action->actions_after, lpc, do_crm_log_unlikely(log_level, "%s changed, processing %s", action->uuid, other->action->uuid); update_action(other->action); ); do_crm_log_unlikely(log_level, "%s changed, processing before list", action->uuid); slist_iter( other, action_wrapper_t, action->actions_before, lpc, do_crm_log_unlikely(log_level, "%s changed, processing %s", action->uuid, other->action->uuid); update_action(other->action); ); } return FALSE; } gboolean shutdown_constraints( node_t *node, action_t *shutdown_op, pe_working_set_t *data_set) { /* add the stop to the before lists so it counts as a pre-req * for the shutdown */ slist_iter( rsc, resource_t, node->details->running_rsc, lpc, if(is_not_set(rsc->flags, pe_rsc_managed)) { continue; } custom_action_order( rsc, stop_key(rsc), NULL, NULL, crm_strdup(CRM_OP_SHUTDOWN), shutdown_op, pe_order_implies_left, data_set); ); return TRUE; } gboolean stonith_constraints( node_t *node, action_t *stonith_op, pe_working_set_t *data_set) { CRM_CHECK(stonith_op != NULL, return FALSE); /* * Make sure the stonith OP occurs before we start any shared resources */ if(stonith_op != NULL) { slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->stonith_ordering(rsc, stonith_op, data_set); ); } /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ return TRUE; } xmlNode * action2xml(action_t *action, gboolean as_input) { gboolean needs_node_info = TRUE; xmlNode * action_xml = NULL; xmlNode * args_xml = NULL; char *action_id_s = NULL; if(action == NULL) { return NULL; } crm_debug_4("Dumping action %d as XML", action->id); if(safe_str_eq(action->task, CRM_OP_FENCE)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* needs_node_info = FALSE; */ } else if(safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); + } else if(safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) { + action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); + } else if(safe_str_eq(action->task, CRM_OP_LRM_REFRESH)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* } else if(safe_str_eq(action->task, RSC_PROBED)) { */ /* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ } else if(action->pseudo) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = FALSE; } else { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); } action_id_s = crm_itoa(action->id); crm_xml_add(action_xml, XML_ATTR_ID, action_id_s); crm_free(action_id_s); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if(action->rsc != NULL && action->rsc->clone_name != NULL) { char *clone_key = NULL; const char *interval_s = g_hash_table_lookup(action->meta, "interval"); int interval = crm_parse_int(interval_s, "0"); if(safe_str_eq(action->task, RSC_NOTIFY)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_CHECK(n_type != NULL, crm_err("No notify type value found for %s", action->uuid)); CRM_CHECK(n_task != NULL, crm_err("No notify operation value found for %s", action->uuid)); clone_key = generate_notify_key(action->rsc->clone_name, n_type, n_task); } else { clone_key = generate_op_key(action->rsc->clone_name, action->task, interval); } CRM_CHECK(clone_key != NULL, crm_err("Could not generate a key for %s", action->uuid)); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_"XML_LRM_ATTR_TASK_KEY, action->uuid); crm_free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if(needs_node_info && action->node != NULL) { crm_xml_add(action_xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(action_xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); } if(action->failure_is_fatal == FALSE) { add_hash_param(action->meta, XML_ATTR_TE_ALLOWFAIL, XML_BOOLEAN_TRUE); } if(as_input) { return action_xml; } if(action->rsc) { if(action->pseudo == FALSE) { int lpc = 0; xmlNode *rsc_xml = create_xml_node( action_xml, crm_element_name(action->rsc->xml)); const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; if(action->rsc->clone_name != NULL) { crm_debug("Using clone name %s for %s", action->rsc->clone_name, action->rsc->id); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else { crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->long_name); } for(lpc = 0; lpc < DIMOF(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } } args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if(action->rsc != NULL && safe_str_neq(action->task, RSC_STOP)) { g_hash_table_foreach(action->rsc->parameters, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if(action->rsc != NULL) { resource_t *parent = action->rsc; while(parent != NULL) { parent->cmds->append_meta(parent, args_xml); parent = parent->parent; } } else if(safe_str_eq(action->task, CRM_OP_FENCE)) { g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); crm_log_xml_debug_4(action_xml, "dumped action"); free_xml(args_xml); return action_xml; } static gboolean should_dump_action(action_t *action) { int log_filter = LOG_DEBUG_5; CRM_CHECK(action != NULL, return FALSE); if(action->dumped) { do_crm_log_unlikely(log_filter, "action %d (%s) was already dumped", action->id, action->uuid); return FALSE; } else if(action->runnable == FALSE) { do_crm_log_unlikely(log_filter, "action %d (%s) was not runnable", action->id, action->uuid); return FALSE; } else if(action->optional && action->print_always == FALSE) { do_crm_log_unlikely(log_filter, "action %d (%s) was optional", action->id, action->uuid); return FALSE; } else if(action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { const char * interval = NULL; interval = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); /* make sure probes and recurring monitors go through */ if(safe_str_neq(action->task, RSC_STATUS) && interval == NULL) { do_crm_log_unlikely(log_filter, "action %d (%s) was for an unmanaged resource (%s)", action->id, action->uuid, action->rsc->id); return FALSE; } } if(action->pseudo || safe_str_eq(action->task, CRM_OP_FENCE) || safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { /* skip the next checks */ return TRUE; } if(action->node == NULL) { pe_err("action %d (%s) was not allocated", action->id, action->uuid); log_action(LOG_DEBUG, "Unallocated action", action, FALSE); return FALSE; } else if(action->node->details->online == FALSE) { pe_err("action %d was (%s) scheduled for offline node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for offline node", action, FALSE); return FALSE; #if 0 /* but this would also affect resources that can be safely * migrated before a fencing op */ } else if(action->node->details->unclean == FALSE) { pe_err("action %d was (%s) scheduled for unclean node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for unclean node", action, FALSE); return FALSE; #endif } return TRUE; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const action_wrapper_t *action_wrapper2 = (const action_wrapper_t*)a; const action_wrapper_t *action_wrapper1 = (const action_wrapper_t*)b; if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } if(action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } return 0; } static gboolean should_dump_input(int last_action, action_t *action, action_wrapper_t *wrapper) { int type = wrapper->type; int log_dump = LOG_DEBUG_3; int log_filter = LOG_DEBUG_3; type &= ~pe_order_implies_left_printed; type &= ~pe_order_implies_right_printed; type &= ~pe_order_optional; wrapper->state = pe_link_not_dumped; if(last_action == wrapper->action->id) { do_crm_log_unlikely(log_filter, "Input (%d) %s duplicated for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); wrapper->state = pe_link_dup; return FALSE; } else if(wrapper->type == pe_order_none) { do_crm_log_unlikely(log_filter, "Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if(wrapper->action->runnable == FALSE && type == pe_order_none && safe_str_neq(wrapper->action->uuid, CRM_OP_PROBED)) { do_crm_log_unlikely(log_filter, "Input (%d) %s optional (ordering) for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if(action->pseudo && (wrapper->type & pe_order_stonith_stop)) { do_crm_log_unlikely(log_filter, "Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if(wrapper->action->dumped || should_dump_action(wrapper->action)) { do_crm_log_unlikely(log_dump, "Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #if 0 } if(wrapper->action->runnable && wrapper->action->pseudo && wrapper->action->rsc->variant != pe_native) { do_crm_log(LOG_CRIT, "Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #endif } else if(wrapper->action->optional == TRUE && wrapper->action->print_always == FALSE) { do_crm_log_unlikely(log_filter, "Input (%d) %s optional for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); do_crm_log_unlikely(log_filter, "Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, wrapper->action->pseudo, wrapper->action->runnable, wrapper->action->optional, wrapper->action->print_always, wrapper->type); return FALSE; } dump: do_crm_log_unlikely(log_dump, "Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x dumped for %s", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, wrapper->action->pseudo, wrapper->action->runnable, wrapper->action->optional, wrapper->action->print_always, wrapper->type, action->uuid); return TRUE; } void graph_element_from_action(action_t *action, pe_working_set_t *data_set) { int last_action = -1; int synapse_priority = 0; xmlNode * syn = NULL; xmlNode * set = NULL; xmlNode * in = NULL; xmlNode * input = NULL; xmlNode * xml_action = NULL; if(should_dump_action(action) == FALSE) { return; } action->dumped = TRUE; syn = create_xml_node(data_set->graph, "synapse"); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if(action->rsc != NULL) { synapse_priority = action->rsc->priority; } if(action->priority > synapse_priority) { synapse_priority = action->priority; } if(synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } xml_action = action2xml(action, FALSE); add_node_nocopy(set, crm_element_name(xml_action), xml_action); action->actions_before = g_list_sort( action->actions_before, sort_action_id); slist_iter(wrapper,action_wrapper_t,action->actions_before,lpc, if(should_dump_input(last_action, action, wrapper) == FALSE) { continue; } wrapper->state = pe_link_dumped; CRM_CHECK(last_action < wrapper->action->id, ;); last_action = wrapper->action->id; input = create_xml_node(in, "trigger"); xml_action = action2xml(wrapper->action, TRUE); add_node_nocopy(input, crm_element_name(xml_action), xml_action); ); }