diff --git a/Makefile.am b/Makefile.am index 12ea936510..9b081aa00f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,131 +1,131 @@ # # Pacemaker code # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # EXTRA_DIST = bootstrap ConfigureMe README.in libltdl.tar RPM = @RPM@ RPMFLAGS = -ba TARFILE = pacemaker.tar.gz AM_TAR = tar LAST_RELEASE = Pacemaker-0.6.2 STABLE_SERIES = stable-0.6 AUTOMAKE_OPTIONS = foreign ##ACLOCAL = aclocal -I $(auxdir) MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure DRF/config-h.in \ DRF/stamp-h.in libtool.m4 ltdl.m4 libltdl.tar -SUBDIRS = debian build replace include lib cib crmd pengine transitioner crm tools doc cts xml +SUBDIRS = debian build replace include lib cib pengine crmd crm tools doc cts xml tgz: rm -f $(TARFILE) hg archive -t tgz $(TARFILE) echo Rebuilt $(TARFILE) on `date` changes: printf "$(PACKAGE) ($(VERSION)-1) stable; urgency=medium\n" printf " * Update source tarball to revision: `hg id`\n" printf " * Statistics:\n" printf " Changesets: `hg log -M --template "{desc|firstline|strip}\n" -r $(LAST_RELEASE):tip | wc -l`\n" printf " Diff: " hg diff -r $(LAST_RELEASE):tip | diffstat | tail -n 1 printf "\n * Testing Notes:\n" printf "\n + Test hardware:\n" printf "\n + All testing was performed with STONITH enabled\n" printf "\n + Pending bugs encountered during testing:\n" printf "\n * Changes since $(LAST_RELEASE)\n" hg log -M --template " + {desc|firstline|strip}\n" -r $(LAST_RELEASE):tip | grep -v Low: | sort -uf printf "\n -- Andrew Beekhof `date +"%a, %d %b %Y %T %z"`\n" features: printf "$(PACKAGE) ($(VERSION)-1) unstable; urgency=medium\n" printf " * Update source tarball to revision: `hg id`\n" printf " * Statistics:\n" printf " Changesets: `hg out -M --template "{desc|firstline|strip}\n" ../$(STABLE_SERIES) | wc -l`\n" printf " Diff: " hg out -M -p ../$(STABLE_SERIES) | diffstat | tail -n 1 printf "\n * Changes added since $(STABLE_SERIES)\n" hg out -M --template " + {desc|firstline|strip}\n" ../$(STABLE_SERIES) | grep -v Low: | sort -uf printf "\n -- Andrew Beekhof `date +"%a, %d %b %Y %T %z"`\n" obs: tgz make changes > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/server:ha-clustering/pacemaker/ dev: tgz make features > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/server:ha-clustering:UNSTABLE/pacemaker/ home: tgz make changes > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/home:beekhof/pacemaker-test/ global: clean-generic gtags htags -sanhIT global-www: global rsync -avzxlSD --progress HTML/ root@clusterlabs.org:/var/lib/global/pacemaker rpmtgz: tgz echo "Installing $(TARFILE) into /usr/src/packages/SOURCES for rpm" -test -d /usr/src/packages/SOURCES && cp $(TARFILE) /usr/src/packages/SOURCES/ -test -d /usr/src/redhat/SOURCES && cp $(TARFILE) /usr/src/redhat/SOURCES/ rpm: rpmtgz $(RPM) $(RPMFLAGS) $(top_srcdir)/pacemaker.spec $@ %.dot : fsa_matrix.h make_dot.pl perl $(top_srcdir)/crm/crmd/make_dot.pl $(top_srcdir)/crm/crmd/fsa_matrix.h $(top_builddir)/crm/crmd diff --git a/crmd/messages.c b/crmd/messages.c index aca169002f..cfff8e784d 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -1,1196 +1,1166 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); enum crmd_fsa_input handle_request(xmlNode *stored_msg); enum crmd_fsa_input handle_response(xmlNode *stored_msg); enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg); ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t *orig); gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data); #ifdef MSG_LOG # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x); \ crm_log_xml(LOG_MSG, "router.log", msg); #else # define ROUTER_RESULT(x) crm_debug_3("Router result: %s", x) #endif /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t *cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if(fsa_actions != A_NOTHING) { register_fsa_input_adv( cur_data?cur_data->fsa_cause:C_FSA_INTERNAL, I_NULL, cur_data?cur_data->data:NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv( cause, input, new_data, A_NOTHING, TRUE, raised_from); } static gboolean last_was_vote = FALSE; int register_fsa_input_adv( enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; last_data_id++; CRM_CHECK(raised_from != NULL, raised_from = ""); crm_debug("%s %s FSA input %d (%s) (cause=%s) %s data", raised_from, prepend?"prepended":"appended",last_data_id, fsa_input2string(input), fsa_cause2string(cause), data?"with":"without"); if(input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: cause=%s", fsa_cause2string(cause)); if(old_len > 0) { crm_warn("%s stalled the FSA with pending inputs", raised_from); fsa_dump_queue(LOG_DEBUG); } if(data == NULL) { set_bit_inplace(fsa_actions, with_actions); with_actions = A_NOTHING; return 0; } crm_err("%s stalled the FSA with data - this may be broken", raised_from); } if(old_len == 0) { last_was_vote = FALSE; } if(input == I_NULL && with_actions == A_NOTHING /* && data == NULL */){ /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } else if(data == NULL) { last_was_vote = FALSE; #if 0 } else if(last_was_vote && cause == C_HA_MESSAGE && input == I_ROUTER) { const char *op = crm_element_value( ((ha_msg_input_t*)data)->msg, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_VOTE)) { /* It is always safe to treat N successive votes as * a single one * * If all the discarded votes are more "loosing" than * the first then the result is accurate * (win or loose). * * If any of the discarded votes are less "loosing" * than the first then we will cast our vote and the * eventual winner will vote us down again (which * even in the case that N=2, is no worse than if we * had not disarded the vote). */ crm_debug_2("Vote compression: %d", old_len); return 0; } #endif } else if (cause == C_HA_MESSAGE && input == I_ROUTER) { const char *op = crm_element_value( ((ha_msg_input_t*)data)->msg, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_VOTE)) { last_was_vote = TRUE; crm_debug_3("Added vote: %d", old_len); } } else { last_was_vote = FALSE; } crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if(with_actions != A_NOTHING) { crm_debug_3("Adding actions %.16llx to input", with_actions); } if(data != NULL) { switch(cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_debug_3("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t*)data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_debug_3("Copying %s data from %s as lrm_op_t", fsa_cause2string(cause), raised_from); fsa_data->data = copy_lrm_op((lrm_op_t*)data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); exit(1); break; } crm_debug_4("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if(prepend) { crm_debug_2("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_debug_2("Queue len: %d", g_list_length(fsa_message_queue)); fsa_dump_queue(LOG_DEBUG_2); if(old_len == g_list_length(fsa_message_queue)){ crm_err("Couldnt add message to the queue"); } if(fsa_source) { crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); } return last_data_id; } void fsa_dump_queue(int log_level) { if(log_level < (int)crm_log_level) { return; } slist_iter( data, fsa_data_t, fsa_message_queue, lpc, do_crm_log(log_level, "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)", lpc, data->id, fsa_input2string(data->fsa_input), data->origin, fsa_cause2string(data->fsa_cause)); ); } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t *orig) { ha_msg_input_t *copy = NULL; xmlNodePtr data = NULL; if(orig != NULL) { crm_debug_4("Copy msg"); data = copy_xml(orig->msg); } else { crm_debug_3("No message to copy"); } copy = new_ha_msg_input(data); if(orig->msg != NULL) { CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); } return copy; } void delete_fsa_input(fsa_data_t *fsa_data) { lrm_op_t *op = NULL; xmlNode *foo = NULL; if(fsa_data == NULL) { return; } crm_debug_4("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if(fsa_data->data != NULL) { switch(fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrm_op_t*)fsa_data->data; free_lrm_op(op); break; case fsa_dt_none: if(fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); exit(1); } break; } crm_debug_4("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } crm_free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t* message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_debug_2("Processing input %d", message->id); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv( fsa_data_t *fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if(fsa_data == NULL) { do_crm_log(LOG_ERR, "%s: No FSA data available", caller); } else if(fsa_data->data == NULL) { do_crm_log(LOG_ERR, "%s: No message data available", caller); } else if(fsa_data->data_type != a_type) { do_crm_log(LOG_CRIT, "%s: Message data was the wrong type! %d vs. requested=%d." " Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode *input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ crm_debug_4("Attempting to route message"); if(relay_message(input, cause==C_IPC_MESSAGE)) { crm_debug_4("Message routed..."); return; } crm_debug_4("Message wasn't routed... try handling locally"); /* calculate defer */ result = handle_message(input); switch(result) { case I_NULL: crm_debug_4("Message processed"); break; case I_CIB_OP: break; case I_ROUTER: break; case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: crm_debug_4("Defering local processing of message"); register_fsa_input_later(cause, result, &fsa_input); result = I_NULL; break; } if(result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean send_request(xmlNode *msg, char **msg_reference) { if(msg_reference != NULL) { *msg_reference = crm_strdup( crm_element_value(msg, XML_ATTR_REFERENCE)); } if(relay_message(msg, TRUE) == FALSE) { ha_msg_input_t fsa_input; fsa_input.msg = msg; register_fsa_input(C_IPC_MESSAGE, I_ROUTER, &fsa_input); return FALSE; } return TRUE; } gboolean relay_message(xmlNode *msg, gboolean originated_locally) { int is_for_dc = 0; int is_for_dcib = 0; int is_for_te = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from= crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *msg_error = NULL; crm_debug_3("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); if(msg == NULL) { msg_error = "Cannot route empty message"; } else if(safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))){ /* quietly ignore */ processing_complete = TRUE; } else if(safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if(sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if(msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_xml(LOG_WARNING, "bad msg", msg); } if(processing_complete) { return TRUE; } processing_complete = TRUE; is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if(host_to == NULL || strlen(host_to) == 0) { if(is_for_dc || is_for_te) { is_local = 0; } else if(is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if(safe_str_eq(fsa_our_uname, host_to)) { is_local=1; } if(is_for_dc || is_for_dcib || is_for_te) { if(AM_I_DC && is_for_te) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else if(AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ ROUTER_RESULT("Message result: External relay to DC"); send_msg_via_ha(msg); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); } } else if(is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if(is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else { ROUTER_RESULT("Message result: External relay"); send_msg_via_ha(msg); } return processing_complete; } gboolean crmd_authorize_message(xmlNode *client_msg, crmd_client_t *curr_client) { /* check the best case first */ const char *sys_from = crm_element_value(client_msg, F_CRM_SYS_FROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; const char *filtered_from; gpointer table_key = NULL; gboolean auth_result = FALSE; struct crm_subsystem_s *the_subsystem = NULL; gboolean can_reply = FALSE; /* no-one has registered with this id */ xmlNode *xml = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); if (safe_str_neq(CRM_OP_HELLO, op)) { if(sys_from == NULL) { crm_warn("Message [%s] was had no value for %s... discarding", crm_element_value(client_msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM); return FALSE; } filtered_from = sys_from; /* The CIB can have two names on the DC */ if(strcasecmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup (ipc_clients, filtered_from) != NULL) { can_reply = TRUE; /* reply can be routed */ } crm_debug_2("Message reply can%s be routed from %s.", can_reply?"":" not", sys_from); if(can_reply == FALSE) { crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); } return can_reply; } crm_debug_3("received client join msg"); crm_log_xml(LOG_MSG, "join", client_msg); xml = get_message_xml(client_msg, F_CRM_DATA); auth_result = process_hello_message( xml, &uuid, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if(client_name == NULL || uuid == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), crm_str(uuid)); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_debug_3("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } crm_free(major_version); crm_free(minor_version); } if (safe_str_eq(CRM_SYSTEM_PENGINE, client_name)) { the_subsystem = pe_subsystem; } else if (safe_str_eq(CRM_SYSTEM_TENGINE, client_name)) { the_subsystem = te_subsystem; } if (auth_result == TRUE && the_subsystem != NULL) { /* if we already have one of those clients * only applies to te, pe etc. not admin clients */ crm_debug_3("Checking if %s is required/already connected", client_name); table_key = (gpointer)crm_strdup(client_name); if(is_set(fsa_input_register, the_subsystem->flag_connected)) { auth_result = FALSE; crm_free(table_key); table_key = NULL; crm_warn("Bit\t%.16llx set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_err("Client %s is already connected", client_name); } else if(FALSE == is_set(fsa_input_register, the_subsystem->flag_required)) { crm_warn("Bit\t%.16llx not set in %.16llx", the_subsystem->flag_connected, fsa_input_register); crm_warn("Client %s joined but we dont need it", client_name); stop_subsystem(the_subsystem, TRUE); } else { the_subsystem->ipc = curr_client->client_channel; set_bit_inplace(fsa_input_register, the_subsystem->flag_connected); } } else { table_key = (gpointer)generate_hash_key(client_name, uuid); } if (auth_result == TRUE) { crm_debug_2("Accepted client %s", crm_str(table_key)); curr_client->table_key = table_key; curr_client->sub_sys = crm_strdup(client_name); curr_client->uuid = crm_strdup(uuid); g_hash_table_insert (ipc_clients, table_key, curr_client->client_channel); send_hello_message(curr_client->client_channel, "n/a", CRM_SYSTEM_CRMD, "0", "1"); crm_debug_3("Updated client list with %s", crm_str(table_key)); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); if(the_subsystem != NULL) { CRM_CHECK(the_subsystem->client == NULL, process_client_disconnect(the_subsystem->client)); the_subsystem->client = curr_client; } } else { crm_free(table_key); crm_warn("Rejected client logon request"); curr_client->client_channel->ch_status = IPC_DISC_PENDING; } if(uuid != NULL) crm_free(uuid); if(minor_version != NULL) crm_free(minor_version); if(major_version != NULL) crm_free(major_version); if(client_name != NULL) crm_free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(xmlNode *stored_msg) { enum crmd_fsa_input next_input = I_NULL; const char *type = NULL; if(stored_msg == NULL) { crm_err("No message to handle"); return I_NULL; } type = crm_element_value(stored_msg, F_CRM_MSG_TYPE); if(safe_str_eq(type, XML_ATTR_REQUEST)) { next_input = handle_request(stored_msg); } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { next_input = handle_response(stored_msg); } else { crm_err("Unknown message type: %s", type); } /* crm_debug_2("%s: Next input is %s", __FUNCTION__, */ /* fsa_input2string(next_input)); */ return next_input; } #define schedule_pe() do { \ next_input = I_PE_CALC; \ if(fsa_pe_ref) { \ crm_debug("Cancelling %s...", fsa_pe_ref); \ crm_free(fsa_pe_ref); \ fsa_pe_ref = NULL; \ } \ } while(0) enum crmd_fsa_input handle_request(xmlNode *stored_msg) { xmlNode *msg = NULL; enum crmd_fsa_input next_input = I_NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_debug_2("Received %s "XML_ATTR_REQUEST" from %s in state %s", op, host_from, fsa_state2string(fsa_state)); if(op == NULL) { crm_log_xml(LOG_ERR, "Bad message", stored_msg); /*========== common actions ==========*/ } else if(strcasecmp(op, CRM_OP_NOOP) == 0) { crm_debug_2("no-op from %s", crm_str(host_from)); } else if(strcasecmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT|A_ELECTION_CHECK, FALSE, __FUNCTION__); } else if(strcasecmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT|A_ELECTION_CHECK, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if(fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); next_input = I_ELECTION; #if 0 } else if(AM_I_DC) { /* This is the old way of doing things but what is gained? */ next_input = I_ELECTION; #endif } } else if(strcasecmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*next_input = I_SHUTDOWN; */ next_input = I_NULL; } else if(strcasecmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ xmlNode *ping = createPingAnswerFragment(sys_to, "ok"); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); crm_info("Current ping state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg, ping); relay_message(msg, TRUE); free_xml(ping); free_xml(msg); /* probably better to do this via signals on the * local node */ } else if(strcasecmp(op, CRM_OP_DEBUG_UP) == 0) { alter_debug(DEBUG_INC); crm_info("Debug set to %d", get_crm_log_level()); } else if(strcasecmp(op, CRM_OP_DEBUG_DOWN) == 0) { alter_debug(DEBUG_DEC); crm_info("Debug set to %d", get_crm_log_level()); } else if(strcasecmp(op, CRM_OP_JOIN_OFFER) == 0) { next_input = I_JOIN_OFFER; crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); } else if(strcasecmp(op, CRM_OP_JOIN_ACKNAK) == 0) { next_input = I_JOIN_RESULT; crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); } else if(strcasecmp(op, CRM_OP_LRM_DELETE) == 0 || strcasecmp(op, CRM_OP_LRM_FAIL) == 0 || strcasecmp(op, CRM_OP_LRM_REFRESH) == 0 || strcasecmp(op, CRM_OP_REPROBE) == 0) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); next_input = I_ROUTER; /* this functionality should only be enabled * if this is a development build */ } else if(CRM_DEV_BUILD && strcasecmp(op, CRM_OP_DIE) == 0/*constant condition*/) { crm_warn("Test-only code: Killing the CRM without mercy"); crm_warn("Inhibiting respawns"); exit(100); /*========== (NOT_DC)-Only Actions ==========*/ } else if(AM_I_DC == FALSE){ gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(dc_match || fsa_our_dc == NULL) { if(strcasecmp(op, CRM_OP_HBEAT) == 0) { crm_debug_3("Received DC heartbeat from %s", host_from); next_input = I_DC_HEARTBEAT; } else if(fsa_our_dc == NULL) { crm_warn("CRMd discarding request: %s" " (DC: %s, from: %s)", op, crm_str(fsa_our_dc), host_from); crm_log_xml(LOG_WARNING, "Ignored Request", stored_msg); } else if(strcasecmp(op, CRM_OP_SHUTDOWN) == 0) { next_input = I_STOP; } else { crm_err("CRMd didnt expect request: %s", op); crm_log_xml(LOG_ERR, "bad request", stored_msg); } } else { crm_warn("Discarding %s op from %s", op, host_from); } /*========== DC-Only Actions ==========*/ } else if(AM_I_DC) { - const char *message = crm_element_value( - stored_msg, "message"); - - /* setting "fsa_pe_ref = NULL" makes sure we ignore any - * PE reply that might be pending or in the queue while - * we ask the CIB for a more up-to-date copy - */ - if(safe_str_eq(op, CRM_OP_TEABORT)) { - crm_debug("Transition cancelled: %s/%s", op, message); - clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); - if(need_transition(fsa_state)) { - schedule_pe(); - - } else { - crm_debug("Filtering %s op in state %s", - op, fsa_state2string(fsa_state)); - } - - } else if(strcasecmp(op, CRM_OP_TECOMPLETE) == 0) { - crm_debug("Transition complete: %s/%s", op, message); - clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); - if(fsa_state == S_TRANSITION_ENGINE) { - next_input = I_TE_SUCCESS; - } else { - crm_debug("Filtering %s op in state %s", - op, fsa_state2string(fsa_state)); - } - - } else if(strcasecmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { + if(strcasecmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { next_input = I_NODE_JOIN; } else if(strcasecmp(op, CRM_OP_JOIN_REQUEST) == 0) { next_input = I_JOIN_REQUEST; } else if(strcasecmp(op, CRM_OP_JOIN_CONFIRM) == 0) { next_input = I_JOIN_RESULT; } else if(strcasecmp(op, CRM_OP_SHUTDOWN) == 0) { gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if(dc_match) { crm_err("We didnt ask to be shut down yet our" " TE is telling us too." " Better get out now!"); next_input = I_TERMINATE; } else if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting ourselves down (DC)"); next_input = I_STOP; } else if(fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); next_input = I_ELECTION; } } else if(strcasecmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ next_input = handle_shutdown_request(stored_msg); } else { crm_err("Unexpected request (%s) sent to the DC", op); crm_log_xml(LOG_ERR, "Unexpected", stored_msg); } } return next_input; } enum crmd_fsa_input handle_response(xmlNode *stored_msg) { enum crmd_fsa_input next_input = I_NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); const char *sys_from = crm_element_value(stored_msg, F_CRM_SYS_FROM); const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); crm_debug_2("Received %s "XML_ATTR_RESPONSE" from %s in state %s", op, host_from, fsa_state2string(fsa_state)); if(op == NULL) { crm_log_xml(LOG_ERR, "Bad message", stored_msg); } else if(AM_I_DC && strcasecmp(op, CRM_OP_PECALC) == 0) { crm_debug_2("Processing %s reply %s (fsa=%s)", sys_from, msg_ref, crm_str(fsa_pe_ref)); if(msg_ref != NULL && safe_str_eq(msg_ref, fsa_pe_ref)) { next_input = I_PE_SUCCESS; crm_debug_2("Completed: %s...", fsa_pe_ref); crm_free(fsa_pe_ref); fsa_pe_ref = NULL; } else { crm_debug_2("Skipping superceeded reply from %s", sys_from); } } else if(strcasecmp(op, CRM_OP_VOTE) == 0 || strcasecmp(op, CRM_OP_HBEAT) == 0 || strcasecmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcasecmp(op, CRM_OP_SHUTDOWN) == 0) { crm_debug_2("Ignoring %s from %s in %s", op, host_from, fsa_state2string(fsa_state)); next_input = I_NULL; } else { crm_err("Unexpected response (op=%s) sent to the %s", op, AM_I_DC?"DC":"CRMd"); next_input = I_NULL; } return next_input; } enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ time_t now = time(NULL); xmlNode *node_state = NULL; const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if(host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s",host_from); crm_log_xml(LOG_MSG, "message", stored_msg); node_state = create_node_state( host_from, NULL, NULL, NULL, NULL, CRMD_STATE_INACTIVE, FALSE, __FUNCTION__); crm_xml_add_int(node_state, XML_CIB_ATTR_SHUTDOWN, (int)now); fsa_cib_anon_update(XML_CIB_TAG_STATUS,node_state, cib_quorum_override); crm_log_xml_debug_2(node_state, "Shutdown update"); free_xml(node_state); /* will be picked up by the TE as long as its running */ if(need_transition(fsa_state) && is_set(fsa_input_register, R_TE_CONNECTED) == FALSE) { register_fsa_action(A_TE_CANCEL); } return I_NULL; } /* frees msg upon completion */ gboolean send_msg_via_ha(xmlNode *msg) { int log_level = LOG_DEBUG_3; gboolean broadcast = FALSE; gboolean all_is_good = TRUE; const char *op = crm_element_value(msg, F_CRM_TASK); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); enum crm_ais_msg_types dest = 0; if(is_openais_cluster()) { dest = 1; #if SUPPORT_AIS dest = text2msg_type(sys_to); #endif } if (msg == NULL) { crm_err("Attempt to send NULL Message via HA failed."); all_is_good = FALSE; } else { crm_debug_4("Relaying message to (%s) via HA", host_to); } if (all_is_good) { if (sys_to == NULL || strlen(sys_to) == 0) { crm_err("You did not specify a destination sub-system" " for this message."); all_is_good = FALSE; } } /* There are a number of messages may not need to be ordered. * At a later point perhaps we should detect them and send them * as unordered messages. */ if (all_is_good) { if (host_to == NULL || strlen(host_to) == 0 || safe_str_eq(sys_to, CRM_SYSTEM_DC)) { broadcast = TRUE; all_is_good = send_cluster_message(NULL, dest, msg, FALSE); } else { all_is_good = send_cluster_message(host_to, dest, msg, FALSE); } } if(all_is_good == FALSE) { log_level = LOG_WARNING; } if(log_level == LOG_WARNING || (safe_str_neq(op, CRM_OP_HBEAT))) { do_crm_log(log_level, "Sending %sHA message (ref=%s) to %s@%s %s.", broadcast?"broadcast ":"directed ", crm_element_value(msg, XML_ATTR_REFERENCE), crm_str(sys_to), host_to==NULL?"":host_to, all_is_good?"succeeded":"failed"); } return all_is_good; } /* msg is deleted by the time this returns */ +extern gboolean process_te_message(xmlNode *msg, xmlNode *xml_data); gboolean send_msg_via_ipc(xmlNode *msg, const char *sys) { gboolean send_ok = TRUE; IPC_Channel *client_channel; crm_debug_4("relaying msg to sub_sys=%s via IPC", sys); client_channel = (IPC_Channel*)g_hash_table_lookup(ipc_clients, sys); if(crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { crm_debug_3("Sending message via channel %s.", sys); send_ok = send_ipc_message(client_channel, msg); - - } else if(sys != NULL && strcasecmp(sys, CRM_SYSTEM_CIB) == 0) { - crm_err("Sub-system (%s) has been incorporated into the CRMd.", - sys); - crm_err("Change the way we handle this CIB message"); - crm_log_xml(LOG_ERR, "cib op", msg); - send_ok = FALSE; - + + } else if(sys != NULL && strcasecmp(sys, CRM_SYSTEM_TENGINE) == 0) { + xmlNode *data = get_message_xml(msg, F_CRM_DATA); + process_te_message(msg, data); + } else if(sys != NULL && strcasecmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t *fsa_data = NULL; ha_msg_input_t *msg_copy = new_ha_msg_input(msg); crm_malloc0(fsa_data, sizeof(fsa_data_t)); fsa_data->fsa_input = I_MESSAGE; fsa_data->fsa_cause = C_IPC_MESSAGE; fsa_data->data = msg_copy; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_debug_2("Invoking action %s (%.16llx)", fsa_action2string(A_LRM_INVOKE), A_LRM_INVOKE); #endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, fsa_data); crm_free(msg_copy); crm_free(fsa_data); } else { crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } return send_ok; } void msg_queue_helper(void) { #if SUPPORT_HEARTBEAT IPC_Channel *ipc = NULL; if(fsa_cluster_conn != NULL) { ipc = fsa_cluster_conn->llc_ops->ipcchan( fsa_cluster_conn); } if(ipc != NULL) { ipc->ops->resume_io(ipc); } /* g_hash_table_foreach_remove(ipc_clients, ipc_queue_helper, NULL); */ #endif } gboolean ipc_queue_helper(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *ipc_client = value; if(ipc_client->client_channel != NULL) { ipc_client->client_channel->ops->is_message_pending(ipc_client->client_channel); } return FALSE; } diff --git a/transitioner/actions.c b/crmd/te_actions.c similarity index 90% rename from transitioner/actions.c rename to crmd/te_actions.c index b9dc40b679..f9e688865f 100644 --- a/transitioner/actions.c +++ b/crmd/te_actions.c @@ -1,532 +1,521 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include +#include char *te_uuid = NULL; -IPC_Channel *crm_ch = NULL; void send_rsc_command(crm_action_t *action); extern crm_action_timer_t *transition_timer; static void te_start_action_timer(crm_action_t *action) { crm_malloc0(action->timer, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action_warn; action->timer->action = action; action->timer->source_id = Gmain_timeout_add( action->timer->timeout, action_timer_callback, (void*)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t *graph, crm_action_t *pseudo) { crm_info("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } #if SUPPORT_HEARTBEAT void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(op->node_name != NULL, return); CRM_CHECK(op->node_uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); - rc = te_cib_conn->cmds->update( - te_cib_conn, XML_CIB_TAG_STATUS, node_state, NULL, + rc = fsa_cib_conn->cmds->update( + fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, NULL, cib_quorum_override|cib_scope_local); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(rc, FALSE, NULL, cib_fencing_updated); } free_xml(node_state); return; } #endif static gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; stonith_ops_t * st_op = NULL; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = g_hash_table_lookup(action->params, crm_meta_name("stonith_action")); CRM_CHECK(id != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(uuid != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(type != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(target != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); te_log_action(LOG_INFO, "Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->transition_timeout / 2); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); crm_malloc0(st_op, sizeof(stonith_ops_t)); if(safe_str_eq(type, "poweroff")) { st_op->optype = POWEROFF; } else { st_op->optype = RESET; } st_op->timeout = transition_graph->transition_timeout / 2; st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); st_op->private_data = generate_transition_key( transition_graph->id, action->id, 0, te_uuid); CRM_ASSERT(stonithd_input_IPC_channel() != NULL); if (ST_OK != stonithd_node_fence( st_op )) { crm_err("Cannot fence %s: stonithd_node_fence() call failed ", target); } return TRUE; } #endif return FALSE; } static int get_target_rc(crm_action_t *action) { const char *target_rc_s = g_hash_table_lookup( action->params, crm_meta_name(XML_ATTR_TE_TARGET_RC)); if(target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t *graph, crm_action_t *action) { char *value = NULL; char *counter = NULL; xmlNode *cmd = NULL; const char *id = NULL; const char *task = NULL; const char *on_node = NULL; gboolean ret = TRUE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", crm_str(id), crm_str(task), on_node); cmd = create_request(task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); - ret = send_ipc_message(crm_ch, cmd); + ret = send_cluster_message(on_node, crm_proc_crmd, cmd, TRUE); crm_free(counter); free_xml(cmd); value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(ret == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(crm_is_true(value)) { crm_info("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else if(ret && action->timeout > 0) { crm_debug("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; te_start_action_timer(action); } return TRUE; } static gboolean te_rsc_command(crm_graph_t *graph, crm_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ const char *task = NULL; const char *on_node = NULL; action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); send_rsc_command(action); return TRUE; } gboolean cib_action_update(crm_action_t *action, int status) { char *op_id = NULL; char *code = NULL; char *digest = NULL; xmlNode *tmp = NULL; xmlNode *params = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; enum cib_errors rc = cib_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override|cib_scope_local; crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if(action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); code = crm_itoa(status); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); xml_op = create_xml_node(rsc, XML_LRM_TAG_RSC_OP); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, action->interval); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, XML_ATTR_ORIGIN, __FUNCTION__); crm_free(code); code = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status, status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); tmp = find_xml_node(action->xml, "attributes", TRUE); params = create_xml_node(NULL, XML_TAG_PARAMS); copy_in_properties(params, tmp); filter_action_parameters(params, CRM_FEATURE_SET); digest = calculate_xml_digest(params, TRUE, FALSE); /* info for now as this area has been problematic to debug */ crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", params); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); free_xml(params); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); - rc = te_cib_conn->cmds->update( - te_cib_conn, XML_CIB_TAG_STATUS, state, NULL, call_options); + rc = fsa_cib_conn->cmds->update( + fsa_cib_conn, XML_CIB_TAG_STATUS, state, NULL, call_options); crm_debug("Updating CIB with %s action %d: %s on %s (call_id=%d)", op_status2text(status), action->id, task_uuid, target, rc); add_cib_op_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void send_rsc_command(crm_action_t *action) { xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); crm_info("Initiating action %d: %s %s on %s", action->id, task, task_uuid, on_node); crm_free(counter); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); -#if 1 - send_ipc_message(crm_ch, cmd); -#else - /* test the TE timer/recovery code */ - if((action->id % 11) == 0) { - crm_err("Faking lost action %d: %s", action->id, task_uuid); - } else { - send_ipc_message(crm_ch, cmd); - } -#endif + send_cluster_message(on_node, crm_proc_lrmd, cmd, TRUE); free_xml(cmd); action->executed = TRUE; value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(crm_is_true(value)) { crm_debug("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else if(action->timeout > 0) { int action_timeout = (2 * action->timeout) + transition_graph->network_delay; crm_debug_3("Setting timer for action %s", task_uuid); if(transition_graph->transition_timeout < action_timeout) { crm_debug("Action %d:" " Increasing transition %d timeout to %d (2*%d + %d)", action->id, transition_graph->id, action_timeout, action->timeout, transition_graph->network_delay); transition_graph->transition_timeout = action_timeout; } te_start_action_timer(action); } } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; -extern GMainLoop* mainloop; - void notify_crmd(crm_graph_t *graph) { - xmlNode *cmd = NULL; int log_level = LOG_DEBUG; - const char *op = CRM_OP_TEABORT; int pending_callbacks = num_cib_op_callbacks(); - stop_te_timer(transition_timer); if(pending_callbacks != 0) { - crm_warn("Delaying completion until all CIB updates complete"); - return; + transition_graph->complete = FALSE; + crm_warn("Delaying completion until %d CIB updates complete", pending_callbacks); + return; } CRM_CHECK(graph->complete, graph->complete = TRUE); switch(graph->completion_action) { case tg_stop: - op = CRM_OP_TECOMPLETE; - log_level = LOG_INFO; - break; + log_level = LOG_INFO; + clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); + register_fsa_input(C_FSA_INTERNAL, I_TE_SUCCESS, NULL); + break; case tg_abort: case tg_restart: - op = CRM_OP_TEABORT; - break; + clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); + if(need_transition(fsa_state)) { + /* setting "fsa_pe_ref = NULL" makes sure we ignore any + * PE reply that might be pending or in the queue while + * we ask the CIB for a more up-to-date copy + */ + crm_free(fsa_pe_ref); fsa_pe_ref = NULL; + register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); + + } else { + crm_debug("Filtering %d op in state %s", + graph->completion_action, fsa_state2string(fsa_state)); + } + + break; case tg_shutdown: - crm_info("Exiting after transition"); - if (mainloop != NULL && g_main_is_running(mainloop)) { - g_main_quit(mainloop); - return; - } - exit(LSB_EXIT_OK); + crm_info("Exiting after transition"); + return; } - te_log_action(log_level, "Transition %d status: %s - %s", - graph->id, op, crm_str(graph->abort_reason)); + te_log_action(log_level, "Transition %d status: %d - %s", + graph->id, graph->completion_action, crm_str(graph->abort_reason)); print_graph(LOG_DEBUG_3, graph); - cmd = create_request( - op, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_TENGINE, NULL); - - if(graph->abort_reason != NULL) { - crm_xml_add(cmd, "message", graph->abort_reason); - } - - send_ipc_message(crm_ch, cmd); - free_xml(cmd); - graph->abort_reason = NULL; graph->completion_action = tg_restart; } diff --git a/transitioner/callbacks.c b/crmd/te_callbacks.c similarity index 77% rename from transitioner/callbacks.c rename to crmd/te_callbacks.c index 6e8fcaef86..e1094c3463 100644 --- a/transitioner/callbacks.c +++ b/crmd/te_callbacks.c @@ -1,609 +1,483 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, xmlNode *msg); -void te_update_diff(const char *event, xmlNode *msg); xmlNode *need_abort(xmlNode *update); void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; -static gboolean -start_global_timer(crm_action_timer_t *timer, int timeout) -{ - CRM_ASSERT(timer != NULL); - CRM_CHECK(timer > 0, return FALSE); - CRM_CHECK(timer->source_id == 0, return FALSE); - - if(timeout <= 0) { - crm_err("Tried to start timer with period: %d", timeout); - - } else if(timer->source_id == 0) { - crm_debug_2("Starting abort timer: %dms", timeout); - timer->timeout = timeout; - timer->source_id = Gmain_timeout_add( - timeout, global_timer_callback, (void*)timer); - CRM_ASSERT(timer->source_id != 0); - return TRUE; - - } else { - crm_err("Timer is already active with period: %d", timer->timeout); - } - - return FALSE; -} void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; xmlNode *diff = NULL; xmlNode *aborted = NULL; const char *set_name = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; if(msg == NULL) { crm_err("NULL update"); return; } crm_element_value_int(msg, F_CIB_RC, &rc); op = crm_element_value(msg, F_CIB_OPERATION); if(rc < cib_ok) { crm_debug_2("Ignoring failed %s operation: %s", op, cib_error2string(rc)); return; } diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL) { xmlNode *section = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { xmlNode *attrs = NULL; xmlNode *status = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); if(aborted == NULL && change_set != NULL) { status = get_object_root(XML_CIB_TAG_STATUS, change_set); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" deletions"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } ); } } if(aborted != NULL) { abort_transition( INFINITY, tg_restart, "Non-status change", NULL); } return; } - - gboolean -process_te_message(xmlNode *msg, xmlNode *xml_data, IPC_Channel *sender) +process_te_message(xmlNode *msg, xmlNode *xml_data) { xmlNode *xml_obj = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_xml(LOG_DEBUG_3, "ipc", msg); if(op == NULL){ /* error */ - } else if(strcasecmp(op, CRM_OP_HELLO) == 0) { - /* ignore */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(xml_data), XML_TAG_CIB)) { xml_obj = xml_data; } else { xml_obj = find_xml_node(xml_data, XML_TAG_CIB, TRUE); } #else xml_obj = xml_data; CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); #endif CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_info("Processing (N)ACK %s from %s", crm_element_value(msg, XML_ATTR_REFERENCE), from); extract_event(xml_obj); - } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { - crm_err("Message was a response not a request. Discarding"); - return TRUE; - - } else if(strcasecmp(op, CRM_OP_TRANSITION) == 0) { - const char *graph_file = crm_element_value(msg, F_CRM_TGRAPH); - const char *graph_input = crm_element_value(msg, F_CRM_TGRAPH_INPUT); - CRM_CHECK(graph_file != NULL || xml_data != NULL, - crm_err("No graph provided"); - crm_log_xml(LOG_WARNING, "no graph", msg); - return TRUE); - - if(transition_graph->complete == FALSE) { - crm_info("Another transition is already active"); - abort_transition( - INFINITY, tg_restart, "Transition Active", NULL); - - } else { - const char *value = NULL; - xmlNode *graph_data = xml_data; - crm_debug("Processing graph derived from %s", graph_input); - - if(graph_file != NULL) { - FILE *graph_fd = fopen(graph_file, "r"); - - CRM_CHECK(graph_fd != NULL, - cl_perror("Could not open graph file %s", graph_file); - return TRUE); - - graph_data = file2xml(graph_fd, FALSE); - - unlink(graph_file); - fclose(graph_fd); - } - - destroy_graph(transition_graph); - transition_graph = unpack_graph(graph_data); - start_global_timer(transition_timer, - transition_graph->transition_timeout); - - value = crm_element_value(graph_data, "failed-stop-offset"); - if(value) { - failed_stop_offset = crm_strdup(value); - } - - value = crm_element_value(graph_data, "failed-start-offset"); - if(value) { - failed_start_offset = crm_strdup(value); - } - - trigger_graph(); - print_graph(LOG_DEBUG_2, transition_graph); - - if(graph_data != xml_data) { - free_xml(graph_data); - } - } - - } else if(strcasecmp(op, CRM_OP_TE_HALT) == 0) { - abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); - - } else if(strcasecmp(op, CRM_OP_TEABORT) == 0) { - abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); - } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } #if SUPPORT_HEARTBEAT void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; char *uuid = NULL; crm_action_t *stonith_action = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key( op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); } stonith_action = get_action(stonith_id, TRUE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); goto bail; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = g_hash_table_lookup( stonith_action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); bail: crm_free(uuid); return; } void tengine_stonith_connection_destroy(gpointer user_data) { crm_err("Fencing daemon has left us"); stonith_src = NULL; if(stonith_src == NULL) { G_main_set_trigger(stonith_reconnect); } /* cbchan will be garbage at this point, arrange for it to be reset */ set_stonithd_input_IPC_channel_NULL(); return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } #endif void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "Failed update"); } } void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( LOG_WARNING,"Action missed its timeout", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT); } return FALSE; } static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; const char *key = NULL; const char *task = NULL; const char *node = NULL; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm_info("Action %s %d unconfirmed from %s", key, action->id, node); if(action->type != action_type_rsc) { continue; } else if(send_updates == FALSE) { continue; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ continue; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ continue; } cib_action_update(action, LRM_OP_PENDING); ); ); if(unconfirmed > 0) { crm_warn("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." " marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } -gboolean -te_graph_trigger(gpointer user_data) -{ - int timeout = 0; - enum transition_status graph_rc = -1; - - if(transition_graph->complete == FALSE) { - graph_rc = run_graph(transition_graph); - timeout = transition_graph->transition_timeout; - print_graph(LOG_DEBUG_3, transition_graph); - - if(graph_rc == transition_active) { - crm_debug_3("Transition not yet complete"); - stop_te_timer(transition_timer); - start_global_timer(transition_timer, timeout); - return TRUE; - - } else if(graph_rc == transition_pending) { - crm_debug_3("Transition not yet complete - no actions fired"); - return TRUE; - } - - if(graph_rc != transition_complete) { - crm_err("Transition failed: %s", transition_status(graph_rc)); - print_graph(LOG_WARNING, transition_graph); - } - } - - transition_graph->complete = TRUE; - notify_crmd(transition_graph); - - return TRUE; -} diff --git a/transitioner/te_callbacks.h b/crmd/te_callbacks.h similarity index 96% rename from transitioner/te_callbacks.h rename to crmd/te_callbacks.h index 462cc0553c..ed9336f6fd 100644 --- a/transitioner/te_callbacks.h +++ b/crmd/te_callbacks.h @@ -1,42 +1,43 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TE_CALLBACKS__H #define TE_CALLBACKS__H extern void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern gboolean global_timer_callback(gpointer data); extern gboolean action_timer_callback(gpointer data); extern gboolean te_graph_trigger(gpointer user_data); extern void tengine_stonith_connection_destroy(gpointer user_data); +extern void te_update_diff(const char *event, xmlNode *msg); #if SUPPORT_HEARTBEAT extern void tengine_stonith_callback(stonith_ops_t * op); extern gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data); #endif #endif diff --git a/transitioner/events.c b/crmd/te_events.c similarity index 98% rename from transitioner/events.c rename to crmd/te_events.c index 8b94a7f899..3744e020a1 100644 --- a/transitioner/events.c +++ b/crmd/te_events.c @@ -1,577 +1,578 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include +#include char *failed_stop_offset = NULL; char *failed_start_offset = NULL; xmlNode *need_abort(xmlNode *update); void process_graph_event(xmlNode *event, const char *event_node); int match_graph_event(int action_id, xmlNode *event, const char *event_node, int op_status, int op_rc, int target_rc); xmlNode * need_abort(xmlNode *update) { xmlNode *section_xml = NULL; const char *section = NULL; if(update == NULL) { return NULL; } xml_prop_iter(update, name, value, if(safe_str_eq(name, XML_ATTR_HAVE_QUORUM)) { goto do_abort; /* possibly not required */ } else if(safe_str_eq(name, XML_ATTR_NUMPEERS)) { goto do_abort; } else if(safe_str_eq(name, XML_ATTR_GENERATION)) { goto do_abort; } else if(safe_str_eq(name, XML_ATTR_GENERATION_ADMIN)) { goto do_abort; } continue; do_abort: crm_debug("Aborting on change to %s", name); crm_log_xml_debug(update, "Abort: CIB Attrs"); return update; ); section = XML_CIB_TAG_NODES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_RESOURCES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CONSTRAINTS; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CRMCONFIG; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); return NULL; } static gboolean fail_incompletable_actions(crm_graph_t *graph, const char *down_node) { const char *target = NULL; xmlNode *last_action = NULL; slist_iter( synapse, synapse_t, graph->synapses, lpc, if (synapse->confirmed) { continue; } slist_iter( action, crm_action_t, synapse->actions, lpc, if(action->type == action_type_pseudo || action->confirmed) { continue; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); if(safe_str_eq(target, down_node)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Action %d (%s) is scheduled for %s (offline)", action->id, ID(action->xml), down_node); } ); ); if(last_action != NULL) { crm_warn("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; } gboolean extract_event(xmlNode *msg) { int shutdown = 0; const char *shutdown_s = NULL; const char *event_node = NULL; /* [cib fragment] ... */ crm_debug_4("Extracting event from %s", crm_element_name(msg)); xml_child_iter_filter( msg, node_state, XML_CIB_TAG_STATE, xmlNode *attrs = NULL; xmlNode *resources = NULL; const char *ccm_state = crm_element_value( node_state, XML_CIB_ATTR_INCCM); const char *crmd_state = crm_element_value( node_state, XML_CIB_ATTR_CRMDSTATE); /* Transient node attribute changes... */ event_node = crm_element_value(node_state, XML_ATTR_ID); crm_debug_2("Processing state update from %s", event_node); crm_log_xml_debug_3(node_state, "Processing"); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes for %s", event_node); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); resources = find_xml_node( resources, XML_LRM_TAG_RESOURCES, FALSE); /* LRM resource update... */ xml_child_iter( resources, rsc, xml_child_iter( rsc, rsc_op, crm_log_xml_debug_3(rsc_op, "Processing resource update"); process_graph_event(rsc_op, event_node); ); ); /* * node state update... possibly from a shutdown we requested */ if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { crm_action_t *shutdown = NULL; shutdown = match_down_event(0, event_node, NULL); if(shutdown != NULL) { update_graph(transition_graph, shutdown); trigger_graph(); } else { crm_info("Stonith/shutdown of %s not matched", event_node); abort_transition(INFINITY, tg_restart, "Node failure", node_state); } fail_incompletable_actions(transition_graph, event_node); } shutdown_s = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); if(shutdown_s) { shutdown = crm_parse_int(shutdown_s, NULL); } if(shutdown_s && shutdown > 0) { crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node); abort_transition(INFINITY, tg_restart, "Shutdown request", node_state); } ); return TRUE; } static void update_failcount(xmlNode *event, const char *event_node, int rc, int target_rc) { int interval = 0; char *task = NULL; char *rsc_id = NULL; char *attr_name = NULL; const char *id = ID(event); const char *on_uuid = event_node; const char *value = NULL; if(rc == 99) { /* this is an internal code for "we're busy, try again" */ return; } else if(rc == target_rc) { return; } if(failed_stop_offset == NULL) { failed_stop_offset = crm_strdup(INFINITY_S); } if(failed_start_offset == NULL) { failed_start_offset = crm_strdup(INFINITY_S); } CRM_CHECK(on_uuid != NULL, return); CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event)); goto bail); CRM_CHECK(task != NULL, goto bail); CRM_CHECK(rsc_id != NULL, goto bail); if(safe_str_eq(task, CRMD_ACTION_START)) { interval = 1; value = failed_start_offset; } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { interval = 1; value = failed_stop_offset; } if(value == NULL || safe_str_neq(value, INFINITY_S)) { value = XML_NVPAIR_ATTR_VALUE"++"; } if(interval > 0) { int call_id = 0; char *now = crm_itoa(time(NULL)); attr_name = crm_concat("fail-count", rsc_id, '-'); crm_warn("Updating failcount for %s on %s after failed %s:" " rc=%d (update=%s, time=%s)", rsc_id, on_uuid, task, rc, value, now); /* don't let notificatios of these updates cause new transitions */ - call_id = update_attr(te_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, + call_id = update_attr(fsa_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, value, FALSE); add_cib_op_callback(call_id, FALSE, NULL, cib_failcount_updated); crm_free(attr_name); attr_name = crm_concat("last-failure", rsc_id, '-'); /* don't let notificatios of these updates cause new transitions */ - call_id = update_attr(te_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, + call_id = update_attr(fsa_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, now, FALSE); add_cib_op_callback(call_id, FALSE, NULL, cib_failcount_updated); crm_free(attr_name); crm_free(now); } bail: crm_free(rsc_id); crm_free(task); } static int status_from_rc(crm_action_t *action, int orig_status, int rc, int target_rc) { int status = orig_status; if(target_rc == rc) { crm_debug_2("Target rc: == %d", rc); if(status != LRM_OP_DONE) { crm_debug_2("Re-mapping op status to" " LRM_OP_DONE for rc=%d", rc); status = LRM_OP_DONE; } } else { crm_debug_2("Target rc: != %d", rc); if(status != LRM_OP_ERROR) { crm_info("Re-mapping op status to" " LRM_OP_ERROR for rc=%d", rc); status = LRM_OP_ERROR; } } /* 99 is the code we use for direct nack's */ if(rc != 99 && status != LRM_OP_DONE) { const char *task, *uname; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s", action->id, task, uname, target_rc, rc, op_status2text(status)); } return status; } /* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event(int action_id, xmlNode *event, const char *event_node, int op_status, int op_rc, int target_rc) { const char *target = NULL; const char *allow_fail = NULL; const char *this_event = ID(event); crm_action_t *action = NULL; action = get_action(action_id, FALSE); if(action == NULL) { return -1; } op_status = status_from_rc(action, op_status, op_rc, target_rc); if(op_status != LRM_OP_DONE) { update_failcount(event, event_node, op_rc, target_rc); } /* Process OP status */ switch(op_status) { case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return action->id; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: action->failed = TRUE; break; case LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Dont know what to do for cancelled ops yet"); break; default: action->failed = TRUE; crm_err("Unsupported action result: %d", op_status); } /* stop this event's timer if it had one */ stop_te_timer(action->timer); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); if(action->failed) { allow_fail = g_hash_table_lookup( action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(crm_is_true(allow_fail)) { action->failed = FALSE; } } if(action->failed) { abort_transition(action->synapse->priority+1, tg_restart, "Event failed", event); } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); te_log_action(LOG_INFO, "Action %s (%d) confirmed on %s (rc=%d)", crm_str(this_event), action->id, crm_str(target), op_status); return action->id; } crm_action_t * get_action(int id, gboolean confirmed) { slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->id == id) { if(confirmed) { stop_te_timer(action->timer); action->confirmed = TRUE; } return action; } ) ); return NULL; } crm_action_t * match_down_event(int id, const char *target, const char *filter) { const char *this_action = NULL; const char *this_node = NULL; crm_action_t *match = NULL; slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(id > 0 && action->id == id) { match = action; break; } this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); if(action->type != action_type_crm) { continue; } else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){ continue; } else if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); if(this_node == NULL) { crm_log_xml_err(action->xml, "No node uuid"); } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } match = action; break; ); if(match != NULL) { /* stop this event's timer if it had one */ break; } ); if(match != NULL) { /* stop this event's timer if it had one */ crm_debug("Match found for action %d: %s on %s", id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); stop_te_timer(match->timer); match->confirmed = TRUE; } else if(id > 0) { crm_err("No match for action %d", id); } else { crm_warn("No match for shutdown action on %s", target); } return match; } void process_graph_event(xmlNode *event, const char *event_node) { int rc = -1; int status = -1; int action = -1; int target_rc = -1; int transition_num = -1; char *update_te_uuid = NULL; gboolean passed = FALSE; const char *id = NULL; const char *magic = NULL; CRM_ASSERT(event != NULL); id = ID(event); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); if(magic == NULL) { /* non-change */ return; } CRM_CHECK(decode_transition_magic( magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc), crm_err("Invalid event %s detected", id); abort_transition(INFINITY, tg_restart,"Bad event", event); ); if(status == LRM_OP_PENDING) { goto bail; } if(transition_num == -1) { crm_err("Action %s (%s) initiated outside of a transition", id, magic); abort_transition(INFINITY, tg_restart,"Unexpected event",event); } else if(action < 0 || safe_str_neq(update_te_uuid, te_uuid)) { crm_info("Action %s (%s) initiated by a different transitioner", id, magic); abort_transition(INFINITY, tg_restart,"Foreign event", event); } else if(transition_graph->id != transition_num) { crm_info("Detected action %s from a different transition:" " %d vs. %d", id, transition_num, transition_graph->id); abort_transition(INFINITY, tg_restart,"Old event", event); } else if(transition_graph->complete) { crm_info("Action %s arrived after a completed transition", id); abort_transition(INFINITY, tg_restart, "Inactive graph", event); } else if(match_graph_event( action, event, event_node, status, rc, target_rc) < 0) { crm_err("Unknown graph action %s", id); abort_transition(INFINITY, tg_restart, "Unknown event", event); } else { passed = TRUE; crm_debug_2("Processed update to %s: %s", id, magic); } if(passed == FALSE && rc != EXECRA_OK) { update_failcount(event, event_node, rc, target_rc); } bail: crm_free(update_te_uuid); return; } diff --git a/transitioner/utils.c b/crmd/te_utils.c similarity index 71% rename from transitioner/utils.c rename to crmd/te_utils.c index ac2772048c..b55d1a81cc 100644 --- a/transitioner/utils.c +++ b/crmd/te_utils.c @@ -1,150 +1,210 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include extern cib_t *te_cib_conn; GCHSource *stonith_src = NULL; GTRIGSource *stonith_reconnect = NULL; gboolean te_connect_stonith(gpointer user_data) { #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { int lpc = 0; int rc = ST_OK; IPC_Channel *fence_ch = NULL; if(stonith_src != NULL) { crm_debug("Still connected"); return TRUE; } for(lpc = 0; lpc < 30; lpc++) { crm_info("Attempting connection to fencing daemon..."); sleep(1); rc = stonithd_signon("tengine"); if(rc == ST_OK) { break; } if(user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); G_main_set_trigger(stonith_reconnect); return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_ASSERT(rc == ST_OK); /* If not, we failed 30 times... just get out */ CRM_ASSERT(stonithd_set_stonith_ops_callback( tengine_stonith_callback) == ST_OK); crm_debug_2("Grabbing IPC channel"); fence_ch = stonithd_input_IPC_channel(); CRM_ASSERT(fence_ch != NULL); crm_debug_2("Attaching to mainloop"); stonith_src = G_main_add_IPC_Channel( G_PRIORITY_LOW, fence_ch, FALSE, tengine_stonith_dispatch, NULL, tengine_stonith_connection_destroy); CRM_ASSERT(stonith_src != NULL); crm_info("Connected"); return TRUE; } #endif return FALSE; } +gboolean +start_global_timer(crm_action_timer_t *timer, int timeout) +{ + CRM_ASSERT(timer != NULL); + CRM_CHECK(timer > 0, return FALSE); + CRM_CHECK(timer->source_id == 0, return FALSE); + + if(timeout <= 0) { + crm_err("Tried to start timer with period: %d", timeout); + + } else if(timer->source_id == 0) { + crm_debug_2("Starting abort timer: %dms", timeout); + timer->timeout = timeout; + timer->source_id = Gmain_timeout_add( + timeout, global_timer_callback, (void*)timer); + CRM_ASSERT(timer->source_id != 0); + return TRUE; + + } else { + crm_err("Timer is already active with period: %d", timer->timeout); + } + + return FALSE; +} + gboolean stop_te_timer(crm_action_timer_t *timer) { const char *timer_desc = "action timer"; if(timer == NULL) { return FALSE; } if(timer->reason == timeout_abort) { timer_desc = "global timer"; } if(timer->source_id != 0) { crm_debug_2("Stopping %s", timer_desc); Gmain_timeout_remove(timer->source_id); timer->source_id = 0; } else { return FALSE; } return TRUE; } +gboolean +te_graph_trigger(gpointer user_data) +{ + int timeout = 0; + enum transition_status graph_rc = -1; + + if(transition_graph->complete == FALSE) { + graph_rc = run_graph(transition_graph); + timeout = transition_graph->transition_timeout; + print_graph(LOG_DEBUG_3, transition_graph); + + if(graph_rc == transition_active) { + crm_debug_3("Transition not yet complete"); + stop_te_timer(transition_timer); + start_global_timer(transition_timer, timeout); + return TRUE; + + } else if(graph_rc == transition_pending) { + crm_debug_3("Transition not yet complete - no actions fired"); + return TRUE; + } + + if(graph_rc != transition_complete) { + crm_err("Transition failed: %s", transition_status(graph_rc)); + print_graph(LOG_WARNING, transition_graph); + } + } + + transition_graph->complete = TRUE; + notify_crmd(transition_graph); + + return TRUE; +} + void trigger_graph_processing(const char *fn, int line) { G_main_set_trigger(transition_trigger); crm_debug_2("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const char *fn, int line) { int log_level = LOG_DEBUG; /* if(abort_priority >= INFINITY) { log_level = LOG_INFO; } */ update_abort_priority( transition_graph, abort_priority, abort_action, abort_text); do_crm_log(log_level, "%s:%d - Triggered graph processing : %s", fn, line, abort_text); if(reason != NULL) { const char *magic = crm_element_value( reason, XML_ATTR_TRANSITION_MAGIC); if(magic) { do_crm_log(log_level, "Caused by update to %s: %s", ID(reason), magic); } else { crm_log_xml(log_level, "Cause", reason); } } G_main_set_trigger(transition_trigger); } + diff --git a/crmd/tengine.c b/crmd/tengine.c index 704308fa68..001d368d45 100644 --- a/crmd/tengine.c +++ b/crmd/tengine.c @@ -1,173 +1,309 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include +#include +#include +extern crm_graph_functions_t te_graph_fns; struct crm_subsystem_s *te_subsystem = NULL; +gboolean te_init(void); + + +static void global_cib_callback(const xmlNode *msg, int callid ,int rc, xmlNode *output) +{ +#if 1 + if(transition_graph->complete == FALSE) { + int pending_callbacks = num_cib_op_callbacks(); + if(pending_callbacks == 0) { + crm_debug("Triggering the TE"); + trigger_graph(); + } + } +#endif +} + /* A_TE_START, A_TE_STOP, A_TE_RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { - struct crm_subsystem_s *this_subsys = te_subsystem; - - long long stop_actions = A_TE_STOP; - long long start_actions = A_TE_START; + int dummy; + gboolean init_ok = TRUE; -/* if(action & stop_actions && cur_state != S_STOPPING */ -/* && is_set(fsa_input_register, R_TE_PEND)) { */ -/* result = I_WAIT_FOR_EVENT; */ -/* return result; */ -/* } */ + cl_uuid_t new_uuid; + char uuid_str[UU_UNPARSE_SIZEOF]; - if(action & stop_actions) { - stop_subsystem(this_subsys, FALSE); + if(action & A_TE_STOP) { + if(transition_graph) { + destroy_graph(transition_graph); } + } - if(action & start_actions) { - if(cur_state != S_STOPPING) { - if(start_subsystem(this_subsys) == FALSE) { - register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); - } - } else { - crm_info("Ignoring request to start %s while shutting down", - this_subsys->name); - } + if((action & A_TE_START) && cur_state == S_STOPPING) { + crm_info("Ignoring request to start %s while shutting down", + te_subsystem->name); + return; + } + + if((action & A_TE_START) == 0) { + return; + } + + cl_uuid_generate(&new_uuid); + cl_uuid_unparse(&new_uuid, uuid_str); + te_uuid = crm_strdup(uuid_str); + crm_info("Registering TE UUID: %s", te_uuid); + + if(transition_trigger == NULL) { + transition_trigger = G_main_add_TriggerHandler( + G_PRIORITY_LOW, te_graph_trigger, NULL, NULL); + } + + if(stonith_reconnect == NULL) { + stonith_reconnect = G_main_add_TriggerHandler( + G_PRIORITY_LOW, te_connect_stonith, &dummy, NULL); + } + + if(cib_ok != fsa_cib_conn->cmds->add_notify_callback( + fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { + crm_err("Could not set CIB notification callback"); + init_ok = FALSE; + } + + if(cib_ok != fsa_cib_conn->cmds->set_op_callback(fsa_cib_conn, global_cib_callback)) { + crm_err("Could not set CIB global callback"); + init_ok = FALSE; + } + + if(is_heartbeat_cluster() && init_ok) { + G_main_set_trigger(stonith_reconnect); + } + + if(init_ok) { + set_graph_functions(&te_graph_fns); + + if(transition_graph) { + destroy_graph(transition_graph); } + + /* create a blank one */ + transition_graph = unpack_graph(NULL); + transition_graph->complete = TRUE; + transition_graph->abort_reason = "DC Takeover"; + transition_graph->completion_action = tg_restart; + + crm_malloc0(transition_timer, sizeof(crm_action_timer_t)); + transition_timer->source_id = 0; + transition_timer->reason = timeout_abort; + transition_timer->action = NULL; + } } /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { xmlNode *cmd = NULL; if(AM_I_DC == FALSE) { - crm_debug("Not DC: No need to invoke the TE (anymore): %s", - fsa_action2string(action)); + crm_err("Not DC: No need to invoke the TE (anymore): %s", + fsa_action2string(action)); return; } else if(fsa_state != S_TRANSITION_ENGINE && (action & A_TE_INVOKE)) { - crm_debug("No need to invoke the TE (%s) in state %s", - fsa_action2string(action), - fsa_state2string(fsa_state)); + crm_err("No need to invoke the TE (%s) in state %s", + fsa_action2string(action), + fsa_state2string(fsa_state)); return; + } - } else if(!is_set(fsa_input_register, te_subsystem->flag_required)) { - crm_err("Ignoring action %s in state: %s" - " - We dont want the TE anymore", - fsa_action2string(action), fsa_state2string(cur_state)); - return; - - } else if(is_set(fsa_input_register, R_TE_CONNECTED) == FALSE) { - crm_info("Waiting for the TE to connect before action %s", - fsa_action2string(action)); - - if(action & A_TE_INVOKE) { - register_fsa_input( - msg_data->fsa_cause, msg_data->fsa_input, - msg_data->data); - } + if(action & A_TE_CANCEL) { + crm_debug("Cancelling the active Transition"); + abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); - crmd_fsa_stall(NULL); - return; - } + } else if(action & A_TE_HALT) { + abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); - if(action & A_TE_INVOKE) { + } else if(action & A_TE_INVOKE) { + const char *value = NULL; + xmlNode *graph_data = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); - if(graph_file != NULL || input->xml != NULL) { - crm_debug("Starting a transition"); - set_bit_inplace(fsa_input_register, R_IN_TRANSITION); - - cmd = create_request( - CRM_OP_TRANSITION, input->xml, NULL, - CRM_SYSTEM_TENGINE, CRM_SYSTEM_DC, NULL); - - crm_xml_add(cmd, F_CRM_TGRAPH_INPUT, graph_input); - if(graph_file) { - crm_xml_add(cmd, F_CRM_TGRAPH, graph_file); - } - - send_request(cmd, NULL); - - } else { + if(graph_file != NULL && input->xml == NULL) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); + return; } - - } else if(action & A_TE_CANCEL) { - crm_debug("Cancelling the active Transition"); - cmd = create_request( - CRM_OP_TEABORT, NULL, NULL, - CRM_SYSTEM_TENGINE, CRM_SYSTEM_DC, NULL); - send_request(cmd, NULL); + if(transition_graph->complete == FALSE) { + crm_info("Another transition is already active"); + abort_transition(INFINITY, tg_restart, "Transition Active", NULL); + return; - } else if(action & A_TE_HALT) { - cmd = create_request( - CRM_OP_TE_HALT, NULL, NULL, - CRM_SYSTEM_TENGINE, CRM_SYSTEM_DC, NULL); + } + crm_debug("Processing graph derived from %s", graph_input); + + graph_data = input->xml; + if(graph_file != NULL) { + FILE *graph_fd = fopen(graph_file, "r"); + + CRM_CHECK(graph_fd != NULL, + cl_perror("Could not open graph file %s", graph_file); return); + + graph_data = file2xml(graph_fd, FALSE); + + unlink(graph_file); + fclose(graph_fd); + } + + destroy_graph(transition_graph); + transition_graph = unpack_graph(graph_data); + start_global_timer(transition_timer, transition_graph->transition_timeout); + + value = crm_element_value(graph_data, "failed-stop-offset"); + if(value) { + failed_stop_offset = crm_strdup(value); + } + + value = crm_element_value(graph_data, "failed-start-offset"); + if(value) { + failed_start_offset = crm_strdup(value); + } + + trigger_graph(); + print_graph(LOG_DEBUG_2, transition_graph); - send_request(cmd, NULL); + if(graph_data != input->xml) { + free_xml(graph_data); + } } free_xml(cmd); } +gboolean te_init(void) +{ + int dummy = 0; + gboolean init_ok = TRUE; + + transition_trigger = G_main_add_TriggerHandler( + G_PRIORITY_LOW, te_graph_trigger, NULL, NULL); + + stonith_reconnect = G_main_add_TriggerHandler( + G_PRIORITY_LOW, te_connect_stonith, &dummy, NULL); + + if(init_ok) { + crm_debug_4("Setting CIB notification callback"); + if(cib_ok != fsa_cib_conn->cmds->add_notify_callback( + fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { + crm_err("Could not set CIB notification callback"); + init_ok = FALSE; + } + } + + if(is_heartbeat_cluster() && init_ok) { + G_main_set_trigger(stonith_reconnect); + } + + if(init_ok) { + cl_uuid_t new_uuid; + char uuid_str[UU_UNPARSE_SIZEOF]; + + cl_uuid_generate(&new_uuid); + cl_uuid_unparse(&new_uuid, uuid_str); + te_uuid = crm_strdup(uuid_str); + crm_info("Registering TE UUID: %s", te_uuid); + set_graph_functions(&te_graph_fns); + + /* create a blank one */ + transition_graph = unpack_graph(NULL); + transition_graph->complete = TRUE; + transition_graph->abort_reason = "DC Takeover"; + transition_graph->completion_action = tg_restart; + crm_malloc0(transition_timer, sizeof(crm_action_timer_t)); + transition_timer->source_id = 0; + transition_timer->reason = timeout_abort; + transition_timer->action = NULL; + } + return init_ok; +} + +#if 0 +gboolean shuttingdown; +gboolean tengine_shutdown(int nsig, gpointer unused) +{ + shuttingdown = TRUE; + abort_transition(INFINITY, tg_shutdown, "Shutdown", NULL); + return TRUE; +} + +gboolean te_stop(void) +{ + destroy_graph(transition_graph); + crm_free(transition_timer); + +#if SUPPORT_HEARTBEAT + if(is_heartbeat_cluster()) { + stonithd_signoff(); + } +#endif + crm_free(te_uuid); +} +#endif diff --git a/transitioner/tengine.h b/crmd/tengine.h similarity index 92% rename from transitioner/tengine.h rename to crmd/tengine.h index 88ae354331..16aa2a0adc 100644 --- a/transitioner/tengine.h +++ b/crmd/tengine.h @@ -1,77 +1,73 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TENGINE__H #define TENGINE__H #include #include #if SUPPORT_HEARTBEAT # include extern void send_stonith_update(stonith_ops_t * op); #endif -extern IPC_Channel *crm_ch; -extern GMainLoop* mainloop; - /* tengine */ extern crm_action_t *match_down_event( int rc, const char *target, const char *filter); extern gboolean cib_action_update(crm_action_t *action, int status); /* utils */ extern crm_action_t *get_action(int id, gboolean confirmed); +extern gboolean start_global_timer(crm_action_timer_t *timer, int timeout); extern gboolean stop_te_timer(crm_action_timer_t *timer); extern const char *get_rsc_state(const char *task, op_status_t status); /* unpack */ extern gboolean extract_event(xmlNode *msg); -extern gboolean process_te_message( - xmlNode * msg, xmlNode *xml_data, IPC_Channel *sender); +extern gboolean process_te_message(xmlNode * msg, xmlNode *xml_data); extern crm_graph_t *transition_graph; extern GTRIGSource *transition_trigger; extern char *te_uuid; -extern cib_t *te_cib_conn; extern void notify_crmd(crm_graph_t *graph); #include extern void trigger_graph_processing(const char *fn, int line); extern void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const char *fn, int line); #define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__) #define abort_transition(pri, action, text, reason) \ abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__); extern gboolean te_connect_stonith(gpointer user_data); extern GCHSource *stonith_src; extern GTRIGSource *transition_trigger; extern GTRIGSource *stonith_reconnect; extern crm_action_timer_t *transition_timer; extern char *failed_stop_offset; extern char *failed_start_offset; #endif