diff --git a/crm/crmd/lrm.c b/crm/crmd/lrm.c index 1a3367605a..94a06b5fc4 100644 --- a/crm/crmd/lrm.c +++ b/crm/crmd/lrm.c @@ -1,1199 +1,1221 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include #include #include #include #include #include #include #include #include #include char *make_stop_id(const char *rsc, int call_id); void ghash_print_pending(gpointer key, gpointer value, gpointer user_data); gboolean stop_all_resources(void); gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data); gboolean build_operation_update( crm_data_t *rsc_list, lrm_op_t *op, const char *src, int lpc); gboolean build_active_RAs(crm_data_t *rsc_list); void do_update_resource(lrm_op_t *op); enum crmd_fsa_input do_lrm_rsc_op( lrm_rsc_t *rsc, char *rid, const char *operation, crm_data_t *msg); enum crmd_fsa_input do_fake_lrm_op(gpointer data); void stop_recurring_action( gpointer key, gpointer value, gpointer user_data); gboolean remove_recurring_action( gpointer key, gpointer value, gpointer user_data); void free_recurring_op(gpointer value); +void nack_rsc_op(lrm_op_t* op, crm_data_t *msg); + GHashTable *xml2list(crm_data_t *parent); GHashTable *monitors = NULL; GHashTable *resources = NULL; GHashTable *resources_confirmed = NULL; GHashTable *shutdown_ops = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; -const char *rsc_path[] = -{ -/* XML_GRAPH_TAG_RSC_OP, */ - XML_CIB_TAG_RESOURCE, - "instance_attributes", - "rsc_parameters" -}; - enum crmd_rscstate { crmd_rscstate_NULL, crmd_rscstate_START, crmd_rscstate_START_PENDING, crmd_rscstate_START_OK, crmd_rscstate_START_FAIL, crmd_rscstate_STOP, crmd_rscstate_STOP_PENDING, crmd_rscstate_STOP_OK, crmd_rscstate_STOP_FAIL, crmd_rscstate_MON, crmd_rscstate_MON_PENDING, crmd_rscstate_MON_OK, crmd_rscstate_MON_FAIL, crmd_rscstate_GENERIC_PENDING, crmd_rscstate_GENERIC_OK, crmd_rscstate_GENERIC_FAIL }; const char *crmd_rscstate2string(enum crmd_rscstate state); const char * crmd_rscstate2string(enum crmd_rscstate state) { switch(state) { case crmd_rscstate_NULL: return NULL; case crmd_rscstate_START: return CRMD_ACTION_START; case crmd_rscstate_START_PENDING: return CRMD_ACTION_START_PENDING; case crmd_rscstate_START_OK: return CRMD_ACTION_STARTED; case crmd_rscstate_START_FAIL: return CRMD_ACTION_START_FAIL; case crmd_rscstate_STOP: return CRMD_ACTION_STOP; case crmd_rscstate_STOP_PENDING: return CRMD_ACTION_STOP_PENDING; case crmd_rscstate_STOP_OK: return CRMD_ACTION_STOPPED; case crmd_rscstate_STOP_FAIL: return CRMD_ACTION_STOP_FAIL; case crmd_rscstate_MON: return CRMD_ACTION_MON; case crmd_rscstate_MON_PENDING: return CRMD_ACTION_MON_PENDING; case crmd_rscstate_MON_OK: return CRMD_ACTION_MON_OK; case crmd_rscstate_MON_FAIL: return CRMD_ACTION_MON_FAIL; case crmd_rscstate_GENERIC_PENDING: return CRMD_ACTION_GENERIC_PENDING; case crmd_rscstate_GENERIC_OK: return CRMD_ACTION_GENERIC_OK; case crmd_rscstate_GENERIC_FAIL: return CRMD_ACTION_GENERIC_FAIL; } return ""; } /* A_LRM_CONNECT */ enum crmd_fsa_input do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret = HA_OK; if(action & A_LRM_DISCONNECT) { if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); } /* TODO: Clean up the hashtable */ } if(action & A_LRM_CONNECT) { crm_debug_4("LRM: connect..."); ret = HA_OK; monitors = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resources = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); resources_confirmed = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); shutdown_ops = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); if(NULL == fsa_lrm_conn) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); ret = HA_FAIL; } if(ret == HA_OK) { crm_debug_4("LRM: sigon..."); ret = fsa_lrm_conn->lrm_ops->signon( fsa_lrm_conn, CRM_SYSTEM_CRMD); } if(ret != HA_OK) { if(++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } } if(ret == HA_OK) { crm_debug_4("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback( fsa_lrm_conn, lrm_op_callback); if(ret != HA_OK) { crm_err("Failed to set LRM callbacks"); } } if(ret != HA_OK) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } /* TODO: create a destroy handler that causes * some recovery to happen */ G_main_add_IPC_Channel(G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, default_ipc_connection_destroy); set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } gboolean stop_all_resources(void) { GListPtr lrm_list = NULL; crm_info("Makeing sure all active resources are stopped before exit"); if(fsa_lrm_conn == NULL) { return TRUE; } else if(is_set(fsa_input_register, R_SENT_RSC_STOP)) { crm_debug("Already sent stop operation"); return TRUE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rsc_id, char, lrm_list, lpc, const char *last_op = g_hash_table_lookup(resources, rsc_id); if(safe_str_neq(last_op, CRMD_ACTION_STOP)) { crm_warn("Resource %s was active at shutdown", rsc_id); do_lrm_rsc_op(NULL, rsc_id, CRMD_ACTION_STOP, NULL); } ); set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); if(g_hash_table_size(shutdown_ops) == 0) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for %d pending stop operations " " to complete before exiting", g_hash_table_size(shutdown_ops)); } return TRUE; } gboolean build_operation_update( crm_data_t *xml_rsc, lrm_op_t *op, const char *src, int lpc) { int len = 0; char *tmp = NULL; char *fail_state = NULL; const char *state = NULL; crm_data_t *xml_op = NULL; char *op_id = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return FALSE; } crm_debug("%s: Updating resouce %s after %s %s op", src, op->rsc_id, op_status2text(op->op_status), op->op_type); if(op->op_status == LRM_OP_CANCELLED) { crm_debug("Ignoring cancelled op"); return TRUE; } xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP); if(safe_str_eq(op->op_type, CRMD_ACTION_NOTIFY)) { const char *n_type = g_hash_table_lookup( op->params, "notify_type"); const char *n_task = g_hash_table_lookup( op->params, "notify_operation"); CRM_DEV_ASSERT(n_type != NULL); CRM_DEV_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = LRM_OP_DONE; op->rc = 0; } else { op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_rsc, XML_LRM_ATTR_LASTOP, op->op_type); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, op->op_type); crm_xml_add(xml_op, "origin", src); /* Handle recurring ops - infer last op_status */ if(op->op_status == LRM_OP_PENDING && op->interval > 0) { if(op->rc == 0) { crm_debug("Mapping pending operation to DONE"); op->op_status = LRM_OP_DONE; } else { crm_debug("Mapping pending operation to ERROR"); op->op_status = LRM_OP_ERROR; } } if(op->user_data == NULL) { op->user_data = generate_transition_key(-1, fsa_our_uname); } fail_state = generate_transition_magic(op->user_data, op->op_status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, fail_state); crm_free(fail_state); switch(op->op_status) { case LRM_OP_PENDING: break; case LRM_OP_CANCELLED: crm_err("What to do here"); break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_debug("Resource action %s/%s failed: %d", op->rsc_id, op->op_type, op->op_status); len = strlen(op->op_type); len += strlen("_failed_"); crm_malloc0(fail_state, sizeof(char)*len); if(fail_state != NULL) { sprintf(fail_state, "%s_failed", op->op_type); } crm_xml_add(xml_op, XML_LRM_ATTR_RSCSTATE, fail_state); crm_xml_add(xml_rsc, XML_LRM_ATTR_RSCSTATE, fail_state); crm_free(fail_state); break; case LRM_OP_DONE: if(safe_str_eq(CRMD_ACTION_START, op->op_type)) { state = CRMD_ACTION_STARTED; } else if(safe_str_eq(CRMD_ACTION_STOP, op->op_type)) { state = CRMD_ACTION_STOPPED; } else if(safe_str_eq(CRMD_ACTION_MON, op->op_type)) { state = CRMD_ACTION_STARTED; } else { crm_warn("Using status \"%s\" for op \"%s\"" "... this is still in the experimental stage.", CRMD_ACTION_GENERIC_OK, op->op_type); state = CRMD_ACTION_GENERIC_OK; } crm_xml_add(xml_op, XML_LRM_ATTR_RSCSTATE, state); crm_xml_add(xml_rsc, XML_LRM_ATTR_RSCSTATE, state); break; } tmp = crm_itoa(op->call_id); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, tmp); crm_free(tmp); /* set these on 'xml_rsc' too to make life easy for the TE */ tmp = crm_itoa(op->rc); crm_xml_add(xml_op, XML_LRM_ATTR_RC, tmp); crm_xml_add(xml_rsc, XML_LRM_ATTR_RC, tmp); crm_free(tmp); tmp = crm_itoa(op->op_status); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, tmp); crm_xml_add(xml_rsc, XML_LRM_ATTR_OPSTATUS, tmp); crm_free(tmp); set_node_tstamp(xml_op); return TRUE; } gboolean build_active_RAs(crm_data_t *rsc_list) { GList *op_list = NULL; GList *lrm_list = NULL; gboolean found_op = FALSE; state_flag_t cur_state = 0; if(fsa_lrm_conn == NULL) { return FALSE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rid, char, lrm_list, lpc, lrm_rsc_t *the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); crm_data_t *xml_rsc = create_xml_node( rsc_list, XML_LRM_TAG_RESOURCE); int max_call_id = -1; crm_debug("Processing lrm_rsc_t entry %s", rid); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); continue; } crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id); op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_2("\tcurrent state:%s", cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s for %s (status=%d, rc=%d)", op->op_type, the_rsc->id, op->op_status, op->rc); if(max_call_id < op->call_id) { build_operation_update(xml_rsc, op, __FUNCTION__, llpc); } else if(max_call_id > op->call_id) { crm_err("Bad call_id in list=%d. Previous call_id=%d", op->call_id, max_call_id); } else { crm_debug("Skipping duplicate entry for call_id=%d", op->call_id); } max_call_id = op->call_id; found_op = TRUE; ); if(found_op == FALSE) { crm_err("Could not properly determin last op" " for %s from %d entries", the_rsc->id, g_list_length(op_list)); } g_list_free(op_list); ); g_list_free(lrm_list); return TRUE; } crm_data_t* do_lrm_query(gboolean is_replace) { crm_data_t *xml_result= NULL; crm_data_t *xml_state = NULL; crm_data_t *xml_data = NULL; crm_data_t *rsc_list = NULL; const char *exp_state = CRMD_STATE_ACTIVE; if(is_set(fsa_input_register, R_SHUTDOWN)) { exp_state = CRMD_STATE_INACTIVE; } xml_state = create_node_state( fsa_our_uname, fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); if(is_replace) { crm_xml_add(xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); } xml_result = create_cib_fragment(xml_state, NULL); crm_log_xml_debug_3(xml_state, "Current state of the LRM"); return xml_result; } /* A_LRM_INVOKE */ enum crmd_fsa_input do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { const char *crm_op = NULL; const char *operation = NULL; enum crmd_fsa_input next_input = I_NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); crm_op = cl_get_string(input->msg, F_CRM_TASK); - - operation = get_xml_attr_nested( - input->xml, rsc_path, DIMOF(rsc_path) -3, - XML_LRM_ATTR_TASK, FALSE); + operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); if(crm_op != NULL && safe_str_eq(crm_op, "lrm_query")) { crm_data_t *data = do_lrm_query(FALSE); HA_Message *reply = create_reply(input->msg, data); if(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_message(LOG_ERR, reply); crm_msg_del(reply); } free_xml(data); } else if(operation != NULL) { char rid[64]; lrm_rsc_t *rsc = NULL; const char *id_from_cib = NULL; + crm_data_t *xml_rsc = find_xml_node( + input->xml, XML_CIB_TAG_RESOURCE, TRUE); - if(cur_state == S_STOPPING || cur_state == S_TERMINATE) { - /* we will have already scheduled a stop */ - crm_debug("Ignoring LRM operation while in state %s", - fsa_state2string(cur_state)); + CRM_DEV_ASSERT(xml_rsc != NULL); + if(crm_assert_failed) { + crm_log_xml_err(input->xml, "Bad resource"); + return I_NULL; } - - id_from_cib = get_xml_attr_nested( - input->xml, rsc_path, DIMOF(rsc_path) -2, - XML_ATTR_ID, TRUE); - + + id_from_cib = crm_element_value(xml_rsc, XML_ATTR_ID); CRM_DEV_ASSERT(id_from_cib != NULL); if(crm_assert_failed) { - crm_err("No value for %s in message at level %d.", - XML_ATTR_ID, DIMOF(rsc_path) -2); + crm_err("No value for %s in %s.", + XML_ATTR_ID, crm_element_name(xml_rsc)); crm_log_xml_err(input->xml, "Bad command"); return I_NULL; } /* only the first 16 chars are used by the LRM */ strncpy(rid, id_from_cib, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); next_input = do_lrm_rsc_op(rsc, rid, operation, input->xml); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } return next_input; } struct recurring_op_s { char *rsc_id; int call_id; }; +void +nack_rsc_op(lrm_op_t* op, crm_data_t *msg) +{ + HA_Message *reply = NULL; + crm_data_t *update, *iter; + crm_data_t *fragment; + + CRM_DEV_ASSERT(op != NULL); + if(crm_assert_failed) { + return; + } + + update = create_xml_node(NULL, XML_CIB_TAG_STATE); + set_uuid(fsa_cluster_conn, update, XML_ATTR_UUID, fsa_our_uname); + crm_xml_add(update, XML_ATTR_UNAME, fsa_our_uname); + + iter = create_xml_node(update, XML_CIB_TAG_LRM); + iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); + iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); + + crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); + + build_operation_update(iter, op, __FUNCTION__, 0); + fragment = create_cib_fragment(update, NULL); + + reply = create_reply(msg, fragment); + crm_log_message_adv(LOG_ERR, "NACK Reply", reply); + + if(relay_message(reply, TRUE) == FALSE) { + crm_log_message_adv(LOG_ERR, "Unable to route reply", reply); + crm_msg_del(reply); + } + free_xml(fragment); + free_xml(update); +} + enum crmd_fsa_input -do_lrm_rsc_op( - lrm_rsc_t *rsc, char *rid, const char *operation, crm_data_t *msg) +do_lrm_rsc_op(lrm_rsc_t *rsc, char *rid, const char *operation, crm_data_t *msg) { int call_id = 0; char *op_id = NULL; lrm_op_t* op = NULL; const char *type = NULL; const char *class = NULL; const char *provider = NULL; const char *transition = NULL; GHashTable *params = NULL; fsa_data_t *msg_data = NULL; CRM_DEV_ASSERT(rid != NULL); if(rsc != NULL) { class = rsc->class; type = rsc->type; } else if(msg != NULL) { - class = get_xml_attr_nested( - msg, rsc_path, DIMOF(rsc_path) -2, - XML_AGENT_ATTR_CLASS, TRUE); + crm_data_t *xml_rsc = find_xml_node( + msg, XML_CIB_TAG_RESOURCE, TRUE); + + class = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS); + CRM_DEV_ASSERT(class != NULL); + if(crm_assert_failed) { return I_NULL; } - type = get_xml_attr_nested( - msg, rsc_path, DIMOF(rsc_path) -2, - XML_ATTR_TYPE, TRUE); + type = crm_element_value(xml_rsc, XML_ATTR_TYPE); + CRM_DEV_ASSERT(type != NULL); + if(crm_assert_failed) { return I_NULL; } - provider = get_xml_attr_nested( - msg, rsc_path, DIMOF(rsc_path) -2, - XML_AGENT_ATTR_PROVIDER, FALSE); + provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER); } if(msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if(transition == NULL) { crm_err("Missing transition"); crm_log_message(LOG_ERR, msg); } } - if(safe_str_neq(operation, CRMD_ACTION_STOP)) { - if(fsa_state == S_STARTING - || fsa_state == S_STOPPING - || fsa_state == S_TERMINATE) { - crm_err("Discarding attempt to perform action %s on %s" - " while in state %s", operation, rid, - fsa_state2string(fsa_state)); - return I_NULL; - - } else if(AM_I_DC == FALSE && fsa_state != S_NOT_DC) { - crm_err("Discarding attempt to perform action %s on %s" - " in state %s", operation, rid, - fsa_state2string(fsa_state)); - return I_NULL; - } - } - if(rsc == NULL) { /* check if its already there */ rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL) { /* add it to the list */ crm_debug_2("adding rsc %s before operation", rid); if(msg != NULL) { params = xml2list(msg); } else { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, class, type, provider, params); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL) { crm_err("Could not add resource %s to LRM", rid); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } /* stop the monitor before stopping the resource */ if(safe_str_eq(operation, CRMD_ACTION_STOP)) { g_hash_table_foreach(monitors, stop_recurring_action, rsc); g_hash_table_foreach_remove( monitors, remove_recurring_action, rsc); } /* now do the op */ crm_info("Performing op %s on %s", operation, rid); crm_malloc0(op, sizeof(lrm_op_t)); op->op_type = crm_strdup(operation); op->op_status = LRM_OP_PENDING; op->user_data = NULL; op->user_data_len = 0; if(transition != NULL) { op->user_data = crm_strdup(transition); op->user_data_len = 1+strlen(op->user_data); } else { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } if(params == NULL) { if(msg != NULL) { params = xml2list(msg); } else { CRM_DEV_ASSERT(safe_str_eq( CRMD_ACTION_STOP, operation)); } } op->params = params; op->interval = crm_get_msec(g_hash_table_lookup(op->params,"interval")); op->timeout = crm_get_msec(g_hash_table_lookup(op->params, "timeout")); op->start_delay = crm_get_msec( g_hash_table_lookup(op->params,"start_delay")); /* sanity */ if(op->interval < 0) { op->interval = 0; } if(op->timeout < 0) { op->timeout = 0; } if(op->start_delay < 0) { op->start_delay = 0; } if(g_hash_table_lookup(op->params, "timeout") != NULL) { char *timeout_ms = crm_itoa(op->timeout); g_hash_table_replace( op->params, crm_strdup("timeout"), timeout_ms); } if(g_hash_table_lookup(op->params, "interval") != NULL) { char *interval_ms = crm_itoa(op->interval); g_hash_table_replace( op->params, crm_strdup("interval"), interval_ms); } if(g_hash_table_lookup(op->params, "start_delay") != NULL) { char *delay_ms = crm_itoa(op->start_delay); g_hash_table_replace( op->params, crm_strdup("start_delay"), delay_ms); } + if(safe_str_neq(operation, CRMD_ACTION_STOP)) { + if((AM_I_DC == FALSE && fsa_state != S_NOT_DC) + || (AM_I_DC && fsa_state != S_TRANSITION_ENGINE)) { + crm_err("Discarding attempt to perform action %s on %s" + " in state %s", operation, rid, + fsa_state2string(fsa_state)); + nack_rsc_op(op, msg); + free_lrm_op(op); + crm_free(op_id); + return I_NULL; + } + } + if(op->interval > 0) { struct recurring_op_s *existing_op = NULL; op_id = generate_op_key(rsc->id, op->op_type, op->interval); existing_op = g_hash_table_lookup(monitors, op_id); if(existing_op != NULL) { crm_debug("Cancelling previous invocation of" " %s on %s (%d)", crm_str(op_id), rsc->id,existing_op->call_id); /*cancel it so we can then restart it without conflict*/ rsc->ops->cancel_op(rsc, existing_op->call_id); g_hash_table_remove(monitors, op_id); } } op->app_name = crm_strdup(CRM_SYSTEM_CRMD); if(safe_str_eq(operation, CRMD_ACTION_MON)) { op->target_rc = CHANGED; } else { op->target_rc = EVERYTIME; } if(safe_str_eq(CRMD_ACTION_MON, operation)) { const char *last_op = g_hash_table_lookup(resources, rsc->id); if(safe_str_eq(last_op, CRMD_ACTION_STOP)) { crm_warn("Attempting to schedule %s after a stop.", op_id); free_lrm_op(op); crm_free(op_id); return I_NULL; } } g_hash_table_replace( resources, crm_strdup(rsc->id), crm_strdup(operation)); call_id = rsc->ops->perform_op(rsc, op); if(call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rid, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if(op->interval > 0) { struct recurring_op_s *op = NULL; crm_malloc0(op, sizeof(struct recurring_op_s)); crm_debug("Adding recurring %s op for %s (%d)", op_id, rsc->id, call_id); op->call_id = call_id; op->rsc_id = crm_strdup(rsc->id); g_hash_table_insert(monitors, op_id, op); op_id = NULL; } else { /* record all non-recurring operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); g_hash_table_replace( shutdown_ops, call_id_s, crm_strdup(rsc->id)); crm_debug("Recording pending op: %s", call_id_s); } crm_free(op_id); free_lrm_op(op); return I_NULL; } void stop_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { if(op->call_id > 0) { crm_debug("Stopping recurring op %d for %s (%s)", op->call_id, rsc->id, (char*)key); rsc->ops->cancel_op(rsc, op->call_id); } else { crm_err("Invalid call_id %d for %s", op->call_id, rsc->id); /* TODO: we probably need to look up the LRM to find it */ } } } gboolean remove_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { return TRUE; } return FALSE; } void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s*)value; crm_free(op->rsc_id); crm_free(op); } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } lrm_op_t * copy_lrm_op(const lrm_op_t *op) { lrm_op_t *op_copy = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return NULL; } CRM_ASSERT(op->rsc_id != NULL); crm_malloc0(op_copy, sizeof(lrm_op_t)); op_copy->op_type = crm_strdup(op->op_type); /* input fields */ op_copy->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(op->params != NULL) { g_hash_table_foreach(op->params, dup_attr, op_copy->params); } op_copy->timeout = op->timeout; op_copy->interval = op->interval; op_copy->target_rc = op->target_rc; /* in the CRM, this is always a string */ if(op->user_data != NULL) { op_copy->user_data = crm_strdup(op->user_data); } /* output fields */ op_copy->op_status = op->op_status; op_copy->rc = op->rc; op_copy->call_id = op->call_id; op_copy->output = NULL; op_copy->rsc_id = crm_strdup(op->rsc_id); if(op->app_name != NULL) { op_copy->app_name = crm_strdup(op->app_name); } if(op->output != NULL) { op_copy->output = crm_strdup(op->output); } return op_copy; } lrm_rsc_t * copy_lrm_rsc(const lrm_rsc_t *rsc) { lrm_rsc_t *rsc_copy = NULL; if(rsc == NULL) { return NULL; } crm_malloc0(rsc_copy, sizeof(lrm_rsc_t)); rsc_copy->id = crm_strdup(rsc->id); rsc_copy->type = crm_strdup(rsc->type); rsc_copy->class = NULL; rsc_copy->provider = NULL; if(rsc->class != NULL) { rsc_copy->class = crm_strdup(rsc->class); } if(rsc->provider != NULL) { rsc_copy->provider = crm_strdup(rsc->provider); } /* GHashTable* params; */ rsc_copy->params = NULL; rsc_copy->ops = NULL; return rsc_copy; } void do_update_resource(lrm_op_t* op) { /* */ crm_data_t *update, *iter; crm_data_t *fragment; int rc = cib_ok; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } update = create_xml_node(NULL, XML_CIB_TAG_STATE); set_uuid(fsa_cluster_conn, update, XML_ATTR_UUID, fsa_our_uname); crm_xml_add(update, XML_ATTR_UNAME, fsa_our_uname); iter = create_xml_node(update, XML_CIB_TAG_LRM); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, op, __FUNCTION__, 0); fragment = create_cib_fragment(update, NULL); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ rc = fsa_cib_conn->cmds->modify( fsa_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, cib_quorum_override); if(rc > 0) { /* the return code is a call number, not an error code */ crm_debug_3("Sent resource state update message: %d", rc); } else { crm_err("Resource state update failed: %s", cib_error2string(rc)); CRM_DEV_ASSERT(rc == cib_ok); } free_xml(fragment); free_xml(update); } enum crmd_fsa_input do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data) { lrm_op_t* op = NULL; const char *last_op = NULL; if(msg_data->fsa_cause != C_LRM_OP_CALLBACK) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } op = fsa_typed_data(fsa_dt_lrm); CRM_DEV_ASSERT(op != NULL); CRM_DEV_ASSERT(op != NULL && op->rsc_id != NULL); if(crm_assert_failed) { return I_NULL; } if(op->op_status == LRM_OP_DONE && op->rc != EXECRA_OK) { crm_warn("Mapping operation %d status with a rc=%d" " to status %d", op->op_status, op->rc, LRM_OP_ERROR); op->op_status = LRM_OP_ERROR; } switch(op->op_status) { case LRM_OP_PENDING: /* this really shouldnt happen */ crm_err("LRM operation (%d) %s on %s %s: %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status), execra_code2string(op->rc)); break; case LRM_OP_ERROR: crm_err("LRM operation (%d) %s on %s %s: %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status), execra_code2string(op->rc)); crm_debug("Result: %s", op->output); break; case LRM_OP_CANCELLED: crm_warn("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); return I_NULL; break; case LRM_OP_TIMEOUT: last_op = g_hash_table_lookup( resources_confirmed, crm_strdup(op->rsc_id)); if(safe_str_eq(last_op, CRMD_ACTION_STOP) && safe_str_eq(op->op_type, CRMD_ACTION_MON)) { crm_err("LRM sent a timed out %s operation" " _after_ a confirmed stop", op->op_type); return I_NULL; } crm_err("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_NOTSUPPORTED: crm_err("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_DONE: crm_debug("LRM operation (%d) %s on %s %s", op->call_id, op->op_type, crm_str(op->rsc_id), op_status2text(op->op_status)); break; } g_hash_table_replace(resources_confirmed, crm_strdup(op->rsc_id), crm_strdup(op->op_type)); do_update_resource(op); if(g_hash_table_size(shutdown_ops) > 0) { char *op_id = make_stop_id(op->rsc_id, op->call_id); if(g_hash_table_remove(shutdown_ops, op_id)) { crm_debug("Op %d (%s %s) confirmed", op->call_id, op->op_type, op->rsc_id); } else if(op->interval == 0) { crm_err("Op %d (%s %s) not matched: %s", op->call_id, op->op_type, op->rsc_id, op_id); } crm_free(op_id); } if(is_set(fsa_input_register, R_SENT_RSC_STOP)) { if(g_hash_table_size(shutdown_ops) == 0) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_debug("Still waiting for %d pending stop operations" " to complete before exiting", g_hash_table_size(shutdown_ops)); g_hash_table_foreach( shutdown_ops, ghash_print_pending, NULL); } } return I_NULL; } char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; crm_malloc0(op_id, strlen(rsc) + 34); if(op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *uname = key; crm_debug("Pending action: %s", uname); } gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data) { const char *this_rsc = value; const char *target_rsc = user_data; if(safe_str_eq(this_rsc, target_rsc)) { return TRUE; } return FALSE; } diff --git a/crm/tengine/callbacks.c b/crm/tengine/callbacks.c index cfe61570e2..ec6ba0d7be 100644 --- a/crm/tengine/callbacks.c +++ b/crm/tengine/callbacks.c @@ -1,504 +1,507 @@ -/* $Id: callbacks.c,v 1.44 2005/08/25 09:36:55 andrew Exp $ */ +/* $Id: callbacks.c,v 1.45 2005/08/26 08:49:42 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, HA_Message *msg); void te_update_diff(const char *event, HA_Message *msg); crm_data_t *need_abort(crm_data_t *update); void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); void te_update_diff(const char *event, HA_Message *msg) { int rc = -1; const char *op = NULL; crm_data_t *diff = NULL; crm_data_t *aborted = NULL; const char *set_name = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; if(msg == NULL) { crm_err("NULL update"); return; } ha_msg_value_int(msg, F_CIB_RC, &rc); op = cl_get_string(msg, F_CIB_OPERATION); if(rc < cib_ok) { crm_debug_2("Ignoring failed %s operation: %s", op, cib_error2string(rc)); return; } diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL && aborted == NULL) { crm_data_t *section = NULL; crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL && extract_event(section) == FALSE) { send_complete("Unexpected status update", section, te_update, i_cancel); free_xml(diff); return; } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } if(aborted != NULL) { send_complete("Non-status change", diff, te_update, i_cancel); free_xml(diff); return; } free_xml(diff); return; } crm_data_t * need_abort(crm_data_t *update) { crm_data_t *section_xml = NULL; const char *section = NULL; if(update == NULL) { return NULL; } section = XML_CIB_TAG_NODES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, return section_xml; ); section = XML_CIB_TAG_RESOURCES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, return section_xml; ); section = XML_CIB_TAG_CONSTRAINTS; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, return section_xml; ); section = XML_CIB_TAG_CRMCONFIG; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, return section_xml; ); return NULL; } void te_update_confirm(const char *event, HA_Message *msg) { int rc = -1; gboolean done = FALSE; const char *op = cl_get_string(msg, F_CIB_OPERATION); const char *type = cl_get_string(msg, F_CIB_OBJTYPE); crm_data_t *update = get_message_xml(msg, F_CIB_UPDATE); ha_msg_value_int(msg, F_CIB_RC, &rc); crm_debug("Processing %s...", event); crm_log_xml_debug_2(update, "Processing update"); if(op == NULL) { crm_err("Illegal CIB update, the operation must be specified"); send_complete("Illegal update", update, te_update, i_cancel); done = TRUE; } else if(te_fsa_state == s_abort_pending) { /* take no further actions if an abort is pending */ crm_debug("Ignoring CIB update while waiting for an abort"); done = TRUE; } else if(strcmp(op, CIB_OP_ERASE) == 0) { /* these are always unexpected, trigger the PE */ crm_err("Need to trigger an election here so that" " the current state of all nodes is obtained"); send_complete("Erase event", update, te_update, i_cancel); done = TRUE; } else if(strcmp(op, CIB_OP_CREATE) == 0 || strcmp(op, CIB_OP_DELETE) == 0 || strcmp(op, CIB_OP_REPLACE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* these are always unexpected, trigger the PE */ send_complete("Non-update change", update, te_update, i_cancel); done = TRUE; } else if(strcmp(op, CIB_OP_UPDATE) != 0) { crm_debug_2("Ignoring %s op confirmation", op); done = TRUE; } if(done) { free_xml(update); return; } if(safe_str_eq(type, XML_CIB_TAG_CRMCONFIG)) { /* ignore - for the moment */ crm_debug("Ignoring changes to the %s section", type); } else if(safe_str_eq(type, XML_CIB_TAG_NODES)) { /* ignore new nodes until they sign up */ crm_debug("Ignoring changes to the %s section", type); } else if(safe_str_eq(type, XML_CIB_TAG_STATUS)) { /* this _may_ not be un-expected */ if(extract_event(update) == FALSE) { send_complete("Unexpected status update", update, te_update, i_cancel); } } else if(safe_str_eq(type, XML_CIB_TAG_NODES) || safe_str_eq(type, XML_CIB_TAG_RESOURCES) || safe_str_eq(type, XML_CIB_TAG_CONSTRAINTS)) { /* these are never expected */ crm_debug("Aborting on changes to the %s section", type); send_complete("Non-status update", update, te_update, i_cancel); } else if(safe_str_eq(type, XML_TAG_CIB)) { crm_data_t *section_xml = NULL; const char *section = NULL; gboolean abort = FALSE; section = XML_CIB_TAG_NODES; if(abort == FALSE) { section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, abort = TRUE; break; ); } section = XML_CIB_TAG_RESOURCES; if(abort == FALSE) { section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, abort = TRUE; break; ); } section = XML_CIB_TAG_CONSTRAINTS; if(abort == FALSE) { section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, abort = TRUE; break; ); } section = XML_CIB_TAG_CRMCONFIG; if(abort == FALSE) { section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, NULL, abort = TRUE; break; ); } if(abort) { send_complete("Non-status update", update, te_update, i_cancel); } section = XML_CIB_TAG_STATUS; if(abort == FALSE) { section_xml = get_object_root(section, update); if(extract_event(section_xml) == FALSE) { send_complete("Unexpected global status update", section_xml, te_update, i_cancel); } } } else { crm_err("Ignoring update confirmation for %s object", type); crm_log_xml_debug(update, "Ignored update"); } free_xml(update); } gboolean process_te_message(HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender) { - const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); - const char *ref = cl_get_string(msg, XML_ATTR_REFERENCE); - const char *op = cl_get_string(msg, F_CRM_TASK); + const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); + const char *sys_from = cl_get_string(msg, F_CRM_SYS_FROM); + const char *ref = cl_get_string(msg, XML_ATTR_REFERENCE); + const char *op = cl_get_string(msg, F_CRM_TASK); + const char *type = cl_get_string(msg, F_CRM_MSG_TYPE); crm_log_message(LOG_DEBUG_3, msg); - - if(safe_str_eq(cl_get_string(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE) - && safe_str_neq(op, CRM_OP_EVENTCC)) { - crm_info("Message was a response not a request. Discarding"); - return TRUE; - } - crm_debug("Processing %s (%s) message", op, ref); if(op == NULL){ /* error */ } else if(strcmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if(sys_to == NULL || strcmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; +#ifdef TESTING + } else if(strcmp(op, CRM_OP_EVENTCC) == 0) { + crm_debug_4("Processing %s...", op); + extract_event(msg); +#endif + + } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) + && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) + && safe_str_eq(type, XML_ATTR_RESPONSE) ){ + crm_err("Processing %s reply...", op); + extract_event(msg); + + } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { + crm_info("Message was a response not a request. Discarding"); + return TRUE; + } else if(strcmp(op, CRM_OP_TRANSITION) == 0) { if(te_fsa_state != s_idle) { crm_debug("Attempt to start another transition"); send_complete("Attempt to start another transition", NULL, te_abort, i_cancel); } else { te_fsa_state = te_state_matrix[i_transition][te_fsa_state]; CRM_DEV_ASSERT(te_fsa_state == s_in_transition); initialize_graph(); unpack_graph(xml_data); crm_debug("Initiating transition..."); if(initiate_transition() == FALSE) { /* nothing to be done.. means we're done. */ crm_info("No actions to be taken..." " transition compelte."); } } } else if(strcmp(op, CRM_OP_TE_HALT) == 0) { send_complete(CRM_OP_TE_HALT, NULL, te_halt, i_cancel); } else if(strcmp(op, CRM_OP_TEABORT) == 0) { send_complete(CRM_OP_TEABORTED, NULL, te_abort, i_cancel); } else if(strcmp(op, CRM_OP_QUIT) == 0) { crm_info("Received quit message, terminating"); exit(0); -#ifdef TESTING - } else if(strcmp(op, CRM_OP_EVENTCC) == 0) { - crm_debug_4("Processing %s...", CRM_OP_EVENTCC); - if(extract_event(msg) == FALSE) { - send_complete("ttest loopback", msg, - te_failed, i_complete); - } -#endif } else { - crm_err("Unknown command: %s", op); + crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } void tengine_stonith_callback(stonith_ops_t * op, void * private_data) { int *action_id = NULL; const char *fail_text = "Fencing op failed"; int fail_code = te_failed; void *fail_data = NULL; crm_data_t *update = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("optype=%d, node_name=%s, result=%d, node_list=%s", op->optype, op->node_name, op->op_result, (char *)op->node_list); /* this will mark the event complete if a match is found */ crm_malloc0(action_id, sizeof(int)); *action_id = match_down_event( op->node_uuid, CRM_OP_FENCE, op->op_result); if(*action_id == -1) { crm_err("Stonith action not matched"); } if(op->op_result == STONITH_SUCCEEDED) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ crm_data_t *node_state = create_xml_node( NULL, XML_CIB_TAG_STATE); CRM_DEV_ASSERT(op->node_name != NULL); CRM_DEV_ASSERT(op->node_uuid != NULL); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add( node_state, XML_CIB_ATTR_JOINSTATE,CRMD_JOINSTATE_DOWN); crm_xml_add( node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); create_xml_node(node_state, XML_CIB_TAG_LRM); update = create_cib_fragment(node_state, NULL); free_xml(node_state); rc = te_cib_conn->cmds->modify( te_cib_conn, XML_CIB_TAG_STATUS, update, NULL, cib_quorum_override); if(*action_id < 0) { fail_text = "Stonith not matched"; fail_data = update; fail_code = te_update; } else if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); fail_text = "Couldnt update CIB after stonith"; fail_data = update; fail_code = te_failed; } else { add_cib_op_callback( rc, FALSE, action_id, cib_fencing_updated); free_xml(update); return; } } send_complete(fail_text, fail_data, fail_code, i_cancel); process_trigger(*action_id); free_xml(update); return; } void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { int *action_id = user_data; CRM_ASSERT(action_id != NULL); if(rc < cib_ok) { HA_Message *msg_copy = ha_msg_copy(msg); const char *fail_text = "Couldnt update CIB after stonith"; crm_err("CIB update failed: %s", cib_error2string(rc)); send_complete(fail_text, msg_copy, te_failed, i_cancel); ha_msg_del(msg_copy); } else { process_trigger(*action_id); check_for_completion(); } crm_free(action_id); } void tengine_stonith_connection_destroy(gpointer user_data) { #if 0 crm_err("Fencing daemon has left us: Shutting down...NOW"); /* shutdown properly later */ CRM_DEV_ASSERT(FALSE/* fencing daemon died */); #else crm_err("Fencing daemon has left us"); #endif return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } diff --git a/crm/tengine/tengine.c b/crm/tengine/tengine.c index 8c755202dc..8757d115bb 100644 --- a/crm/tengine/tengine.c +++ b/crm/tengine/tengine.c @@ -1,1016 +1,1016 @@ -/* $Id: tengine.c,v 1.95 2005/08/25 09:36:55 andrew Exp $ */ +/* $Id: tengine.c,v 1.96 2005/08/26 08:49:42 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include gboolean graph_complete = FALSE; GListPtr graph = NULL; IPC_Channel *crm_ch = NULL; uint transition_idle_timeout = 30*1000; /* 30 seconds */ void fire_synapse(synapse_t *synapse); gboolean initiate_action(action_t *action); gboolean confirm_synapse(synapse_t *synapse, int action_id); void check_synapse_triggers(synapse_t *synapse, int action_id); void cib_action_updated( const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); te_timer_t *transition_timer = NULL; te_timer_t *abort_timer = NULL; int transition_counter = 1; char *te_uuid = NULL; const te_fsa_state_t te_state_matrix[i_invalid][s_invalid] = { /* s_idle, s_in_transition, s_abort_pending, s_updates_pending */ /* Got an i_transition */{ s_in_transition, s_abort_pending, s_abort_pending, s_updates_pending }, /* Got an i_cancel */{ s_idle, s_abort_pending, s_abort_pending, s_updates_pending }, /* Got an i_complete */{ s_idle, s_idle, s_abort_pending, s_updates_pending }, /* Got an i_cmd_complete*/{ s_idle, s_in_transition, s_updates_pending, s_updates_pending }, /* Got an i_cib_complete*/{ s_idle, s_in_transition, s_abort_pending, s_idle }, /* Got an i_cib_confirm */{ s_idle, s_in_transition, s_abort_pending, s_updates_pending }, /* Got an i_cib_notify */{ s_idle, s_in_transition, s_abort_pending, s_updates_pending } }; te_fsa_state_t te_fsa_state = s_idle; gboolean initialize_graph(void) { remove_cib_op_callback(-1, TRUE); if(transition_timer == NULL) { crm_malloc0(transition_timer, sizeof(te_timer_t)); transition_timer->timeout = 10; transition_timer->source_id = -1; transition_timer->reason = timeout_timeout; transition_timer->action = NULL; } else { stop_te_timer(transition_timer); } if(abort_timer == NULL) { crm_malloc0(abort_timer, sizeof(te_timer_t)); abort_timer->timeout = 10; abort_timer->source_id = -1; abort_timer->reason = timeout_abort; abort_timer->action = NULL; } else { stop_te_timer(abort_timer); } if(te_uuid == NULL) { cl_uuid_t new_uuid; crm_malloc0(te_uuid, sizeof(char)*38); cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, te_uuid); crm_info("Registering TE UUID: %s", te_uuid); } while(g_list_length(graph) > 0) { synapse_t *synapse = g_list_nth_data(graph, 0); while(g_list_length(synapse->actions) > 0) { action_t *action = g_list_nth_data(synapse->actions,0); synapse->actions = g_list_remove( synapse->actions, action); if(action->timer->source_id > 0) { crm_debug_3("Removing timer for action: %d", action->id); Gmain_timeout_remove(action->timer->source_id); } free_xml(action->xml); crm_free(action->timer); crm_free(action); } while(g_list_length(synapse->inputs) > 0) { action_t *action = g_list_nth_data(synapse->inputs, 0); synapse->inputs = g_list_remove(synapse->inputs, action); free_xml(action->xml); crm_free(action); } graph = g_list_remove(graph, synapse); crm_free(synapse); } graph = NULL; return TRUE; } /* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event(action_t *action, crm_data_t *event, const char *event_node) { const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; const char *this_uname = NULL; const char *this_rsc = NULL; const char *magic = NULL; const char *this_event; char *update_te_uuid = NULL; const char *update_event; action_t *match = NULL; int op_status_i = -3; int transition_i = -1; if(event == NULL) { crm_debug_4("Ignoring NULL event"); return -1; } this_rsc = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); if(this_rsc == NULL) { crm_debug_4("Skipping non-resource event"); return -1; } crm_debug_3("Processing \"%s\" change", crm_element_name(event)); update_event = crm_element_value(event, XML_ATTR_ID); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); if(magic == NULL) { /* crm_debug("Skipping \"non-change\""); */ crm_log_xml_debug(event, "Skipping \"non-change\""); return -3; } this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK); this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); this_uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); this_event = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); CRM_DEV_ASSERT(this_event != NULL); if(safe_str_neq(this_event, update_event)) { crm_debug_2("Action %d : Event mismatch %s vs. %s", action->id, this_event, update_event); } else if(safe_str_neq(this_node, event_node)) { crm_debug_2("Action %d : Node mismatch %s (%s) vs. %s", action->id, this_node, this_uname, event_node); } else { match = action; } if(match == NULL) { return -1; } crm_debug("Matched action (%d) %s", action->id, this_event); CRM_DEV_ASSERT(decode_transition_magic( magic, &update_te_uuid, &transition_i, &op_status_i)); if(transition_i == -1) { /* we never expect these - recompute */ crm_err("Detected an action initiated outside of a transition"); crm_log_message(LOG_ERR, event); return -5; } else if(safe_str_neq(update_te_uuid, te_uuid)) { crm_err("Detected an action from a different transitioner:" " %s vs. %s", update_te_uuid, te_uuid); return -6; } else if(transition_counter != transition_i) { crm_warn("Detected an action from a different transition:" " %d vs. %d", transition_i, transition_counter); return -3; } /* stop this event's timer if it had one */ stop_te_timer(match->timer); match->complete = TRUE; /* Process OP status */ allow_fail = crm_element_value(match->xml, "allow_fail"); switch(op_status_i) { case -3: crm_err("Action returned the same as last time..." " whatever that was!"); crm_log_message(LOG_ERR, event); break; case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return -4; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: match->failed = TRUE; crm_warn("Action %s on %s failed: %s", update_event, event_node, op_status2text(op_status_i)); if(FALSE == crm_is_true(allow_fail)) { send_complete("Action failed", event, te_failed, i_cancel); return -2; } break; case LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Dont know what to do for cancelled ops yet"); break; default: crm_err("Unsupported action result: %d", op_status_i); send_complete("Unsupport action result", event, te_failed, i_cancel); return -2; } te_log_action(LOG_INFO, "Action %d confirmed", match->id); process_trigger(match->id); if(te_fsa_state != s_in_transition) { return -3; } return match->id; } int match_down_event(const char *target, const char *filter, int rc) { const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; action_t *match = NULL; slist_iter( synapse, synapse_t, graph, lpc, /* lookup event */ slist_iter( action, action_t, synapse->actions, lpc2, this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); if(action->type != action_type_crm) { continue; } else if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } if(safe_str_eq(this_action, CRM_OP_FENCE)) { this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); } else if(safe_str_eq(this_action, CRM_OP_SHUTDOWN)) { this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); } else { crm_err("Action %d : Bad action %s", action->id, this_action); continue; } if(this_node == NULL) { crm_log_xml_err(action->xml, "No node uuid"); } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } match = action; ); if(match != NULL) { break; } ); if(match == NULL) { crm_debug_3("didnt match current action"); return -1; } crm_debug_3("matched"); /* stop this event's timer if it had one */ stop_te_timer(match->timer); match->complete = TRUE; /* Process OP status */ switch(rc) { case STONITH_SUCCEEDED: break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: match->failed = TRUE; allow_fail = crm_element_value(match->xml, "allow_fail"); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", target, rc); send_complete("Stonith failed", match->xml, te_failed, i_cancel); return -2; } break; default: crm_err("Unsupported action result: %d", rc); send_complete("Unsupport Stonith result", match->xml, te_failed, i_cancel); return -3; } crm_debug_3("Action %d was successful, looking for next action", match->id); return match->id; } gboolean process_graph_event(crm_data_t *event, const char *event_node) { int rc = -1; int action_id = -1; int op_status_i = 0; const char *task = NULL; const char *rsc_id = NULL; const char *op_status = NULL; if(event == NULL) { crm_debug("a transition is starting"); process_trigger(action_id); check_for_completion(); return TRUE; } task = crm_element_value(event, XML_LRM_ATTR_LASTOP); rsc_id = crm_element_value(event, XML_ATTR_ID); op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS); if(op_status != NULL) { op_status_i = atoi(op_status); if(op_status_i == -1) { /* just information that the action was sent */ crm_debug("Ignoring TE initiated updates"); return TRUE; } } crm_debug("Processing CIB update: %s on %s: %s", rsc_id, event_node, op_status2text(op_status_i)); if(crm_element_value(event, XML_ATTR_TRANSITION_MAGIC) == NULL) { crm_log_xml_debug(event, "Skipping \"non-change\""); action_id = -3; } slist_iter( synapse, synapse_t, graph, lpc, /* lookup event */ slist_iter( action, action_t, synapse->actions, lpc2, rc = match_graph_event(action, event, event_node); if(action_id >= 0 && rc >= 0) { crm_err("Additional match found: %d [%d]", rc, action_id); } else if(rc != -1) { action_id = rc; } ); if(action_id != -1) { crm_debug("Terminating search: %d", action_id); break; } ); if(action_id == -1) { /* didnt find a match... * now try any dangling inputs */ slist_iter( synapse, synapse_t, graph, lpc, slist_iter( action, action_t, synapse->inputs, lpc2, rc = match_graph_event(action,event,event_node); if(action_id >=0 && rc >=0 && rc != action_id) { crm_err("Additional match found:" " %d [%d]", rc, action_id); } else if(rc != -1) { action_id = rc; } ); if(action_id != -1) { break; } ); } if(action_id > -1) { crm_log_xml_debug_3(event, "Event found"); } else if(action_id == -2) { crm_log_xml_info(event, "Event failed"); } else if(action_id == -3) { crm_log_xml_info(event, "Old event found"); } else if(action_id == -4) { crm_log_xml_debug(event, "Pending event found"); } else { /* unexpected event, trigger a pe-recompute */ /* possibly do this only for certain types of actions */ crm_debug("Search terminated: %d", action_id); send_complete("Event not matched", event, te_update, i_cancel); return FALSE; } check_for_completion(); return TRUE; } void check_for_completion(void) { if(graph_complete) { /* allow some slack until we are pretty sure nothing * else is happening */ crm_info("Transition complete"); send_complete("complete", NULL, te_done, i_complete); } else { /* restart the transition timer again */ crm_debug_3("Transition not yet complete"); start_te_timer(transition_timer); } } gboolean initiate_action(action_t *action) { gboolean ret = FALSE; gboolean send_command = FALSE; const char *on_node = NULL; const char *id = NULL; const char *task = NULL; const char *timeout = NULL; const char *msg_task = XML_GRAPH_TAG_RSC_OP; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); id = crm_element_value(action->xml, XML_ATTR_ID); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); timeout = crm_element_value(action->xml, XML_ATTR_TIMEOUT); if(id == NULL || strlen(id) == 0 || task == NULL || strlen(task) == 0) { /* error */ te_log_action(LOG_ERR, "Failed on corrupted command: %s (id=%s) %s", crm_element_name(action->xml), crm_str(id), crm_str(task)); } else if(action->type == action_type_pseudo){ te_log_action(LOG_INFO, "Executing pseudo-event (%d): " "%s on %s", action->id, task, on_node); action->complete = TRUE; process_trigger(action->id); ret = TRUE; } else if(action->type == action_type_crm && safe_str_eq(task, CRM_OP_FENCE)){ const char *uuid = NULL; const char *target = NULL; stonith_ops_t * st_op = NULL; target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); CRM_DEV_ASSERT(target != NULL); CRM_DEV_ASSERT(uuid != NULL); te_log_action(LOG_INFO,"Executing fencing operation (%s) on %s", id, target); #ifdef TESTING ret = TRUE; action->complete = TRUE; process_trigger(action->id); return TRUE; #endif crm_malloc0(st_op, sizeof(stonith_ops_t)); st_op->optype = RESET; st_op->timeout = transition_idle_timeout / 2; st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); if(stonithd_input_IPC_channel() == NULL) { crm_err("Cannot fence %s - stonith not available", target); } else if (ST_OK == stonithd_node_fence( st_op )) { ret = TRUE; } } else if(on_node == NULL || strlen(on_node) == 0) { /* error */ te_log_action(LOG_ERR, "Failed on corrupted command: %s (id=%s) %s on %s", crm_element_name(action->xml), crm_str(id), crm_str(task), crm_str(on_node)); } else if(action->type == action_type_crm){ te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", id, task, on_node); #ifdef TESTING action->complete = TRUE; process_trigger(action->id); return TRUE; #endif /* action->complete = TRUE; */ msg_task = task; send_command = TRUE; } else if(action->type == action_type_rsc){ /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ #ifdef TESTING action->invoked = FALSE; cib_action_update(action, LRM_OP_DONE); return TRUE; #endif action->invoked = FALSE; if(safe_str_eq(task, CRMD_ACTION_STOP) || safe_str_eq(task, CRMD_ACTION_NOTIFY)) { cib_action_updated(NULL, 0, cib_ok, NULL, action); } else { cib_action_update(action, LRM_OP_PENDING); } ret = TRUE; } else { te_log_action(LOG_ERR, "Failed on unsupported command type: " "%s, %s (id=%s) on %s", crm_element_name(action->xml), task, id, on_node); } if(send_command) { HA_Message *cmd = NULL; char *counter = crm_itoa(transition_counter); cmd = create_request(msg_task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_counter, te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); ret = send_ipc_message(crm_ch, cmd); crm_free(counter); if(ret && action->timeout > 0) { crm_debug_3("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; start_te_timer(action->timer); } } return ret; } gboolean cib_action_update(action_t *action, int status) { char *code = NULL; crm_data_t *fragment = NULL; crm_data_t *state = NULL; crm_data_t *rsc = NULL; crm_data_t *xml_op = NULL; char *op_id = NULL; enum cib_errors rc = cib_ok; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *rsc_id = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override; if(status == LRM_OP_TIMEOUT) { if(crm_element_value(action->xml, XML_LRM_ATTR_RSCID) != NULL) { crm_warn("%s: %s %s on %s timed out", crm_element_name(action->xml), task, rsc_id, target); } else { crm_warn("%s: %s on %s timed out", crm_element_name(action->xml), task, target); } } code = crm_itoa(status); /* update the CIB */ fragment = NULL; state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); xml_op = create_xml_node(rsc,XML_LRM_TAG_RSC_OP); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(rsc, XML_LRM_ATTR_RSCSTATE, get_rsc_state(task, status)); crm_xml_add(rsc, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(rsc, XML_LRM_ATTR_RC, code); crm_xml_add(rsc, XML_LRM_ATTR_LASTOP, task); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, "origin", __FUNCTION__); crm_free(code); code = generate_transition_key(transition_counter, te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); set_node_tstamp(xml_op); fragment = create_cib_fragment(state, NULL); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); #ifndef TESTING rc = te_cib_conn->cmds->modify( te_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, call_options); crm_debug("Updating CIB with %s action %d: %s %s on %s (call_id=%d)", op_status2text(status), action->id, task, rsc_id, target, rc); if(status == LRM_OP_PENDING) { crm_debug_2("Waiting for callback id: %d", rc); add_cib_op_callback(rc, FALSE, action, cib_action_updated); } #else te_log_action(LOG_INFO, "Initiating action %d: %s %s on %s", action->id, task, rsc_id, target); call_options = 0; { HA_Message *cmd = ha_msg_new(11); ha_msg_add(cmd, F_TYPE, T_CRM); ha_msg_add(cmd, F_CRM_VERSION, CRM_VERSION); ha_msg_add(cmd, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); ha_msg_add(cmd, F_CRM_TASK, CRM_OP_EVENTCC); ha_msg_add(cmd, F_CRM_SYS_TO, CRM_SYSTEM_TENGINE); ha_msg_add(cmd, F_CRM_SYS_FROM, CRM_SYSTEM_TENGINE); ha_msg_addstruct(cmd, crm_element_name(state), state); send_ipc_message(crm_ch, cmd); } #endif free_xml(fragment); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void cib_action_updated( const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { HA_Message *cmd = NULL; crm_data_t *rsc_op = NULL; const char *task = NULL; const char *rsc_id = NULL; const char *on_node = NULL; action_t *action = user_data; char *counter = crm_itoa(transition_counter); CRM_DEV_ASSERT(action != NULL); if(crm_assert_failed) { return; } CRM_DEV_ASSERT(action->xml != NULL); if(crm_assert_failed) { return; } rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); rsc_id = crm_element_value(rsc_op, XML_LRM_ATTR_RSCID); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key(transition_counter, te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); crm_free(counter); if(rc < cib_ok) { crm_err("Update for action %d: %s %s on %s FAILED", action->id, task, rsc_id, on_node); send_complete(cib_error2string(rc), output, te_failed, i_cancel); return; } if(te_fsa_state != s_in_transition) { int pending_updates = num_cib_op_callbacks(); if(pending_updates == 0) { send_complete("CIB update queue empty", output, te_done, i_cib_complete); } else { crm_debug("Still waiting on %d callbacks", pending_updates); } crm_debug("Not executing action: Not in a transition: %d", te_fsa_state); return; } crm_info("Initiating action %d: %s %s on %s", action->id, task, rsc_id, on_node); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } - cmd = create_request(task, rsc_op, on_node, + cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); #ifndef TESTING send_ipc_message(crm_ch, cmd); #else crm_log_message(LOG_INFO, cmd); #endif action->invoked = TRUE; if(action->timeout > 0) { crm_debug_3("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; start_te_timer(action->timer); } } gboolean initiate_transition(void) { crm_info("Initating transition"); process_graph_event(NULL, NULL); return TRUE; } void check_synapse_triggers(synapse_t *synapse, int action_id) { synapse->triggers_complete = TRUE; if(synapse->confirmed) { crm_debug_3("Skipping confirmed synapse %d", synapse->id); return; } else if(synapse->complete == FALSE) { crm_debug_3("Checking pre-reqs for %d", synapse->id); /* lookup prereqs */ slist_iter( prereq, action_t, synapse->inputs, lpc, crm_debug_3("Processing input %d", prereq->id); if(prereq->id == action_id) { crm_debug_3("Marking input %d complete", action_id); prereq->complete = TRUE; } else if(prereq->complete == FALSE) { crm_debug_3("Inputs for synapse %d not satisfied", synapse->id); synapse->triggers_complete = FALSE; } ); } } void fire_synapse(synapse_t *synapse) { if(synapse == NULL) { crm_err("Synapse was NULL!"); return; } crm_debug_3("Checking if synapse %d needs to be fired", synapse->id); if(synapse->complete) { crm_debug_3("Skipping complete synapse %d", synapse->id); return; } else if(synapse->triggers_complete == FALSE) { crm_debug_3("Synapse %d not yet satisfied", synapse->id); return; } crm_debug("All inputs for synapse %d satisfied... invoking actions", synapse->id); synapse->complete = TRUE; slist_iter( action, action_t, synapse->actions, lpc, /* allow some leeway */ int tmp_time = 2 * action->timeout; gboolean passed = FALSE; action->invoked = TRUE; /* Invoke the action and start the timer */ passed = initiate_action(action); if(passed == FALSE) { crm_err("Failed initiating <%s id=%d> in synapse %d", crm_element_name(action->xml), action->id, synapse->id); send_complete("Action init failed", action->xml, te_failed, i_cancel); return; } if(tmp_time > transition_timer->timeout) { crm_debug("Action %d: Increasing IDLE timer to %d", action->id, tmp_time); transition_timer->timeout = tmp_time; } ); crm_debug("Synapse %d complete", synapse->id); } gboolean confirm_synapse(synapse_t *synapse, int action_id) { gboolean complete = TRUE; synapse->confirmed = TRUE; slist_iter( action, action_t, synapse->actions, lpc, if(action->complete == FALSE) { complete = FALSE; synapse->confirmed = FALSE; crm_debug_3("Found an incomplete action" " - transition not complete"); break; } ); if(complete) { crm_debug("Synapse %d complete (action=%d)", synapse->id, action_id); } return complete; } void process_trigger(int action_id) { if(te_fsa_state != s_in_transition) { int unconfirmed = unconfirmed_actions(); crm_info("Trigger from action %d (%d more) discarded:" " Not in transition", action_id, unconfirmed); if(unconfirmed == 0) { send_complete("Last pending action confirmed", NULL, te_abort_confirmed, i_cmd_complete); } return; } graph_complete = TRUE; crm_debug_3("Processing trigger from action %d", action_id); /* something happened, stop the timer and start it again at the end */ stop_te_timer(transition_timer); slist_iter( synapse, synapse_t, graph, lpc, if(synapse->confirmed) { crm_debug_3("Skipping confirmed synapse %d", synapse->id); continue; } check_synapse_triggers(synapse, action_id); fire_synapse(synapse); if(graph == NULL) { crm_err("Trigger processing aborted after failed synapse"); break; } crm_debug_3("Checking if %d is confirmed", synapse->id); if(synapse->complete == FALSE) { crm_debug_3("Found an incomplete synapse" " - transition not complete"); /* indicate that the transition is not yet complete */ graph_complete = FALSE; } else if(synapse->confirmed == FALSE) { gboolean confirmed = confirm_synapse(synapse,action_id); graph_complete = graph_complete && confirmed; } crm_debug_3("%d is %s", synapse->id, synapse->confirmed?"confirmed":synapse->complete?"complete":"pending"); ); } diff --git a/include/crm/crm.h b/include/crm/crm.h index 59a4240c72..5d5b9444d9 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,305 +1,306 @@ -/* $Id: crm.h,v 1.73 2005/08/25 08:29:37 andrew Exp $ */ +/* $Id: crm.h,v 1.74 2005/08/26 08:49:43 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM__H #define CRM__H #include #include #include #include #include #include #include #ifdef MCHECK #include #endif #include #ifndef CRM_DEV_BUILD # define CRM_DEV_BUILD 0 #endif #define CRM_DEPRECATED_SINCE_2_0_1 1 #define CRM_DEPRECATED_SINCE_2_0_2 1 #define CRM_DEPRECATED_SINCE_2_0_3 1 #define CRM_DEPRECATED_SINCE_2_0_4 1 #define CRM_DEPRECATED_SINCE_2_1_0 1 #define ipc_call_diff_max_ms 5000 #define action_diff_warn_ms 5000 #define action_diff_max_ms 20000 #define fsa_diff_warn_ms 10000 #define fsa_diff_max_ms 30000 #include #define CRM_ASSERT(expr) if((expr) == FALSE) { \ do_crm_log(LOG_CRIT, __FILE__, __PRETTY_FUNCTION__, \ "Triggered dev assert at %s:%d : %s", \ __FILE__, __LINE__, #expr); \ abort(); \ } extern gboolean crm_assert_failed; #define CRM_DEV_ASSERT(expr) crm_assert_failed = FALSE; \ if((expr) == FALSE) { \ crm_assert_failed = TRUE; \ do_crm_log(CRM_DEV_BUILD?LOG_CRIT:LOG_ERR, \ __FILE__, __PRETTY_FUNCTION__, \ "Triggered dev assert at %s:%d : %s", \ __FILE__, __LINE__, #expr); \ if(CRM_DEV_BUILD) { \ abort(); \ } \ } /* Clean these up at some point, some probably should be runtime options */ #define WORKING_DIR HA_VARLIBDIR"/heartbeat/crm" #define CRM_SOCK_DIR HA_VARRUNDIR"/heartbeat/crm" #define BIN_DIR HA_LIBDIR"/heartbeat" #define SOCKET_LEN 1024 #define APPNAME_LEN 256 #define MAX_IPC_FAIL 5 #define CIB_FILENAME WORKING_DIR"/cib.xml" #define CIB_BACKUP WORKING_DIR"/cib_backup.xml" #define CRM_VERSION "1.0" #define MSG_LOG 1 #define DOT_FSA_ACTIONS 1 #define DOT_ALL_FSA_INPUTS 1 /* #define FSA_TRACE 1 */ #define INFINITY_S "INFINITY" #define MINUS_INFINITY_S "-INFINITY" #define INFINITY 1000000.0 /* Sub-systems */ #define CRM_SYSTEM_DC "dc" #define CRM_SYSTEM_DCIB "dcib" /* The master CIB */ #define CRM_SYSTEM_CIB "cib" #define CRM_SYSTEM_CRMD "crmd" #define CRM_SYSTEM_LRMD "lrmd" #define CRM_SYSTEM_PENGINE "pengine" #define CRM_SYSTEM_TENGINE "tengine" /* Valid operations */ #define CRM_OP_NOOP "noop" #define CRM_OP_JOIN_ANNOUNCE "join_announce" #define CRM_OP_JOIN_OFFER "join_offer" #define CRM_OP_JOIN_REQUEST "join_request" #define CRM_OP_JOIN_ACKNAK "join_ack_nack" #define CRM_OP_JOIN_CONFIRM "join_confirm" #define CRM_OP_DIE "die_no_respawn" #define CRM_OP_RETRIVE_CIB "retrieve_cib" #define CRM_OP_PING "ping" #define CRM_OP_VOTE "vote" #define CRM_OP_HELLO "hello" #define CRM_OP_HBEAT "dc_beat" #define CRM_OP_PECALC "pe_calc" #define CRM_OP_ABORT "abort" #define CRM_OP_QUIT "quit" #define CRM_OP_LOCAL_SHUTDOWN "start_shutdown" #define CRM_OP_SHUTDOWN_REQ "req_shutdown" #define CRM_OP_SHUTDOWN "do_shutdown" #define CRM_OP_FENCE "stonith" #define CRM_OP_EVENTCC "event_cc" #define CRM_OP_TEABORT "te_abort" #define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */ #define CRM_OP_TE_HALT "te_halt" #define CRM_OP_TECOMPLETE "te_complete" #define CRM_OP_TETIMEOUT "te_timeout" #define CRM_OP_TRANSITION "transition" #define CRM_OP_REGISTER "register" #define CRM_OP_DEBUG_UP "debug_inc" #define CRM_OP_DEBUG_DOWN "debug_dec" +#define CRM_OP_INVOKE_LRM "lrm_invoke" #define CRMD_STATE_ACTIVE "member" #define CRMD_STATE_INACTIVE "down" #define CRMD_JOINSTATE_DOWN CRMD_STATE_INACTIVE #define CRMD_JOINSTATE_PENDING "pending" #define CRMD_JOINSTATE_MEMBER CRMD_STATE_ACTIVE #define CRMD_ACTION_START "start" #define CRMD_ACTION_STARTED "running" #define CRMD_ACTION_START_FAIL "start_failed" #define CRMD_ACTION_START_PENDING "starting" #define CRMD_ACTION_STOP "stop" #define CRMD_ACTION_STOPPED "stopped" #define CRMD_ACTION_STOP_FAIL "stop_failed" #define CRMD_ACTION_STOP_PENDING "stopping" #define CRMD_ACTION_NOTIFY "notify" #define CRMD_ACTION_NOTIFIED "notified" #define CRMD_ACTION_MON "monitor" #define CRMD_ACTION_MON_PENDING CRMD_ACTION_STARTED #define CRMD_ACTION_MON_OK CRMD_ACTION_STARTED #define CRMD_ACTION_MON_FAIL "monitor_failed" /* #define CRMD_ACTION_GENERIC "pending" */ #define CRMD_ACTION_GENERIC_PENDING "pending" #define CRMD_ACTION_GENERIC_OK "complete" #define CRMD_ACTION_GENERIC_FAIL "pending_failed" typedef GList* GListPtr; #define crm_atoi(text, default) atoi(text?text:default) extern gboolean safe_str_eq(const char *a, const char *b); extern gboolean safe_str_neq(const char *a, const char *b); #define slist_iter(child, child_type, parent, counter, a) \ { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ int counter = 0; \ for(; __crm_iter_head != NULL; counter++) { \ child = __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ } #define LOG_DEBUG_2 LOG_DEBUG+1 #define LOG_DEBUG_3 LOG_DEBUG+2 #define LOG_DEBUG_4 LOG_DEBUG+3 #define LOG_DEBUG_5 LOG_DEBUG+4 #define LOG_DEBUG_6 LOG_DEBUG+5 #define LOG_MSG LOG_DEBUG_3 #define crm_crit(w...) do_crm_log(LOG_CRIT, __FILE__, __FUNCTION__, w) #define crm_err(w...) do_crm_log(LOG_ERR, __FILE__, __FUNCTION__, w) #define crm_warn(w...) do_crm_log(LOG_WARNING, __FILE__, __FUNCTION__, w) #define crm_notice(w...) do_crm_log(LOG_NOTICE, __FILE__, __FUNCTION__, w) #define crm_info(w...) do_crm_log(LOG_INFO, __FILE__, __FUNCTION__, w) #define crm_log_maybe(level, fmt...) if(crm_log_level >= level) { \ do_crm_log(level, __FILE__, __FUNCTION__, fmt); \ } #define crm_debug(fmt...) crm_log_maybe(LOG_DEBUG, fmt) #define crm_debug_2(fmt...) crm_log_maybe(LOG_DEBUG_2, fmt) /* If this is not a developmental build, give the compiler every chance to * optimize these away */ #if CRM_DEV_BUILD # define crm_debug_3(fmt...) crm_log_maybe(LOG_DEBUG_3, fmt) # define crm_debug_4(fmt...) crm_log_maybe(LOG_DEBUG_4, fmt) # define crm_debug_5(fmt...) crm_log_maybe(LOG_DEBUG_5, fmt) # define crm_debug_6(fmt...) crm_log_maybe(LOG_DEBUG_6, fmt) #else # define crm_debug_3(w...) if(0) { do_crm_log(LOG_DEBUG, NULL, NULL, w); } # define crm_debug_4(w...) if(0) { do_crm_log(LOG_DEBUG, NULL, NULL, w); } # define crm_debug_5(w...) if(0) { do_crm_log(LOG_DEBUG, NULL, NULL, w); } # define crm_debug_6(w...) if(0) { do_crm_log(LOG_DEBUG, NULL, NULL, w); } #endif extern void crm_log_message_adv( int level, const char *alt_debugfile, const HA_Message *msg); #define crm_log_message(level, msg) if(crm_log_level >= level) { \ crm_log_message_adv(level, NULL, msg); \ } #define crm_do_action(level, actions) if(crm_log_level >= level) { \ actions; \ } #define crm_action_info(x) crm_do_action(LOG_INFO, x) #define crm_action_debug(x) crm_do_action(LOG_DEBUG, x) #define crm_action_debug_2(x) crm_do_action(LOG_DEBUG_2, x) #define crm_action_debug_3(x) crm_do_action(LOG_DEBUG_3, x) #define crm_action_debug_4(x) crm_do_action(LOG_DEBUG_4, x) #define crm_log_xml(level, text, xml) if(crm_log_level >= level) { \ print_xml_formatted(level, __FUNCTION__, xml, text); \ } #define crm_log_xml_crit(xml, text) crm_log_xml(LOG_CRIT, text, xml) #define crm_log_xml_err(xml, text) crm_log_xml(LOG_ERR, text, xml) #define crm_log_xml_warn(xml, text) crm_log_xml(LOG_WARNING, text, xml) #define crm_log_xml_notice(xml, text) crm_log_xml(LOG_NOTICE, text, xml) #define crm_log_xml_info(xml, text) crm_log_xml(LOG_INFO, text, xml) #define crm_log_xml_debug(xml, text) crm_log_xml(LOG_DEBUG, text, xml) #define crm_log_xml_debug_2(xml, text) crm_log_xml(LOG_DEBUG_2, text, xml) #define crm_log_xml_debug_3(xml, text) crm_log_xml(LOG_DEBUG_3, text, xml) #define crm_log_xml_debug_4(xml, text) crm_log_xml(LOG_DEBUG_4, text, xml) #define crm_log_xml_debug_5(xml, text) crm_log_xml(LOG_DEBUG_5, text, xml) #define crm_str(x) (const char*)(x?x:"") #if CRM_USE_MALLOC # define crm_malloc0(new_obj,length) \ { \ new_obj = malloc(length); \ if(new_obj == NULL) { \ crm_crit("Out of memory... exiting"); \ exit(1); \ } else { \ memset(new_obj, 0, length); \ } \ } # define crm_free(x) if(x) { free(x); x=NULL; } # define crm_is_allocated(obj) obj?TRUE:FALSE #else # if CRM_DEV_BUILD # define crm_malloc0(new_obj,length) \ { \ if(new_obj) { \ crm_err("Potential memory leak:" \ " %s at %s:%d not NULL before alloc.", \ #new_obj, __FILE__, __LINE__); \ abort(); \ } \ new_obj = cl_malloc(length); \ if(new_obj == NULL) { \ crm_crit("Out of memory... exiting"); \ abort(); \ } \ memset(new_obj, 0, length); \ } #else # define crm_malloc0(new_obj,length) \ { \ new_obj = cl_malloc(length); \ if(new_obj == NULL) { \ crm_crit("Out of memory... exiting"); \ abort(); \ } \ memset(new_obj, 0, length); \ } # endif # define crm_free(x) if(x) { \ CRM_ASSERT(cl_is_allocated(x) == 1); \ cl_free(x); \ x=NULL; \ } # define crm_is_allocated(obj) cl_is_allocated(obj) #endif #define crm_msg_del(msg) if(msg != NULL) { ha_msg_del(msg); msg = NULL; } #endif