diff --git a/crm/crmd/lrm.c b/crm/crmd/lrm.c index 0229a266a0..91047e1606 100644 --- a/crm/crmd/lrm.c +++ b/crm/crmd/lrm.c @@ -1,1600 +1,1599 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include #include #include #include #include #include #include #include #include #include char *make_stop_id(const char *rsc, int call_id); gboolean verify_stopped(gboolean force, int log_level); gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data); gboolean build_operation_update( crm_data_t *rsc_list, lrm_op_t *op, const char *src, int lpc); gboolean build_active_RAs(crm_data_t *rsc_list); gboolean is_rsc_active(const char *rsc_id); void do_update_resource(lrm_op_t *op); gboolean process_lrm_event(lrm_op_t *op); enum crmd_fsa_input do_lrm_rsc_op( lrm_rsc_t *rsc, const char *operation, crm_data_t *msg, HA_Message *request); enum crmd_fsa_input do_fake_lrm_op(gpointer data); void stop_recurring_action( gpointer key, gpointer value, gpointer user_data); gboolean remove_recurring_action( gpointer key, gpointer value, gpointer user_data); lrm_op_t *construct_op( crm_data_t *rsc_op, const char *rsc_id, const char *operation); void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id); void free_recurring_op(gpointer value); GHashTable *monitors = NULL; GHashTable *resources = NULL; GHashTable *shutdown_ops = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; enum crmd_rscstate { crmd_rscstate_NULL, crmd_rscstate_START, crmd_rscstate_START_PENDING, crmd_rscstate_START_OK, crmd_rscstate_START_FAIL, crmd_rscstate_STOP, crmd_rscstate_STOP_PENDING, crmd_rscstate_STOP_OK, crmd_rscstate_STOP_FAIL, crmd_rscstate_MON, crmd_rscstate_MON_PENDING, crmd_rscstate_MON_OK, crmd_rscstate_MON_FAIL, crmd_rscstate_GENERIC_PENDING, crmd_rscstate_GENERIC_OK, crmd_rscstate_GENERIC_FAIL }; const char *crmd_rscstate2string(enum crmd_rscstate state); const char * crmd_rscstate2string(enum crmd_rscstate state) { switch(state) { case crmd_rscstate_NULL: return NULL; case crmd_rscstate_START: return CRMD_ACTION_START; case crmd_rscstate_START_PENDING: return CRMD_ACTION_START_PENDING; case crmd_rscstate_START_OK: return CRMD_ACTION_STARTED; case crmd_rscstate_START_FAIL: return CRMD_ACTION_START_FAIL; case crmd_rscstate_STOP: return CRMD_ACTION_STOP; case crmd_rscstate_STOP_PENDING: return CRMD_ACTION_STOP_PENDING; case crmd_rscstate_STOP_OK: return CRMD_ACTION_STOPPED; case crmd_rscstate_STOP_FAIL: return CRMD_ACTION_STOP_FAIL; case crmd_rscstate_MON: return CRMD_ACTION_MON; case crmd_rscstate_MON_PENDING: return CRMD_ACTION_MON_PENDING; case crmd_rscstate_MON_OK: return CRMD_ACTION_MON_OK; case crmd_rscstate_MON_FAIL: return CRMD_ACTION_MON_FAIL; case crmd_rscstate_GENERIC_PENDING: return CRMD_ACTION_GENERIC_PENDING; case crmd_rscstate_GENERIC_OK: return CRMD_ACTION_GENERIC_OK; case crmd_rscstate_GENERIC_FAIL: return CRMD_ACTION_GENERIC_FAIL; } return ""; } GCHSource *lrm_source = NULL; /* A_LRM_CONNECT */ enum crmd_fsa_input do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret = HA_OK; if(action & A_LRM_DISCONNECT) { verify_stopped(TRUE, LOG_ERR); if(lrm_source) { crm_debug("Removing LRM connection from MainLoop"); if(G_main_del_IPC_Channel(lrm_source) == FALSE) { crm_err("Could not remove LRM connection" " from MainLoop"); } lrm_source = NULL; } if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); crm_info("Disconnected from the LRM"); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); fsa_lrm_conn = NULL; } /* TODO: Clean up the hashtable */ } if(action & A_LRM_CONNECT) { ret = HA_OK; monitors = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resources = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); shutdown_ops = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); if(NULL == fsa_lrm_conn) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); ret = HA_FAIL; } if(ret == HA_OK) { crm_debug("Connecting to the LRM"); ret = fsa_lrm_conn->lrm_ops->signon( fsa_lrm_conn, CRM_SYSTEM_CRMD); } if(ret != HA_OK) { if(++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } } if(ret == HA_OK) { crm_debug_4("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback( fsa_lrm_conn, lrm_op_callback); if(ret != HA_OK) { crm_err("Failed to set LRM callbacks"); } } if(ret != HA_OK) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } /* TODO: create a destroy handler that causes * some recovery to happen */ lrm_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, default_ipc_connection_destroy); set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); crm_debug("LRM connection established"); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } static void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *action = key; int *log_level = user_data; crm_log_maybe(*log_level, "Pending action: %s", action); } gboolean verify_stopped(gboolean force, int log_level) { GListPtr lrm_list = NULL; crm_info("Checking for active resources before exit"); if(fsa_lrm_conn == NULL) { crm_err("Exiting with no LRM connection..." " resources may be active!"); return TRUE; } if(g_hash_table_size(shutdown_ops) > 0) { crm_log_maybe(log_level, "%d pending LRM operations at shutdown%s", g_hash_table_size(shutdown_ops), force?"":"... waiting"); if(force || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_foreach( shutdown_ops, ghash_print_pending, &log_level); } if(force == FALSE) { return FALSE; } } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rsc_id, char, lrm_list, lpc, if(is_rsc_active(rsc_id) == FALSE) { continue; } crm_err("Resource %s was active at shutdown." " You may ignore this error if it is unmanaged.", rsc_id); ); set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); return TRUE; } gboolean build_operation_update( crm_data_t *xml_rsc, lrm_op_t *op, const char *src, int lpc) { char *fail_state = NULL; const char *state = NULL; crm_data_t *xml_op = NULL; char *op_id = NULL; const char *caller_version = NULL; char *digest = NULL; crm_data_t *args_xml = NULL; crm_data_t *args_parent = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return FALSE; } crm_debug_2("%s: Updating resouce %s after %s %s op", src, op->rsc_id, op_status2text(op->op_status), op->op_type); if(op->op_status == LRM_OP_CANCELLED) { crm_debug_3("Ignoring cancelled op"); return TRUE; } if(AM_I_DC) { caller_version = CRM_FEATURE_SET; } else if(fsa_our_dc_version != NULL) { caller_version = fsa_our_dc_version; } else { /* there is a small risk in formerly mixed clusters that * it will be sub-optimal. * however with our upgrade policy, the update we send * should still be completely supported anyway */ caller_version = g_hash_table_lookup( op->params, XML_ATTR_CRM_VERSION); crm_warn("Falling back to operation originator version: %s", caller_version); } crm_debug_3("DC version: %s", caller_version); if(safe_str_eq(op->op_type, CRMD_ACTION_NOTIFY)) { const char *n_type = g_hash_table_lookup( op->params, crm_meta_name("notify_type")); const char *n_task = g_hash_table_lookup( op->params, crm_meta_name("notify_operation")); #if CRM_DEPRECATED_SINCE_2_0_5 if(n_type == NULL) { n_type = g_hash_table_lookup(op->params, "notify_type"); } if(n_task == NULL) { n_task = g_hash_table_lookup(op->params, "notify_operation"); } #endif CRM_DEV_ASSERT(n_type != NULL); CRM_DEV_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = LRM_OP_DONE; op->rc = 0; } else { op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); } /* Handle recurring ops - infer last op_status */ if(op->op_status == LRM_OP_PENDING && op->interval > 0) { if(op->rc == 0) { crm_debug("Mapping pending operation to DONE"); op->op_status = LRM_OP_DONE; } else { crm_debug("Mapping pending operation to ERROR"); op->op_status = LRM_OP_ERROR; } } xml_op = find_entity(xml_rsc, XML_LRM_TAG_RSC_OP, op_id); if(xml_op != NULL) { const char *old_status_s = crm_element_value( xml_op, XML_LRM_ATTR_OPSTATUS); int old_status = crm_parse_int(old_status_s, "-2"); int log_level = LOG_ERR; if(old_status_s == NULL) { crm_err("No value for "XML_LRM_ATTR_OPSTATUS); } else if(old_status == op->op_status) { /* safe to mask */ log_level = LOG_WARNING; } else if(old_status == LRM_OP_PENDING){ /* ??safe to mask?? */ /* log_level = LOG_WARNING; */ } crm_log_maybe(log_level, "Duplicate %s operations in get_cur_state()", op_id); crm_log_maybe(log_level+2, "New entry: %s %s (call=%d, status=%s)", op_id, op->user_data, op->call_id, op_status2text(op->op_status)); crm_log_xml(log_level+2, "Existing entry", xml_op); crm_free(op_id); return FALSE; } else { xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, op->op_type); crm_xml_add(xml_op, XML_ATTR_ORIGIN, src); if(op->user_data == NULL) { op->user_data = generate_transition_key(-1, fsa_our_uname); } #if CRM_DEPRECATED_SINCE_2_0_3 if(compare_version("1.0.3", caller_version) > 0) { CRM_CHECK(FALSE, ; ); fail_state = generate_transition_magic_v202( op->user_data, op->op_status); } else { fail_state = generate_transition_magic( op->user_data, op->op_status, op->rc); } #else fail_state = generate_transition_magic( op->user_data, op->op_status, op->rc); #endif crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, fail_state); crm_free(fail_state); switch(op->op_status) { case LRM_OP_PENDING: break; case LRM_OP_CANCELLED: crm_err("What to do here"); break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_debug("Resource action %s/%s %s: %d", op->rsc_id, op->op_type, op_status2text(op->op_status), op->rc); break; case LRM_OP_DONE: if(safe_str_eq(CRMD_ACTION_START, op->op_type)) { state = CRMD_ACTION_STARTED; } else if(safe_str_eq(CRMD_ACTION_STOP, op->op_type)) { state = CRMD_ACTION_STOPPED; } else if(safe_str_eq(CRMD_ACTION_MON, op->op_type)) { state = CRMD_ACTION_STARTED; } else { crm_debug("Using status \"%s\" for op \"%s\"", CRMD_ACTION_GENERIC_OK, op->op_type); state = CRMD_ACTION_GENERIC_OK; } break; } crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); /* set these on 'xml_rsc' too to make life easy for the PE */ crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); /* this will enable us to later determin that the * resource's parameters have changed and we should force * a restart */ args_parent = NULL; #if CRM_DEPRECATED_SINCE_2_0_4 if(compare_version("1.0.4", caller_version) > 0) { args_parent = xml_op; } #endif args_xml = create_xml_node(args_parent, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, caller_version); digest = calculate_xml_digest(args_xml, TRUE); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); if(args_parent == NULL) { free_xml(args_xml); } return TRUE; } gboolean is_rsc_active(const char *rsc_id) { GList *op_list = NULL; gboolean active = FALSE; lrm_rsc_t *the_rsc = NULL; state_flag_t cur_state = 0; int max_call_id = -1; if(fsa_lrm_conn == NULL) { return FALSE; } the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rsc_id); crm_debug_2("Processing lrm_rsc_t entry %s", rsc_id); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); return FALSE; } op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_3("\tcurrent state:%s",cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug("Processing op %s_%d (%d) for %s (status=%d, rc=%d)", op->op_type, op->interval, op->call_id, the_rsc->id, op->op_status, op->rc); CRM_ASSERT(max_call_id <= op->call_id); if(safe_str_eq(op->op_type, CRMD_ACTION_STOP)) { active = FALSE; } else if(op->rc == EXECRA_NOT_RUNNING) { active = FALSE; } else { active = TRUE; } max_call_id = op->call_id; lrm_free_op(op); ); g_list_free(op_list); lrm_free_rsc(the_rsc); return active; } gboolean build_active_RAs(crm_data_t *rsc_list) { GList *op_list = NULL; GList *lrm_list = NULL; gboolean found_op = FALSE; state_flag_t cur_state = 0; if(fsa_lrm_conn == NULL) { return FALSE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rid, char, lrm_list, lpc, lrm_rsc_t *the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); crm_data_t *xml_rsc = create_xml_node( rsc_list, XML_LRM_TAG_RESOURCE); int max_call_id = -1; crm_debug("Processing lrm_rsc_t entry %s", rid); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); continue; } crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, the_rsc->type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, the_rsc->class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER,the_rsc->provider); op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_2("\tcurrent state:%s", cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s for %s (status=%d, rc=%d)", op->op_type, the_rsc->id, op->op_status, op->rc); if(max_call_id < op->call_id) { build_operation_update( xml_rsc, op, __FUNCTION__, llpc); } else if(max_call_id > op->call_id) { crm_err("Bad call_id in list=%d. Previous call_id=%d", op->call_id, max_call_id); } else { crm_warn("lrm->get_cur_state() returned" " duplicate entries for call_id=%d", op->call_id); } max_call_id = op->call_id; found_op = TRUE; lrm_free_op(op); ); if(found_op == FALSE && g_list_length(op_list) != 0) { crm_err("Could not properly determin last op" " for %s from %d entries", the_rsc->id, g_list_length(op_list)); } g_list_free(op_list); lrm_free_rsc(the_rsc); ); g_list_free(lrm_list); return TRUE; } crm_data_t* do_lrm_query(gboolean is_replace) { gboolean shut_down = FALSE; crm_data_t *xml_result= NULL; crm_data_t *xml_state = NULL; crm_data_t *xml_data = NULL; crm_data_t *rsc_list = NULL; const char *exp_state = CRMD_JOINSTATE_MEMBER; if(is_set(fsa_input_register, R_SHUTDOWN)) { exp_state = CRMD_STATE_INACTIVE; shut_down = TRUE; } xml_state = create_node_state( fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state, !shut_down, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); if(is_replace) { crm_xml_add(xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); } xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS); crm_log_xml_debug_3(xml_state, "Current state of the LRM"); return xml_result; } struct recurring_op_s { char *rsc_id; int call_id; }; static void cancel_monitor(lrm_rsc_t *rsc, const char *key) { struct recurring_op_s *existing_op = NULL; if(rsc == NULL) { crm_err("No resource to cancel and operation for"); return; } else if(key == NULL) { crm_err("No operation to cancel"); return; } existing_op = g_hash_table_lookup(monitors, key); if(existing_op != NULL) { crm_debug("Cancelling previous invocation of %s (%d)", key, existing_op->call_id); /* cancel it so we can then restart it without conflict */ if(rsc->ops->cancel_op(rsc, existing_op->call_id) != HA_OK) { crm_info("Couldn't cancel %s (%d)", key, existing_op->call_id); } else { g_hash_table_remove(monitors, key); } } else { crm_debug("No previous invocation of %s", key); } } static lrm_rsc_t * get_lrm_resource(crm_data_t *resource, crm_data_t *op_msg, gboolean do_create) { char rid[64]; lrm_rsc_t *rsc = NULL; const char *short_id = ID(resource); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_debug_2("Retrieving %s from the LRM.", short_id); CRM_CHECK(short_id != NULL, return NULL); if(rsc == NULL) { /* check if its already there (short name) */ strncpy(rid, short_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && long_id != NULL) { /* try the long name instead */ strncpy(rid, long_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && do_create) { /* add it to the LRM */ const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); GHashTable *params = xml2list(op_msg); CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_debug("Adding rsc %s before operation", short_id); strncpy(rid, short_id, 64); rid[63] = 0; #if CRM_DEPRECATED_SINCE_2_0_3 if(op_msg != NULL) { if(g_hash_table_lookup( params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(params); params = xml2list_202(op_msg); } } #endif if(g_hash_table_size(params) == 0) { crm_log_xml_warn(op_msg, "EmptyParams"); } fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, class, type, provider, params); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); g_hash_table_destroy(params); if(rsc == NULL) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM", rid); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } return rsc; } /* A_LRM_INVOKE */ enum crmd_fsa_input do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; enum crmd_fsa_input next_input = I_NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); crm_op = cl_get_string(input->msg, F_CRM_TASK); from_sys = cl_get_string(input->msg, F_CRM_SYS_FROM); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = cl_get_string(input->msg, F_CRM_HOST_FROM); } crm_debug_2("LRM command from: %s", from_sys); if(safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { operation = CRMD_ACTION_DELETE; } else if(safe_str_eq(operation, CRM_OP_LRM_REFRESH)) { crm_op = CRM_OP_LRM_REFRESH; } else if(input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { enum cib_errors rc = cib_ok; crm_data_t *fragment = do_lrm_query(TRUE); crm_info("Forcing a local LRM refresh"); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc); free_xml(fragment); } else if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { crm_data_t *data = do_lrm_query(FALSE); HA_Message *reply = create_reply(input->msg, data); if(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_message(LOG_ERR, reply); crm_msg_del(reply); } free_xml(data); } else if(safe_str_eq(operation, CRM_OP_PROBED) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { const char *probed = XML_BOOLEAN_TRUE; if(safe_str_eq(crm_op, CRM_OP_REPROBE)) { probed = XML_BOOLEAN_FALSE; } update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_STATUS, fsa_our_uuid, NULL, NULL, CRM_OP_PROBED, probed); } else if(operation != NULL) { lrm_rsc_t *rsc = NULL; crm_data_t *params = NULL; gboolean create_rsc = TRUE; crm_data_t *xml_rsc = find_xml_node( input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return I_NULL); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS,TRUE); if(safe_str_eq(operation, CRMD_ACTION_STOP)) { create_rsc = FALSE; } rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if(rsc == NULL && create_rsc) { crm_err("Invalid resource definition"); crm_log_xml_warn(input->xml, "Bad command"); } else if(rsc == NULL) { lrm_op_t* op = NULL; crm_err("Not creating resource for a stop event"); crm_log_xml_warn(input->xml, "Bad command"); op = construct_op(input->xml, ID(xml_rsc), operation); op->op_status = LRM_OP_DONE; op->rc = 0; CRM_ASSERT(op != NULL); send_direct_ack(from_host, from_sys, op, ID(xml_rsc)); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_CANCEL)) { lrm_op_t* op = NULL; char *op_key = NULL; const char *op_task = NULL; const char *op_interval = NULL; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); return I_NULL); op_task = crm_element_value(params, crm_meta_name(XML_LRM_ATTR_TASK)); op_interval = crm_element_value(params, crm_meta_name("interval")); #if CRM_DEPRECATED_SINCE_2_0_5 if(op_interval == NULL) { op_interval = crm_element_value(params, "interval"); } if(op_task == NULL) { op_task = crm_element_value(params, XML_LRM_ATTR_TASK); if(op_task == NULL) { op_task = crm_element_value(params, "task"); } } #endif CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); return I_NULL); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); return I_NULL); op = construct_op(input->xml, rsc->id, op_task); CRM_ASSERT(op != NULL); op_key = generate_op_key( rsc->id,op_task,crm_parse_int(op_interval,"0")); cancel_monitor(rsc, op_key); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; send_direct_ack(from_host, from_sys, op, rsc->id); crm_free(op_key); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { int rc = HA_OK; lrm_op_t* op = NULL; op = construct_op(input->xml, rsc->id, operation); CRM_ASSERT(op != NULL); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; if(rsc == NULL) { crm_debug("Resource %s was already removed", rsc->id); } else { crm_info("Removing resource %s from the LRM", rsc->id); rc = fsa_lrm_conn->lrm_ops->delete_rsc( fsa_lrm_conn, rsc->id); if(rc != HA_OK) { crm_err("Failed to remove resource %s", rsc->id); op->op_status = LRM_OP_ERROR; op->rc = EXECRA_UNKNOWN_ERROR; } } send_direct_ack(from_host, from_sys, op, rsc->id); free_lrm_op(op); } else if(rsc != NULL) { next_input = do_lrm_rsc_op( rsc, operation, input->xml, input->msg); } lrm_free_rsc(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } return next_input; } lrm_op_t * construct_op(crm_data_t *rsc_op, const char *rsc_id, const char *operation) { lrm_op_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; const char *transition = NULL; CRM_DEV_ASSERT(rsc_id != NULL); crm_malloc0(op, sizeof(lrm_op_t)); op->op_type = crm_strdup(operation); op->op_status = LRM_OP_PENDING; op->rc = -1; op->rsc_id = crm_strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; op->app_name = crm_strdup(CRM_SYSTEM_CRMD); if(rsc_op == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; op->user_data_len = 0; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, crm_strdup(XML_ATTR_CRM_VERSION), crm_strdup(CRM_FEATURE_SET)); crm_debug_2("Constructed %s op for %s", operation, rsc_id); return op; } op->params = xml2list(rsc_op); #if CRM_DEPRECATED_SINCE_2_0_3 if(g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(op->params); op->params = xml2list_202(rsc_op); } #endif if(op->params == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } op_delay = g_hash_table_lookup(op->params, crm_meta_name("start_delay")); op_timeout = g_hash_table_lookup(op->params, crm_meta_name("timeout")); op_interval = g_hash_table_lookup(op->params, crm_meta_name("interval")); #if CRM_DEPRECATED_SINCE_2_0_5 if(op_delay == NULL) { op_delay = g_hash_table_lookup(op->params, "start_delay"); } if(op_timeout == NULL) { op_timeout = g_hash_table_lookup(op->params, "timeout"); } if(op_interval == NULL) { op_interval = g_hash_table_lookup(op->params, "interval"); } #endif op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); /* sanity */ if(op->interval < 0) { op->interval = 0; } if(op->timeout < 0) { op->timeout = 0; } if(op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = crm_strdup(transition); op->user_data_len = 1+strlen(op->user_data); if(op->interval != 0) { if(safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval"); op->interval = 0; } } crm_debug_2("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id) { HA_Message *reply = NULL; crm_data_t *update, *iter; crm_data_t *fragment; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } if(op->rsc_id == NULL) { CRM_DEV_ASSERT(rsc_id != NULL); op->rsc_id = crm_strdup(rsc_id); } if(to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } crm_info("ACK'ing resource op: %s for %s", op->op_type, op->rsc_id); update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, op, __FUNCTION__, 0); fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS); reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_debug("Sending ACK: %s", cl_get_string(reply, XML_ATTR_REFERENCE)); crm_log_xml_debug_2(update, "ACK Update"); crm_log_message_adv(LOG_DEBUG_3, "ACK Reply", reply); if(relay_message(reply, TRUE) == FALSE) { crm_log_message_adv(LOG_ERR, "Unable to route reply", reply); crm_msg_del(reply); } free_xml(fragment); free_xml(update); } enum crmd_fsa_input do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation, crm_data_t *msg, HA_Message *request) { int call_id = 0; char *op_id = NULL; lrm_op_t* op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; CRM_CHECK(rsc != NULL, return I_NULL); if(msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if(transition == NULL) { crm_err("Missing transition"); crm_log_message(LOG_ERR, msg); } } /* stop the monitor before stopping the resource */ if(safe_str_eq(operation, CRMD_ACTION_STOP)) { g_hash_table_foreach(monitors, stop_recurring_action, rsc); g_hash_table_foreach_remove( monitors, remove_recurring_action, rsc); } /* now do the op */ op = construct_op(msg, rsc->id, operation); crm_info("Performing op %s on %s (interval=%dms, key=%s)", operation, rsc->id, op->interval, transition); if((AM_I_DC == FALSE && fsa_state != S_NOT_DC) || (AM_I_DC && fsa_state != S_TRANSITION_ENGINE)) { if(safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s" " in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = 99; op->op_status = LRM_OP_ERROR; send_direct_ack(NULL, NULL, op, rsc->id); free_lrm_op(op); crm_free(op_id); return I_NULL; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if(op->interval > 0) { cancel_monitor(rsc, op_id); op->target_rc = CHANGED; } else { op->target_rc = EVERYTIME; } g_hash_table_replace(resources,crm_strdup(rsc->id), crm_strdup(op_id)); call_id = rsc->ops->perform_op(rsc, op); if(call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); g_hash_table_replace( shutdown_ops, call_id_s, crm_strdup(rsc->id)); crm_debug_2("Recording pending op: %s/%s %s", rsc->id, operation, call_id_s); if(op->interval > 0) { struct recurring_op_s *op = NULL; crm_malloc0(op, sizeof(struct recurring_op_s)); crm_debug_2("Adding recurring %s op for %s (%d)", op_id, rsc->id, call_id); op->call_id = call_id; op->rsc_id = crm_strdup(rsc->id); g_hash_table_insert(monitors, op_id, op); op_id = NULL; } } crm_free(op_id); free_lrm_op(op); return I_NULL; } void stop_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { if(op->call_id > 0) { crm_debug("Stopping recurring op %d for %s (%s)", op->call_id, rsc->id, (char*)key); rsc->ops->cancel_op(rsc, op->call_id); } else { crm_err("Invalid call_id %d for %s", op->call_id, rsc->id); } } } gboolean remove_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { return TRUE; } return FALSE; } void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s*)value; crm_free(op->rsc_id); crm_free(op); } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } lrm_op_t * copy_lrm_op(const lrm_op_t *op) { lrm_op_t *op_copy = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return NULL; } CRM_ASSERT(op->rsc_id != NULL); crm_malloc0(op_copy, sizeof(lrm_op_t)); op_copy->op_type = crm_strdup(op->op_type); /* input fields */ op_copy->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(op->params != NULL) { g_hash_table_foreach(op->params, dup_attr, op_copy->params); } op_copy->timeout = op->timeout; op_copy->interval = op->interval; op_copy->target_rc = op->target_rc; /* in the CRM, this is always a string */ if(op->user_data != NULL) { op_copy->user_data = crm_strdup(op->user_data); } /* output fields */ op_copy->op_status = op->op_status; op_copy->rc = op->rc; op_copy->call_id = op->call_id; op_copy->output = NULL; op_copy->rsc_id = crm_strdup(op->rsc_id); if(op->app_name != NULL) { op_copy->app_name = crm_strdup(op->app_name); } if(op->output != NULL) { op_copy->output = crm_strdup(op->output); } return op_copy; } lrm_rsc_t * copy_lrm_rsc(const lrm_rsc_t *rsc) { lrm_rsc_t *rsc_copy = NULL; if(rsc == NULL) { return NULL; } crm_malloc0(rsc_copy, sizeof(lrm_rsc_t)); rsc_copy->id = crm_strdup(rsc->id); rsc_copy->type = crm_strdup(rsc->type); rsc_copy->class = NULL; rsc_copy->provider = NULL; if(rsc->class != NULL) { rsc_copy->class = crm_strdup(rsc->class); } if(rsc->provider != NULL) { rsc_copy->provider = crm_strdup(rsc->provider); } /* GHashTable* params; */ rsc_copy->params = NULL; rsc_copy->ops = NULL; return rsc_copy; } static void cib_rsc_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { crm_err("Resource update %d failed: %s", call_id, cib_error2string(rc)); } else { crm_debug("Resource update %d complete", call_id); } } void do_update_resource(lrm_op_t* op) { /* */ int rc = cib_ok; crm_data_t *update, *iter; CRM_CHECK(op != NULL, return); update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); if(op->interval == 0) { lrm_rsc_t *rsc = NULL; crm_debug_2("Updating %s resource definitions after %s op", op->rsc_id, op->op_type); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id); crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER,rsc->provider); lrm_free_rsc(rsc); } build_operation_update(iter, op, __FUNCTION__, 0); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, cib_quorum_override, rc); if(rc > 0) { /* the return code is a call number, not an error code */ crm_debug("Sent resource state update message: %d", rc); add_cib_op_callback(rc, FALSE, NULL, cib_rsc_callback); } else { crm_err("Resource state update failed: %s", cib_error2string(rc)); - CRM_DEV_ASSERT(rc == cib_ok); } free_xml(update); } enum crmd_fsa_input do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data) { lrm_op_t *op = NULL; CRM_CHECK(msg_data->fsa_cause == C_LRM_OP_CALLBACK, return I_NULL); op = fsa_typed_data(fsa_dt_lrm); process_lrm_event(op); return I_NULL; } gboolean process_lrm_event(lrm_op_t *op) { CRM_CHECK(op != NULL, return I_NULL); CRM_CHECK(op->rsc_id != NULL, return I_NULL); if(op->rc == 8 || op->rc == 7) { /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = LRM_OP_DONE; } switch(op->op_status) { case LRM_OP_PENDING: /* this really shouldnt happen */ crm_err("LRM operation (%d) %s_%d on %s %s: %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status), execra_code2string(op->rc)); break; case LRM_OP_ERROR: crm_err("LRM operation (%d) %s_%d on %s %s: (%d) %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status), op->rc, execra_code2string(op->rc)); if(op->output != NULL) { crm_debug("Result: %s", op->output); } break; case LRM_OP_CANCELLED: crm_warn("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_TIMEOUT: crm_err("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_NOTSUPPORTED: crm_err("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_DONE: crm_info("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); break; } if(op->op_status != LRM_OP_CANCELLED) { do_update_resource(op); if(op->interval > 0) { crm_debug("Op %d %s_%s_%d returned", op->call_id, op->rsc_id, op->op_type, op->interval); return TRUE; } } else if(op->interval == 0) { crm_err("No update sent for cancelled op %d: %s_%s_%d", op->call_id, op->rsc_id, op->op_type, op->interval); } if(g_hash_table_size(shutdown_ops) > 0) { char *op_id = make_stop_id(op->rsc_id, op->call_id); if(g_hash_table_remove(shutdown_ops, op_id)) { crm_debug_2("Op %d %s_%s_%d is confirmed", op->call_id, op->rsc_id, op->op_type, op->interval); crm_free(op_id); return TRUE; } crm_free(op_id); } crm_err("Op %d %s_%s_%d not matched", op->call_id, op->rsc_id, op->op_type, op->interval); return TRUE; } char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; crm_malloc0(op_id, strlen(rsc) + 34); if(op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data) { const char *this_rsc = value; const char *target_rsc = user_data; if(safe_str_eq(this_rsc, target_rsc)) { return TRUE; } return FALSE; } diff --git a/crm/tengine/events.c b/crm/tengine/events.c index 96a8f1fc52..06b6b6939a 100644 --- a/crm/tengine/events.c +++ b/crm/tengine/events.c @@ -1,562 +1,550 @@ -/* $Id: events.c,v 1.21 2006/06/22 12:58:45 andrew Exp $ */ +/* $Id: events.c,v 1.22 2006/08/14 08:52:30 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include crm_data_t *need_abort(crm_data_t *update); void process_graph_event(crm_data_t *event, const char *event_node); int match_graph_event( crm_action_t *action, crm_data_t *event, const char *event_node); crm_data_t * need_abort(crm_data_t *update) { crm_data_t *section_xml = NULL; const char *section = NULL; if(update == NULL) { return NULL; } section = XML_CIB_TAG_NODES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_RESOURCES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CONSTRAINTS; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CRMCONFIG; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); return NULL; } static gboolean fail_incompletable_actions(crm_graph_t *graph, const char *down_node) { const char *target = NULL; crm_data_t *last_action = NULL; slist_iter( synapse, synapse_t, graph->synapses, lpc, if (synapse->confirmed) { continue; } slist_iter( action, crm_action_t, synapse->actions, lpc, if(action->type == action_type_pseudo || action->confirmed) { continue; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if(safe_str_eq(target, down_node)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Action %d (%s) is scheduled for %s (offline)", action->id, ID(action->xml), down_node); } ); ); if(last_action != NULL) { crm_warn("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; } gboolean extract_event(crm_data_t *msg) { int shutdown = 0; const char *event_node = NULL; /* [cib fragment] ... */ crm_debug_4("Extracting event from %s", crm_element_name(msg)); xml_child_iter_filter( msg, node_state, XML_CIB_TAG_STATE, crm_data_t *attrs = NULL; crm_data_t *resources = NULL; const char *ccm_state = crm_element_value( node_state, XML_CIB_ATTR_INCCM); const char *crmd_state = crm_element_value( node_state, XML_CIB_ATTR_CRMDSTATE); /* Transient node attribute changes... */ event_node = crm_element_value(node_state, XML_ATTR_ID); crm_debug("Processing state update from %s", event_node); crm_log_xml_debug_3(node_state,"Processing"); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); resources = find_xml_node( resources, XML_LRM_TAG_RESOURCES, FALSE); /* LRM resource update... */ xml_child_iter( resources, rsc, xml_child_iter( rsc, rsc_op, crm_log_xml_debug_3( rsc_op, "Processing resource update"); process_graph_event(rsc_op, event_node); ); ); /* * node state update... possibly from a shutdown we requested */ if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { crm_action_t *shutdown = NULL; crm_debug_3("A shutdown we requested?"); shutdown = match_down_event(0, event_node, NULL); if(shutdown != NULL) { update_graph(transition_graph, shutdown); trigger_graph(); } else { crm_info("Stonith/shutdown event not matched"); abort_transition(INFINITY, tg_restart, "Node failure", node_state); } fail_incompletable_actions(transition_graph, event_node); } shutdown = 0; ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown); if(shutdown != 0) { crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute"); abort_transition(INFINITY, tg_restart, "Shutdown request", node_state); } ); return TRUE; } static void update_failcount(crm_action_t *action, int rc) { crm_data_t *rsc = NULL; char *attr_name = NULL; const char *task = NULL; const char *rsc_id = NULL; const char *on_node = NULL; const char *on_uuid = NULL; const char *interval = NULL; if(rc == 99) { /* this is an internal code for "we're busy, try again" */ return; } interval = g_hash_table_lookup( action->params, crm_meta_name("interval")); if(interval == NULL) { return; } CRM_CHECK(action->xml != NULL, return); rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(rsc != NULL, return); rsc_id = ID(rsc); CRM_CHECK(rsc_id != NULL, return); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); on_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); CRM_CHECK(task != NULL, return); CRM_CHECK(on_uuid != NULL, return); CRM_CHECK(on_node != NULL, return); attr_name = crm_concat("fail-count", rsc_id, '-'); crm_warn("Updating failcount for %s on %s after failed %s: rc=%d", rsc_id, on_node, task, rc); update_attr(te_cib_conn, cib_none, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, XML_NVPAIR_ATTR_VALUE"++"); crm_free(attr_name); } /* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event( crm_action_t *action, crm_data_t *event, const char *event_node) { int log_level_fail = LOG_ERR; int target_rc = 0; const char *target_rc_s = NULL; const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; const char *this_uname = NULL; const char *magic = NULL; const char *this_event; char *update_te_uuid = NULL; const char *update_event; int op_status_i = -3; int op_rc_i = -3; int transition_i = -1; CRM_CHECK(event != NULL, return -1); crm_debug_3("Processing \"%s\" change", crm_element_name(event)); update_event = crm_element_value(event, XML_ATTR_ID); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(magic != NULL, return -2); this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK); this_uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); this_event = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); CRM_CHECK(this_event != NULL, return -2); if(safe_str_neq(this_event, update_event)) { crm_debug_2("Action %d : Event mismatch %s vs. %s", action->id, this_event, update_event); return -1; } else if(safe_str_neq(this_node, event_node)) { crm_debug_2("Action %d : Node mismatch %s (%s) vs. %s", action->id, this_node, this_uname, event_node); return -1; } crm_debug_2("Matched action (%d) %s", action->id, this_event); CRM_CHECK(decode_transition_magic( magic, &update_te_uuid, &transition_i, &op_status_i, &op_rc_i), return -2); if(transition_i == -1) { /* we never expect these - recompute */ crm_err("Detected action %s initiated outside of a transition", this_event); crm_log_message(LOG_ERR, event); return -2; } else if(safe_str_neq(update_te_uuid, te_uuid)) { crm_info("Detected action %s from a different transitioner:" " %s vs. %s", this_event, update_te_uuid, te_uuid); crm_log_message(LOG_INFO, event); return -3; } else if(transition_graph->id != transition_i) { crm_warn("Detected an action %s from a different transition:" " %d vs. %d", this_event, transition_i, transition_graph->id); crm_log_message(LOG_INFO, event); return -4; } /* stop this event's timer if it had one */ stop_te_timer(action->timer); action->confirmed = TRUE; target_rc_s = g_hash_table_lookup( action->params,crm_meta_name(XML_ATTR_TE_TARGET_RC)); if(target_rc_s != NULL) { crm_debug_2("Target rc: %s vs. %d", target_rc_s, op_rc_i); target_rc = crm_parse_int(target_rc_s, NULL); if(target_rc == op_rc_i) { crm_debug_2("Target rc: == %d", op_rc_i); if(op_status_i != LRM_OP_DONE) { crm_debug_2("Re-mapping op status to" " LRM_OP_DONE for %s",update_event); op_status_i = LRM_OP_DONE; } } else { crm_debug_2("Target rc: != %d", op_rc_i); if(op_status_i != LRM_OP_ERROR) { crm_info("Re-mapping op status to" " LRM_OP_ERROR for %s", update_event); op_status_i = LRM_OP_ERROR; } } } /* Process OP status */ switch(op_status_i) { case -3: crm_err("Action returned the same as last time..." " whatever that was!"); crm_log_message(LOG_ERR, event); break; case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return -5; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: /* This is the code we use for direct nack's */ if(op_rc_i == 99) { log_level_fail = LOG_WARNING; } /* fall through */ case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: action->failed = TRUE; crm_log_maybe(log_level_fail, "Action %s on %s failed (target: %d vs. rc: %d): %s", update_event, this_uname, target_rc, op_rc_i, op_status2text(op_status_i)); break; case LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Dont know what to do for cancelled ops yet"); break; default: action->failed = TRUE; crm_err("Unsupported action result: %d", op_status_i); } update_graph(transition_graph, action); trigger_graph(); if(action->failed) { allow_fail = g_hash_table_lookup( action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(crm_is_true(allow_fail)) { action->failed = FALSE; } } if(action->failed) { /* ignore probes */ if(target_rc != EXECRA_NOT_RUNNING) { update_failcount(action, op_rc_i); } abort_transition(action->synapse->priority+1, tg_restart, "Event failed", event); } else if(transition_graph->complete) { abort_transition(INFINITY, tg_restart,"No active graph", event); } te_log_action(LOG_INFO, "Action %s (%d) confirmed", this_event, action->id); return action->id; } crm_action_t * match_down_event(int id, const char *target, const char *filter) { const char *this_action = NULL; const char *this_node = NULL; crm_action_t *match = NULL; slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(id > 0 && action->id == id) { match = action; break; } this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); if(action->type != action_type_crm) { continue; } else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){ continue; } else if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); if(this_node == NULL) { crm_log_xml_err(action->xml, "No node uuid"); } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } match = action; break; ); if(match != NULL) { /* stop this event's timer if it had one */ break; } ); if(match != NULL) { /* stop this event's timer if it had one */ crm_debug("Match found for action %d: %s on %s", id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); stop_te_timer(match->timer); match->confirmed = TRUE; } else if(id > 0) { crm_err("No match for action %d", id); } else { crm_warn("No match for shutdown action on %s", target); } return match; } void process_graph_event(crm_data_t *event, const char *event_node) { int rc = -1; - int op_status_i = 0; const char *magic = NULL; const char *rsc_id = NULL; - const char *op_status = NULL; CRM_ASSERT(event != NULL); rsc_id = crm_element_value(event, XML_ATTR_ID); - op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); - if(op_status != NULL) { - op_status_i = crm_parse_int(op_status, NULL); - if(op_status_i == LRM_OP_PENDING) { - /* just information that the action was sent */ - crm_debug_2("Ignoring TE initiated updates"); - return; - } - } - if(magic == NULL) { crm_log_xml_debug_2(event, "Skipping \"non-change\""); return; } else { crm_debug_2("Processing CIB update: %s on %s: %s", rsc_id, event_node, magic); } slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, rc = match_graph_event(action, event, event_node); if(rc >= 0) { crm_log_xml_debug_2(event, "match:found"); } else if(rc == -5) { crm_log_xml_debug_2(event, "match:pending"); } else if(rc != -1) { crm_warn("Search for %s terminated: %d", ID(event), rc); abort_transition(INFINITY, tg_restart, "Unexpected event", event); } if(rc != -1) { return; } ); ); /* unexpected event, trigger a pe-recompute */ /* possibly do this only for certain types of actions */ crm_warn("Event not found."); crm_log_xml_info(event, "match:not-found"); abort_transition(INFINITY, tg_restart, "Unexpected event", event); return; } diff --git a/crm/tengine/utils.c b/crm/tengine/utils.c index 189636d1b4..25c40f7469 100644 --- a/crm/tengine/utils.c +++ b/crm/tengine/utils.c @@ -1,141 +1,94 @@ -/* $Id: utils.c,v 1.59 2006/04/19 12:24:59 andrew Exp $ */ +/* $Id: utils.c,v 1.60 2006/08/14 08:52:30 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include extern cib_t *te_cib_conn; -const char * -get_rsc_state(const char *task, op_status_t status) -{ - if(safe_str_eq(CRMD_ACTION_START, task)) { - if(status == LRM_OP_PENDING) { - return CRMD_ACTION_START_PENDING; - } else if(status == LRM_OP_DONE) { - return CRMD_ACTION_STARTED; - } else { - return CRMD_ACTION_START_FAIL; - } - - } else if(safe_str_eq(CRMD_ACTION_STOP, task)) { - if(status == LRM_OP_PENDING) { - return CRMD_ACTION_STOP_PENDING; - } else if(status == LRM_OP_DONE) { - return CRMD_ACTION_STOPPED; - } else { - return CRMD_ACTION_STOP_FAIL; - } - - } else { - if(safe_str_eq(CRMD_ACTION_MON, task)) { - if(status == LRM_OP_PENDING) { - return CRMD_ACTION_MON_PENDING; - } else if(status == LRM_OP_DONE) { - return CRMD_ACTION_MON_OK; - } else { - return CRMD_ACTION_MON_FAIL; - } - } else { - const char *rsc_state = NULL; - if(status == LRM_OP_PENDING) { - rsc_state = CRMD_ACTION_GENERIC_PENDING; - } else if(status == LRM_OP_DONE) { - rsc_state = CRMD_ACTION_GENERIC_OK; - } else { - rsc_state = CRMD_ACTION_GENERIC_FAIL; - } - crm_warn("Using status \"%s\" for op \"%s\"..." - " this is still in the experimental stage.", - rsc_state, task); - return rsc_state; - } - } -} - gboolean stop_te_timer(crm_action_timer_t *timer) { const char *timer_desc = "action timer"; if(timer == NULL) { return FALSE; } if(timer->reason == timeout_abort) { timer_desc = "global timer"; } if(timer->source_id != 0) { crm_debug_2("Stopping %s", timer_desc); Gmain_timeout_remove(timer->source_id); timer->source_id = 0; } else { return FALSE; } return TRUE; } void trigger_graph_processing(const char *fn, int line) { G_main_set_trigger(transition_trigger); crm_debug_2("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, crm_data_t *reason, const char *fn, int line) { int log_level = LOG_DEBUG; /* if(abort_priority >= INFINITY) { log_level = LOG_INFO; } */ update_abort_priority( transition_graph, abort_priority, abort_action, abort_text); crm_log_maybe(log_level, "%s:%d - Triggered graph processing : %s", fn, line, abort_text); if(reason != NULL) { crm_log_xml(log_level, "Cause", reason); } if(transition_graph->complete) { notify_crmd(transition_graph); } else { G_main_set_trigger(transition_trigger); } } diff --git a/lib/crm/common/ipc.c b/lib/crm/common/ipc.c index 64df19a260..a3804f9888 100644 --- a/lib/crm/common/ipc.c +++ b/lib/crm/common/ipc.c @@ -1,435 +1,439 @@ -/* $Id: ipc.c,v 1.27 2006/07/12 09:31:36 andrew Exp $ */ +/* $Id: ipc.c,v 1.28 2006/08/14 08:52:08 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean send_ha_message(ll_cluster_t *hb_conn, HA_Message *msg, const char *node, gboolean force_ordered) { gboolean all_is_good = TRUE; cl_mem_stats_t saved_stats; crm_save_mem_stats(__PRETTY_FUNCTION__, &saved_stats); if (msg == NULL) { crm_err("cant send NULL message"); all_is_good = FALSE; } else if(hb_conn == NULL) { crm_err("No heartbeat connection specified"); all_is_good = FALSE; } else if(hb_conn->llc_ops->chan_is_connected(hb_conn) == FALSE) { crm_err("Not connected to Heartbeat"); all_is_good = FALSE; } else if(node != NULL) { if(hb_conn->llc_ops->send_ordered_nodemsg( hb_conn, msg, node) != HA_OK) { all_is_good = FALSE; crm_err("Send failed"); } else { crm_debug_2("Message sent..."); } } else if(force_ordered) { if(hb_conn->llc_ops->send_ordered_clustermsg(hb_conn, msg) != HA_OK) { all_is_good = FALSE; crm_err("Broadcast Send failed"); } else { crm_debug_2("Broadcast message sent..."); } } else { if(hb_conn->llc_ops->sendclustermsg(hb_conn, msg) != HA_OK) { all_is_good = FALSE; crm_err("Broadcast Send failed"); } else { crm_debug_2("Broadcast message sent..."); } } if(all_is_good == FALSE && hb_conn != NULL) { IPC_Channel *ipc = NULL; IPC_Queue *send_q = NULL; if(hb_conn->llc_ops->chan_is_connected(hb_conn) != HA_OK) { ipc = hb_conn->llc_ops->ipcchan(hb_conn); } if(ipc != NULL) { /* ipc->ops->resume_io(ipc); */ send_q = ipc->send_queue; } if(send_q != NULL) { CRM_CHECK(send_q->current_qlen < send_q->max_qlen, ;); } } crm_log_message_adv(all_is_good?LOG_MSG:LOG_WARNING,"HA[outbound]",msg); crm_diff_mem_stats(LOG_DEBUG, LOG_DEBUG, __PRETTY_FUNCTION__, NULL, &saved_stats); return all_is_good; } /* frees msg */ gboolean send_ipc_message(IPC_Channel *ipc_client, HA_Message *msg) { gboolean all_is_good = TRUE; int fail_level = LOG_WARNING; cl_mem_stats_t saved_stats; crm_save_mem_stats(__PRETTY_FUNCTION__, &saved_stats); if(ipc_client != NULL && ipc_client->conntype == IPC_CLIENT) { fail_level = LOG_ERR; } if (msg == NULL) { crm_err("cant send NULL message"); all_is_good = FALSE; } else if (ipc_client == NULL) { crm_err("cant send message without an IPC Channel"); all_is_good = FALSE; } else if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) { crm_log_maybe(fail_level, "IPC Channel to %d is not connected", (int)ipc_client->farside_pid); all_is_good = FALSE; } if(all_is_good && msg2ipcchan(msg, ipc_client) != HA_OK) { crm_log_maybe(fail_level, "Could not send IPC message to %d", (int)ipc_client->farside_pid); all_is_good = FALSE; if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) { crm_log_maybe(fail_level, "IPC Channel to %d is no longer connected", (int)ipc_client->farside_pid); } else if(ipc_client->conntype == IPC_CLIENT) { - CRM_CHECK(ipc_client->send_queue->current_qlen < ipc_client->send_queue->max_qlen, ;); + if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) { + crm_err("Send queue to %d (size=%d) full.", + ipc_client->farside_pid, + (int)ipc_client->send_queue->max_qlen); + } } } /* ipc_client->ops->resume_io(ipc_client); */ crm_log_message_adv(all_is_good?LOG_MSG:LOG_WARNING,"IPC[outbound]",msg); crm_diff_mem_stats(LOG_DEBUG, LOG_DEBUG, __PRETTY_FUNCTION__, NULL, &saved_stats); return all_is_good; } void default_ipc_connection_destroy(gpointer user_data) { return; } int init_server_ipc_comms( char *channel_name, gboolean (*channel_client_connect)(IPC_Channel *newclient,gpointer user_data), void (*channel_connection_destroy)(gpointer user_data)) { /* the clients wait channel is the other source of events. * This source delivers the clients connection events. * listen to this source at a relatively lower priority. */ char commpath[SOCKET_LEN]; IPC_WaitConnection *wait_ch; sprintf(commpath, CRM_SOCK_DIR "/%s", channel_name); wait_ch = wait_channel_init(commpath); if (wait_ch == NULL) { return 1; } G_main_add_IPC_WaitConnection( G_PRIORITY_LOW, wait_ch, NULL, FALSE, channel_client_connect, channel_name, channel_connection_destroy); crm_debug_3("Listening on: %s", commpath); return 0; } GCHSource* init_client_ipc_comms(const char *channel_name, gboolean (*dispatch)( IPC_Channel* source_data, gpointer user_data), void *client_data, IPC_Channel **ch) { IPC_Channel *a_ch = NULL; GCHSource *the_source = NULL; void *callback_data = client_data; a_ch = init_client_ipc_comms_nodispatch(channel_name); if(ch != NULL) { *ch = a_ch; if(callback_data == NULL) { callback_data = a_ch; } } if(a_ch == NULL) { crm_warn("Setup of client connection failed," " not adding channel to mainloop"); return NULL; } if(dispatch == NULL) { crm_warn("No dispatch method specified..." "maybe you meant init_client_ipc_comms_nodispatch()?"); } else { crm_debug_3("Adding dispatch method to channel"); the_source = G_main_add_IPC_Channel( G_PRIORITY_HIGH, a_ch, FALSE, dispatch, callback_data, default_ipc_connection_destroy); } return the_source; } IPC_Channel * init_client_ipc_comms_nodispatch(const char *channel_name) { IPC_Channel *ch; GHashTable *attrs; static char path[] = IPC_PATH_ATTR; char *commpath = NULL; int local_socket_len = 2; /* 2 = '/' + '\0' */ local_socket_len += strlen(channel_name); local_socket_len += strlen(CRM_SOCK_DIR); crm_malloc0(commpath, local_socket_len); if(commpath != NULL) { sprintf(commpath, CRM_SOCK_DIR "/%s", channel_name); commpath[local_socket_len - 1] = '\0'; crm_debug_3("Attempting to talk on: %s", commpath); } attrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(attrs, path, commpath); ch = ipc_channel_constructor(IPC_ANYTYPE, attrs); g_hash_table_destroy(attrs); if (ch == NULL) { crm_err("Could not access channel on: %s", commpath); crm_free(commpath); return NULL; } else if (ch->ops->initiate_connection(ch) != IPC_OK) { crm_debug("Could not init comms on: %s", commpath); ch->ops->destroy(ch); crm_free(commpath); return NULL; } ch->ops->set_recv_qlen(ch, 100); ch->ops->set_send_qlen(ch, 100); /* ch->should_send_block = TRUE; */ crm_debug_3("Processing of %s complete", commpath); crm_free(commpath); return ch; } IPC_WaitConnection * wait_channel_init(char daemonsocket[]) { IPC_WaitConnection *wait_ch; mode_t mask; char path[] = IPC_PATH_ATTR; GHashTable * attrs; attrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(attrs, path, daemonsocket); mask = umask(0); wait_ch = ipc_wait_conn_constructor(IPC_ANYTYPE, attrs); if (wait_ch == NULL) { cl_perror("Can't create wait channel of type %s", IPC_ANYTYPE); exit(1); } mask = umask(mask); g_hash_table_destroy(attrs); return wait_ch; } longclock_t ipc_call_start = 0; longclock_t ipc_call_stop = 0; longclock_t ipc_call_diff = 0; gboolean subsystem_msg_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; HA_Message *msg = NULL; ha_msg_input_t *new_input = NULL; gboolean all_is_well = TRUE; const char *sys_to; const char *task; while(IPC_ISRCONN(sender)) { gboolean process = FALSE; if(sender->ops->is_message_pending(sender) == 0) { break; } msg = msgfromIPC_noauth(sender); if (msg == NULL) { crm_err("No message from %d this time", sender->farside_pid); continue; } lpc++; new_input = new_ha_msg_input(msg); crm_msg_del(msg); msg = NULL; crm_log_message(LOG_MSG, new_input->msg); sys_to = cl_get_string(new_input->msg, F_CRM_SYS_TO); task = cl_get_string(new_input->msg, F_CRM_TASK); if(safe_str_eq(task, CRM_OP_HELLO)) { process = TRUE; } else if(sys_to == NULL) { crm_err("Value of %s was NULL!!", F_CRM_SYS_TO); } else if(task == NULL) { crm_err("Value of %s was NULL!!", F_CRM_TASK); } else { process = TRUE; } if(process){ gboolean (*process_function) (HA_Message *msg, crm_data_t *data, IPC_Channel *sender) = NULL; process_function = user_data; #ifdef MSG_LOG crm_log_message_adv( LOG_MSG, __FUNCTION__, new_input->msg); #endif if(ipc_call_diff_max_ms > 0) { ipc_call_start = time_longclock(); } if(FALSE == process_function( new_input->msg, new_input->xml, sender)) { crm_warn("Received a message destined for %s" " by mistake", sys_to); } if(ipc_call_diff_max_ms > 0) { unsigned int ipc_call_diff_ms = 0; ipc_call_stop = time_longclock(); ipc_call_diff = sub_longclock( ipc_call_stop, ipc_call_start); ipc_call_diff_ms = longclockto_ms( ipc_call_diff); if(ipc_call_diff_ms > ipc_call_diff_max_ms) { crm_err("%s took %dms to complete", sys_to, ipc_call_diff_ms); } } } else { #ifdef MSG_LOG crm_log_message_adv( LOG_ERR, NULL, new_input->msg); #endif } delete_ha_msg_input(new_input); new_input = NULL; if(sender->ch_status == IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status != IPC_CONNECT) { crm_err("The server %d has left us: Shutting down...NOW", sender->farside_pid); exit(1); /* shutdown properly later */ return !all_is_well; } return all_is_well; } gboolean is_ipc_empty(IPC_Channel *ch) { if(ch == NULL) { return TRUE; } else if(ch->send_queue->current_qlen == 0 && ch->recv_queue->current_qlen == 0) { return TRUE; } return FALSE; }