diff --git a/crmd/lrm.c b/crmd/lrm.c index 7712eeedec..a95a35d7bc 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,1896 +1,1896 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include #include #include #include #include #include #include #include #include struct recurring_op_s { char *rsc_id; char *op_key; int call_id; int interval; gboolean remove; gboolean cancelled; }; char *make_stop_id(const char *rsc, int call_id); gboolean build_operation_update( xmlNode *rsc_list, lrm_op_t *op, const char *src, int lpc); gboolean build_active_RAs(xmlNode *rsc_list); gboolean is_rsc_active(const char *rsc_id); void do_update_resource(lrm_op_t *op); gboolean process_lrm_event(lrm_op_t *op); void do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation, xmlNode *msg, xmlNode *request); lrm_op_t *construct_op( xmlNode *rsc_op, const char *rsc_id, const char *operation); void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id); void free_recurring_op(gpointer value); GHashTable *meta_hash = NULL; GHashTable *resources = NULL; GHashTable *pending_ops = NULL; GCHSource *lrm_source = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret = HA_OK; if(action & A_LRM_DISCONNECT) { if(verify_stopped(cur_state, LOG_INFO) == FALSE) { crmd_fsa_stall(NULL); return; } if(lrm_source) { crm_debug("Removing LRM connection from MainLoop"); if(G_main_del_IPC_Channel(lrm_source) == FALSE) { crm_err("Could not remove LRM connection" " from MainLoop"); } lrm_source = NULL; } if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); crm_info("Disconnected from the LRM"); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } /* TODO: Clean up the hashtable */ } if(action & A_LRM_CONNECT) { ret = HA_OK; pending_ops = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resources = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(NULL == fsa_lrm_conn) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); ret = HA_FAIL; } if(ret == HA_OK) { crm_debug("Connecting to the LRM"); ret = fsa_lrm_conn->lrm_ops->signon( fsa_lrm_conn, CRM_SYSTEM_CRMD); } if(ret != HA_OK) { if(++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return; } } if(ret == HA_OK) { crm_debug_4("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback( fsa_lrm_conn, lrm_op_callback); if(ret != HA_OK) { crm_err("Failed to set LRM callbacks"); } } if(ret != HA_OK) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } /* TODO: create a destroy handler that causes * some recovery to happen */ lrm_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, default_ipc_connection_destroy); set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); crm_debug("LRM connection established"); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; int *log_level = user_data; struct recurring_op_s *pending = value; do_crm_log(*log_level, "Pending action: %s (%s)", stop_id, pending->op_key); } static void ghash_print_pending_for_rsc(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; char *rsc = user_data; struct recurring_op_s *pending = value; if(safe_str_eq(rsc, pending->rsc_id)) { do_crm_log(LOG_NOTICE, "%sction %s (%s) incomplete at shutdown", pending->interval==0?"A":"Recurring a", stop_id, pending->op_key); } } static void ghash_count_pending(gpointer key, gpointer value, gpointer user_data) { int *counter = user_data; struct recurring_op_s *pending = value; if(pending->interval > 0) { /* Ignore recurring actions in the shutdown calculations */ return; } (*counter)++; } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; GListPtr lrm_list = NULL; crm_debug("Checking for active resources before exit"); if(cur_state == S_TERMINATE) { log_level = LOG_ERR; } g_hash_table_foreach(pending_ops, ghash_count_pending, &counter); if(counter > 0) { rc = FALSE; do_crm_log(log_level, "%d pending LRM operations at shutdown%s", g_hash_table_size(pending_ops), cur_state == S_TERMINATE?"":"... waiting"); if(cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_foreach( pending_ops, ghash_print_pending, &log_level); } goto bail; } if(lrm_source != NULL && fsa_lrm_conn != NULL) { lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); } slist_iter( rsc_id, char, lrm_list, lpc, if(is_rsc_active(rsc_id) == FALSE) { continue; } crm_err("Resource %s was active at shutdown." " You may ignore this error if it is unmanaged.", rsc_id); g_hash_table_foreach( pending_ops, ghash_print_pending_for_rsc, rsc_id); ); bail: set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); if(cur_state == S_TERMINATE) { rc = TRUE; } return rc; } static const char * get_rsc_metadata(const char *type, const char *class, const char *provider) { int len = 0; char *key = NULL; char *metadata = NULL; if(meta_hash == NULL) { meta_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } CRM_CHECK(type != NULL, return NULL); CRM_CHECK(class != NULL, return NULL); if(provider == NULL) { provider = "heartbeat"; } len = strlen(type) + strlen(class) + strlen(provider) + 4; crm_malloc0(key, len); sprintf(key, "%s::%s:%s", type, class, provider); key[len-1] = 0; metadata = g_hash_table_lookup(meta_hash, key); if(metadata) { crm_debug_2("Returning cached metadata for %s", key); goto out; } crm_debug("Retreiving metadata for %s", key); metadata = fsa_lrm_conn->lrm_ops->get_rsc_type_metadata( fsa_lrm_conn, class, type, provider); if(metadata) { /* copy the metadata because the LRM likes using * g_alloc instead of cl_malloc */ char *m_copy = crm_strdup(metadata); g_hash_table_insert(meta_hash, key, m_copy); key = NULL; /* prevent it from being free'd */ g_free(metadata); metadata = m_copy; } else { crm_warn("No metadata found for %s", key); } out: crm_free(key); return metadata; } static GListPtr get_rsc_restart_list(lrm_rsc_t *rsc, lrm_op_t *op) { gboolean supported = FALSE; GListPtr restart_list = NULL; const char *value = NULL; const char *metadata_str = get_rsc_metadata( rsc->type, rsc->class, rsc->provider); xmlNode *params = NULL; xmlNode *actions = NULL; xmlNode *metadata = NULL; if(metadata_str == NULL) { return NULL; } metadata = string2xml(metadata_str); if(metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type); return NULL; } actions = find_xml_node(metadata, "actions", TRUE); xml_child_iter_filter( actions, action, "action", value = crm_element_value(action, "name"); if(safe_str_eq("reload", value)) { supported = TRUE; break; } ); if(supported == FALSE) { goto cleanup; } params = find_xml_node(metadata, "parameters", TRUE); xml_child_iter_filter( params, param, "parameter", value = crm_element_value(param, "unique"); if(crm_is_true(value)) { value = crm_element_value(param, "name"); crm_debug("Attr %s is not reloadable", value); restart_list = g_list_append( restart_list, crm_strdup(value)); } ); cleanup: free_xml(metadata); return restart_list; } static void append_restart_list(xmlNode *update, lrm_op_t *op, const char *version) { int len = 0; char *list = NULL; char *digest = NULL; lrm_rsc_t *rsc = NULL; const char *value = NULL; gboolean non_empty = FALSE; xmlNode *restart = NULL; GListPtr restart_list = NULL; if(op->interval > 0) { /* monitors are not reloadable */ return; } else if(safe_str_neq(CRMD_ACTION_START, op->op_type)) { /* only starts are potentially reloadable */ return; } else if(compare_version("1.0.8", version) > 0) { crm_debug("Caller version %s does not support reloads", version); return; } rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id); if(rsc == NULL) { crm_info("Resource %s no longer in the LRM", op->rsc_id); return; } restart_list = get_rsc_restart_list(rsc, op); if(restart_list == NULL) { crm_debug("Resource %s does not support reloads", op->rsc_id); return; } restart = create_xml_node(NULL, "parameters"); slist_iter(param, const char, restart_list, lpc, int start = len; value = g_hash_table_lookup(op->params, param); if(value != NULL) { non_empty = TRUE; crm_xml_add(restart, param, value); } len += strlen(param) + 2; crm_realloc(list, len+1); sprintf(list+start, " %s ", param); ); digest = calculate_xml_digest(restart, TRUE, FALSE); crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_debug("%s : %s", digest, list); if(non_empty) { crm_log_xml_debug(restart, "restart digest source"); } slist_destroy(char, child, restart_list, crm_free(child)); free_xml(restart); crm_free(digest); crm_free(list); } gboolean build_operation_update( xmlNode *xml_rsc, lrm_op_t *op, const char *src, int lpc) { char *magic = NULL; const char *task = NULL; xmlNode *xml_op = NULL; char *op_id = NULL; char *local_user_data = NULL; const char *caller_version = NULL; char *digest = NULL; xmlNode *args_xml = NULL; xmlNode *args_parent = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return FALSE; } crm_debug_2("%s: Updating resouce %s after %s %s op", src, op->rsc_id, op_status2text(op->op_status), op->op_type); if(op->op_status == LRM_OP_CANCELLED) { crm_debug_3("Ignoring cancelled op"); return TRUE; } if(AM_I_DC) { caller_version = CRM_FEATURE_SET; } else if(fsa_our_dc_version != NULL) { caller_version = fsa_our_dc_version; } else { /* there is a small risk in formerly mixed clusters that * it will be sub-optimal. * however with our upgrade policy, the update we send * should still be completely supported anyway */ caller_version = g_hash_table_lookup( op->params, XML_ATTR_CRM_VERSION); crm_warn("Falling back to operation originator version: %s", caller_version); } crm_debug_3("DC version: %s", caller_version); task = op->op_type; /* remap the task name under various scenarios * this makes life easier for the PE when its trying determin the current state */ if(crm_str_eq(task, "reload", TRUE)) { if(op->op_status == LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } else if(crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { /* if the migrate_from fails it will have enough info to do the right thing */ if(op->op_status == LRM_OP_DONE) { task = CRMD_ACTION_STOP; } else { task = CRMD_ACTION_STATUS; } } else if(op->op_status == LRM_OP_DONE && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { task = CRMD_ACTION_START; } if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { const char *n_type = g_hash_table_lookup( op->params, crm_meta_name("notify_type")); const char *n_task = g_hash_table_lookup( op->params, crm_meta_name("notify_operation")); #if CRM_DEPRECATED_SINCE_2_0_5 if(n_type == NULL) { n_type = g_hash_table_lookup(op->params, "notify_type"); } if(n_task == NULL) { n_task = g_hash_table_lookup(op->params, "notify_operation"); } #endif CRM_DEV_ASSERT(n_type != NULL); CRM_DEV_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = LRM_OP_DONE; op->rc = 0; } else { op_id = generate_op_key(op->rsc_id, task, op->interval); } xml_op = find_entity(xml_rsc, XML_LRM_TAG_RSC_OP, op_id); if(xml_op != NULL) { crm_log_xml(LOG_DEBUG, "Replacing existing entry", xml_op); } else { xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, src); if(op->user_data == NULL) { char *id = crm_itoa(op->call_id); crm_debug("Generating fake transition key for:" " %s_%s_%d %d from %s", op->rsc_id, op->op_type, op->interval, op->call_id, op->app_name); - local_user_data = generate_transition_key(-1, 0, id); + local_user_data = generate_transition_key(-1, 0, 0, id); op->user_data = local_user_data; crm_free(id); } if(compare_version("1.0.3", caller_version) > 0) { magic = generate_transition_magic_v202( op->user_data, op->op_status); } else { magic = generate_transition_magic( op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_free(magic); switch(op->op_status) { case LRM_OP_PENDING: break; case LRM_OP_CANCELLED: crm_err("What to do here"); break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_debug_2("Resource action %s/%s %s: %d", op->rsc_id, task, op_status2text(op->op_status), op->rc); break; case LRM_OP_DONE: break; } crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); /* set these on 'xml_rsc' too to make life easy for the PE */ crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); if(compare_version("2.1", caller_version) <= 0) { if(op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_debug("Timing data (%s_%s_%d): last=%lu change=%lu exec=%lu queue=%lu", op->rsc_id, op->op_type, op->interval, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); crm_xml_add_int(xml_op, "last_run", op->t_run); crm_xml_add_int(xml_op, "last_rc_change", op->t_rcchange); crm_xml_add_int(xml_op, "exec_time", op->exec_time); crm_xml_add_int(xml_op, "queue_time", op->queue_time); } } /* this will enable us to later determin that the * resource's parameters have changed and we should force * a restart */ args_parent = NULL; #if CRM_DEPRECATED_SINCE_2_0_4 if(compare_version("1.0.4", caller_version) > 0) { args_parent = xml_op; } #endif args_xml = create_xml_node(args_parent, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, caller_version); digest = calculate_xml_digest(args_xml, TRUE, FALSE); if(op->interval == 0 && safe_str_neq(task, CRMD_ACTION_STOP)) { crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", args_xml); } crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); if(args_parent == NULL) { free_xml(args_xml); } append_restart_list(xml_op, op, caller_version); if(op->op_status != LRM_OP_DONE && crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { const char *host = g_hash_table_lookup( op->params, crm_meta_name("migrate_source_uuid")); crm_xml_add(xml_op, CRMD_ACTION_MIGRATED, host); } if(local_user_data) { crm_free(local_user_data); op->user_data = NULL; } return TRUE; } gboolean is_rsc_active(const char *rsc_id) { GList *op_list = NULL; gboolean active = FALSE; lrm_rsc_t *the_rsc = NULL; state_flag_t cur_state = 0; int max_call_id = -1; if(fsa_lrm_conn == NULL) { return FALSE; } the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rsc_id); crm_debug_3("Processing lrm_rsc_t entry %s", rsc_id); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); return FALSE; } op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_3("\tcurrent state:%s",cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s_%d (%d) for %s (status=%d, rc=%d)", op->op_type, op->interval, op->call_id, the_rsc->id, op->op_status, op->rc); CRM_ASSERT(max_call_id <= op->call_id); if(op->rc == EXECRA_OK && safe_str_eq(op->op_type, CRMD_ACTION_STOP)) { active = FALSE; } else if(op->rc == EXECRA_OK && safe_str_eq(op->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ active = FALSE; } else if(op->rc == EXECRA_NOT_RUNNING) { active = FALSE; } else { active = TRUE; } max_call_id = op->call_id; lrm_free_op(op); ); g_list_free(op_list); lrm_free_rsc(the_rsc); return active; } gboolean build_active_RAs(xmlNode *rsc_list) { GList *op_list = NULL; GList *lrm_list = NULL; gboolean found_op = FALSE; state_flag_t cur_state = 0; if(fsa_lrm_conn == NULL) { return FALSE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rid, char, lrm_list, lpc, lrm_rsc_t *the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); xmlNode *xml_rsc = create_xml_node( rsc_list, XML_LRM_TAG_RESOURCE); int max_call_id = -1; crm_debug_2("Processing lrm_rsc_t entry %s", rid); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); continue; } crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, the_rsc->type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, the_rsc->class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER,the_rsc->provider); op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_2("\tcurrent state:%s", cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s for %s (status=%d, rc=%d)", op->op_type, the_rsc->id, op->op_status, op->rc); if(max_call_id < op->call_id) { build_operation_update( xml_rsc, op, __FUNCTION__, llpc); } else if(max_call_id > op->call_id) { crm_err("Bad call_id in list=%d. Previous call_id=%d", op->call_id, max_call_id); } else { crm_warn("lrm->get_cur_state() returned" " duplicate entries for call_id=%d", op->call_id); } max_call_id = op->call_id; found_op = TRUE; lrm_free_op(op); ); if(found_op == FALSE && g_list_length(op_list) != 0) { crm_err("Could not properly determin last op" " for %s from %d entries", the_rsc->id, g_list_length(op_list)); } g_list_free(op_list); lrm_free_rsc(the_rsc); ); g_list_free(lrm_list); return TRUE; } xmlNode* do_lrm_query(gboolean is_replace) { gboolean shut_down = FALSE; xmlNode *xml_result= NULL; xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; const char *exp_state = CRMD_STATE_ACTIVE; if(is_set(fsa_input_register, R_SHUTDOWN)) { exp_state = CRMD_STATE_INACTIVE; shut_down = TRUE; } xml_state = create_node_state( fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state, !shut_down, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); if(is_replace) { crm_xml_add(xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); } xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS); free_xml(xml_state); crm_log_xml_debug_3(xml_state, "Current state of the LRM"); return xml_result; } static void delete_rsc_entry(const char *rsc_id) { xmlNode *xml_top = NULL; xmlNode *xml_tmp = NULL; /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ CRM_CHECK(rsc_id != NULL, return); xml_top = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(xml_top, XML_ATTR_ID, fsa_our_uuid); xml_tmp = create_xml_node(xml_top, XML_CIB_TAG_LRM); crm_xml_add(xml_tmp, XML_ATTR_ID, fsa_our_uuid); xml_tmp = create_xml_node(xml_tmp, XML_LRM_TAG_RESOURCES); xml_tmp = create_xml_node(xml_tmp, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_tmp, XML_ATTR_ID, rsc_id); crm_debug("sync: Sending delete op for %s", rsc_id); fsa_cib_conn->cmds->delete_absolute(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, NULL, cib_quorum_override); /* crm_log_xml_err(xml_top, "op:cancel"); */ free_xml(xml_top); } static void delete_op_entry(lrm_op_t *op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ if(op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, NULL, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { xmlNode *xml_tmp = NULL; xml_top = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(xml_top, XML_ATTR_ID, fsa_our_uuid); xml_tmp = create_xml_node(xml_top, XML_CIB_TAG_LRM); crm_xml_add(xml_tmp, XML_ATTR_ID, fsa_our_uuid); xml_tmp = create_xml_node(xml_tmp, XML_LRM_TAG_RESOURCES); xml_tmp = create_xml_node(xml_tmp, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_tmp, XML_ATTR_ID, rsc_id); xml_tmp = create_xml_node(xml_tmp, XML_LRM_TAG_RSC_OP); crm_xml_add(xml_tmp, XML_ATTR_ID, key); if(call_id > 0) { crm_xml_add_int(xml_tmp, XML_LRM_ATTR_CALLID, call_id); } crm_debug("sync: Sending delete op for %s (call=%d)", key, call_id); fsa_cib_conn->cmds->delete_absolute(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, NULL, cib_quorum_override); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_debug_2(xml_top, "op:cancel"); free_xml(xml_top); } static gboolean cancel_op(lrm_rsc_t *rsc, const char *key, int op, gboolean remove) { int rc = HA_OK; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc != NULL, return FALSE); if(key == NULL) { key = make_stop_id(rsc->id, op); } pending = g_hash_table_lookup(pending_ops, key); if(pending) { if(remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if(pending->cancelled) { crm_debug("Operation %s already cancelled", key); return TRUE; } pending->cancelled = TRUE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc->id, key); rc = rsc->ops->cancel_op(rsc, op); if(rc != HA_OK) { crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc->id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ return FALSE; } return TRUE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrm_rsc_t *rsc; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->op_key, data->key)) { data->done = TRUE; if (cancel_op(data->rsc, key, op->call_id, data->remove) == FALSE) { return TRUE; } } return FALSE; } static gboolean cancel_op_key(lrm_rsc_t *rsc, const char *key, gboolean remove) { struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; g_hash_table_foreach_remove(pending_ops, cancel_action_by_key, &data); return data.done; } static lrm_rsc_t * get_lrm_resource(xmlNode *resource, xmlNode *op_msg, gboolean do_create) { char rid[64]; lrm_rsc_t *rsc = NULL; const char *short_id = ID(resource); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_debug_2("Retrieving %s from the LRM.", short_id); CRM_CHECK(short_id != NULL, return NULL); if(rsc == NULL) { /* check if its already there (short name) */ strncpy(rid, short_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && long_id != NULL) { /* try the long name instead */ strncpy(rid, long_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && do_create) { /* add it to the LRM */ const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); GHashTable *params = xml2list(op_msg); CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_debug("Adding rsc %s before operation", short_id); strncpy(rid, short_id, 64); rid[63] = 0; #if CRM_DEPRECATED_SINCE_2_0_3 if(op_msg != NULL) { if(g_hash_table_lookup( params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(params); params = xml2list_202(op_msg); } } #endif if(g_hash_table_size(params) == 0) { crm_log_xml_warn(op_msg, "EmptyParams"); } fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, class, type, provider, params); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); g_hash_table_destroy(params); if(rsc == NULL) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM", rid); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } return rsc; } static gboolean lrm_remove_deleted_op( gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if(safe_str_eq(rsc, pending->rsc_id)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean done = FALSE; gboolean create_rsc = TRUE; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_debug_2("LRM command from: %s", from_sys); if(safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { operation = CRMD_ACTION_DELETE; } else if(safe_str_eq(operation, CRM_OP_LRM_REFRESH)) { crm_op = CRM_OP_LRM_REFRESH; } else if(safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { crm_info("Failing resource..."); operation = "fail"; } else if(input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if(safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { enum cib_errors rc = cib_ok; xmlNode *fragment = do_lrm_query(TRUE); crm_info("Forcing a local LRM refresh"); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc); free_xml(fragment); } else if(safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query(FALSE); xmlNode *reply = create_reply(input->msg, data); if(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml(LOG_ERR, "reply", reply); free_xml(reply); } free_xml(data); } else if(safe_str_eq(operation, CRM_OP_PROBED) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { const char *probed = XML_BOOLEAN_TRUE; if(safe_str_eq(crm_op, CRM_OP_REPROBE)) { probed = XML_BOOLEAN_FALSE; } update_attr(fsa_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, fsa_our_uuid, NULL, NULL, CRM_OP_PROBED, probed, FALSE); } else if(operation != NULL) { lrm_rsc_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node( input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if(rsc == NULL && create_rsc) { crm_err("Invalid resource definition"); crm_log_xml_warn(input->msg, "bad input"); } else if(rsc == NULL) { lrm_op_t* op = NULL; crm_err("Not creating resource for a %s event: %s", operation, ID(input->xml)); crm_log_xml_warn(input->msg, "bad input"); op = construct_op(input->xml, ID(xml_rsc), operation); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; CRM_ASSERT(op != NULL); send_direct_ack(from_host, from_sys, op, ID(xml_rsc)); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_CANCEL)) { lrm_op_t* op = NULL; char *op_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op_interval = crm_element_value(params, crm_meta_name("interval")); op_task = crm_element_value(params, crm_meta_name(XML_LRM_ATTR_TASK)); call_id = crm_element_value(params, crm_meta_name(XML_LRM_ATTR_CALLID)); #if CRM_DEPRECATED_SINCE_2_0_5 if(op_interval == NULL) { op_interval = crm_element_value(params, "interval"); } if(op_task == NULL) { op_task = crm_element_value(params, XML_LRM_ATTR_TASK); if(op_task == NULL) { op_task = crm_element_value(params, "task"); } } #endif CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op = construct_op(input->xml, rsc->id, op_task); CRM_ASSERT(op != NULL); op_key = generate_op_key( rsc->id,op_task,crm_parse_int(op_interval,"0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id?call_id:"NA"); call = crm_parse_int(call_id, "0"); if(call == 0) { /* the normal case when the PE cancels a recurring op */ done = cancel_op_key(rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ done = cancel_op(rsc, op_key, call, TRUE); } if(done == FALSE) { crm_debug("Nothing known about operation %d for %s", call, op_key); delete_op_entry(NULL, rsc->id, op_key, call); /* needed?? surely not otherwise the cancel_op_(_key) wouldn't * have failed in the first place */ g_hash_table_remove(pending_ops, op_key); } op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; send_direct_ack(from_host, from_sys, op, rsc->id); crm_free(op_key); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { int rc = HA_OK; lrm_op_t* op = NULL; CRM_ASSERT(rsc != NULL); op = construct_op(input->xml, rsc->id, operation); CRM_ASSERT(op != NULL); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; crm_info("Removing resource %s from the LRM", rsc->id); rc = fsa_lrm_conn->lrm_ops->delete_rsc(fsa_lrm_conn, rsc->id); if(rc != HA_OK) { crm_err("Failed to remove resource %s", rsc->id); op->op_status = LRM_OP_ERROR; op->rc = EXECRA_UNKNOWN_ERROR; } delete_rsc_entry(rsc->id); send_direct_ack(from_host, from_sys, op, rsc->id); free_lrm_op(op); g_hash_table_foreach_remove(pending_ops, lrm_remove_deleted_op, rsc->id); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc->id); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE); crm_free(now_s); } } else if(rsc != NULL) { do_lrm_rsc_op(rsc, operation, input->xml, input->msg); } lrm_free_rsc(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } lrm_op_t * construct_op(xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrm_op_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; const char *transition = NULL; CRM_DEV_ASSERT(rsc_id != NULL); crm_malloc0(op, sizeof(lrm_op_t)); op->op_type = crm_strdup(operation); op->op_status = LRM_OP_PENDING; op->rc = -1; op->rsc_id = crm_strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; op->copyparams = 0; op->app_name = crm_strdup(CRM_SYSTEM_CRMD); if(rsc_op == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; op->user_data_len = 0; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, crm_strdup(XML_ATTR_CRM_VERSION), crm_strdup(CRM_FEATURE_SET)); crm_debug_2("Constructed %s op for %s", operation, rsc_id); return op; } op->params = xml2list(rsc_op); #if CRM_DEPRECATED_SINCE_2_0_3 if(g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(op->params); op->params = xml2list_202(rsc_op); } #endif if(op->params == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } op_delay = g_hash_table_lookup(op->params, crm_meta_name("start_delay")); op_timeout = g_hash_table_lookup(op->params, crm_meta_name("timeout")); op_interval = g_hash_table_lookup(op->params, crm_meta_name("interval")); #if CRM_DEPRECATED_SINCE_2_0_5 if(op_delay == NULL) { op_delay = g_hash_table_lookup(op->params, "start_delay"); } if(op_timeout == NULL) { op_timeout = g_hash_table_lookup(op->params, "timeout"); } if(op_interval == NULL) { op_interval = g_hash_table_lookup(op->params, "interval"); } #endif op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); /* sanity */ if(op->interval < 0) { op->interval = 0; } if(op->timeout < 0) { op->timeout = 0; } if(op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = crm_strdup(transition); op->user_data_len = 1+strlen(op->user_data); if(op->interval != 0) { if(safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval"); op->interval = 0; } } /* reset the resource's parameters? */ if(op->interval == 0) { if(safe_str_eq(CRMD_ACTION_START, operation) || safe_str_eq(CRMD_ACTION_STATUS, operation)) { op->copyparams = 1; } } crm_debug_2("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; xmlNode *fragment; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } if(op->rsc_id == NULL) { CRM_DEV_ASSERT(rsc_id != NULL); op->rsc_id = crm_strdup(rsc_id); } if(to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, op, __FUNCTION__, 0); fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS); reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_debug_2(update, "ACK Update"); crm_info("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if(relay_message(reply, TRUE) == FALSE) { crm_log_xml(LOG_ERR, "Unable to route reply", reply); free_xml(reply); } free_xml(fragment); free_xml(update); } static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(op->interval != 0 && safe_str_eq(op->rsc_id, rsc->id)) { if (cancel_op(rsc, key, op->call_id, FALSE) == FALSE) { return TRUE; } } return FALSE; } void do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation, xmlNode *msg, xmlNode *request) { int call_id = 0; char *op_id = NULL; lrm_op_t* op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; CRM_CHECK(rsc != NULL, return); if(msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if(transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(msg, rsc->id, operation); /* stop the monitor before stopping the resource */ if(crm_str_eq(operation, CRMD_ACTION_STOP, TRUE) || crm_str_eq(operation, CRMD_ACTION_DEMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_PROMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_MIGRATE, TRUE)) { g_hash_table_foreach_remove(pending_ops, stop_recurring_action_by_rsc, rsc); } /* now do the op */ crm_info("Performing op=%s_%s_%d key=%s)", rsc->id, operation, op->interval, transition); if(fsa_state != S_NOT_DC && fsa_state != S_TRANSITION_ENGINE) { if(safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s" " in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = 99; op->op_status = LRM_OP_ERROR; send_direct_ack(NULL, NULL, op, rsc->id); free_lrm_op(op); crm_free(op_id); return; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if(op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(rsc, op_id, FALSE); op->target_rc = CHANGED; } else { op->target_rc = EVERYTIME; } g_hash_table_replace(resources,crm_strdup(rsc->id), crm_strdup(op_id)); call_id = rsc->ops->perform_op(rsc, op); if(call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; crm_malloc0(pending, sizeof(struct recurring_op_s)); crm_debug("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_key = crm_strdup(op_id); pending->rsc_id = crm_strdup(rsc->id); g_hash_table_replace(pending_ops, call_id_s, pending); } crm_free(op_id); free_lrm_op(op); return; } void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s*)value; crm_free(op->rsc_id); crm_free(op->op_key); crm_free(op); } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } lrm_op_t * copy_lrm_op(const lrm_op_t *op) { lrm_op_t *op_copy = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return NULL; } CRM_ASSERT(op->rsc_id != NULL); crm_malloc0(op_copy, sizeof(lrm_op_t)); op_copy->op_type = crm_strdup(op->op_type); /* input fields */ op_copy->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(op->params != NULL) { g_hash_table_foreach(op->params, dup_attr, op_copy->params); } op_copy->timeout = op->timeout; op_copy->interval = op->interval; op_copy->target_rc = op->target_rc; /* in the CRM, this is always a string */ if(op->user_data != NULL) { op_copy->user_data = crm_strdup(op->user_data); } /* output fields */ op_copy->op_status = op->op_status; op_copy->rc = op->rc; op_copy->call_id = op->call_id; op_copy->output = NULL; op_copy->rsc_id = crm_strdup(op->rsc_id); if(op->app_name != NULL) { op_copy->app_name = crm_strdup(op->app_name); } if(op->output != NULL) { op_copy->output = crm_strdup(op->output); } return op_copy; } lrm_rsc_t * copy_lrm_rsc(const lrm_rsc_t *rsc) { lrm_rsc_t *rsc_copy = NULL; if(rsc == NULL) { return NULL; } crm_malloc0(rsc_copy, sizeof(lrm_rsc_t)); rsc_copy->id = crm_strdup(rsc->id); rsc_copy->type = crm_strdup(rsc->type); rsc_copy->class = NULL; rsc_copy->provider = NULL; if(rsc->class != NULL) { rsc_copy->class = crm_strdup(rsc->class); } if(rsc->provider != NULL) { rsc_copy->provider = crm_strdup(rsc->provider); } /* GHashTable* params; */ rsc_copy->params = NULL; rsc_copy->ops = NULL; return rsc_copy; } static void cib_rsc_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { switch(rc) { case cib_ok: case cib_diff_failed: case cib_diff_resync: crm_debug("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_err("Resource update %d failed: (rc=%d) %s", call_id, rc, cib_error2string(rc)); } } void do_update_resource(lrm_op_t* op) { /* */ int rc = cib_ok; lrm_rsc_t *rsc = NULL; xmlNode *update, *iter; CRM_CHECK(op != NULL, return); update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id); CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id)); CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id)); crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER,rsc->provider); lrm_free_rsc(rsc); build_operation_update(iter, op, __FUNCTION__, 0); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, cib_quorum_override, rc); if(rc > 0) { /* the return code is a call number, not an error code */ crm_debug("Sent resource state update message: %d", rc); add_cib_op_callback(rc, FALSE, NULL, cib_rsc_callback); } else { crm_err("Resource state update failed: %s", cib_error2string(rc)); } free_xml(update); } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrm_op_t *op) { char *op_id = NULL; char *op_key = NULL; int log_level = LOG_ERR; struct recurring_op_s *pending = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); switch(op->op_status) { case LRM_OP_ERROR: case LRM_OP_PENDING: case LRM_OP_NOTSUPPORTED: break; case LRM_OP_CANCELLED: log_level = LOG_INFO; break; case LRM_OP_DONE: log_level = LOG_INFO; break; case LRM_OP_TIMEOUT: log_level = LOG_DEBUG_3; crm_err("LRM operation %s (%d) %s (timeout=%dms)", op_key, op->call_id, op_status2text(op->op_status), op->timeout); break; default: crm_err("Mapping unknown status (%d) to ERROR", op->op_status); op->op_status = LRM_OP_ERROR; } if(op->op_status == LRM_OP_ERROR && (op->rc == EXECRA_RUNNING_MASTER || op->rc == EXECRA_NOT_RUNNING)) { /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = LRM_OP_DONE; log_level = LOG_INFO; } do_crm_log(log_level, "LRM operation %s (call=%d, rc=%d) %s %s", op_key, op->call_id, op->rc, op_status2text(op->op_status), op->op_status==LRM_OP_ERROR?execra_code2string(op->rc):""); if(op->op_status == LRM_OP_ERROR && op->output != NULL) { crm_info("Result: %s", op->output); } op_id = make_stop_id(op->rsc_id, op->call_id); pending = g_hash_table_lookup(pending_ops, op_id); if(op->op_status != LRM_OP_CANCELLED) { do_update_resource(op); if(op->interval != 0) { goto out; } } else if(op->interval == 0) { /* no known valid reason for this to happen */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); } else if(pending == NULL) { crm_err("Op %s (call=%d): No 'pending' entry", op_key, op->call_id); } else if(op->user_data == NULL) { crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if(pending->remove) { delete_op_entry(op, op->rsc_id, op_key, op->call_id); } if(g_hash_table_remove(pending_ops, op_id)) { crm_debug("Op %s (call=%d): Confirmed", op_key, op->call_id); } out: crm_free(op_key); crm_free(op_id); return TRUE; } char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; crm_malloc0(op_id, strlen(rsc) + 34); if(op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } diff --git a/include/crm/common/util.h b/include/crm/common/util.h index 3ce7e99802..81d3ad887c 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -1,191 +1,191 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM_COMMON_UTIL__H #define CRM_COMMON_UTIL__H #include #include #if SUPPORT_HEARTBEAT # include # include #endif #include #include #include #include #define DEBUG_INC SIGUSR1 #define DEBUG_DEC SIGUSR2 extern unsigned int crm_log_level; extern gboolean crm_config_error; extern gboolean crm_config_warning; #define crm_config_err(fmt...) { crm_config_error = TRUE; crm_err(fmt); } #define crm_config_warn(fmt...) { crm_config_warning = TRUE; crm_warn(fmt); } extern gboolean crm_log_init( const char *entity, int level, gboolean coredir, gboolean to_stderr, int argc, char **argv); /* returns the old value */ extern unsigned int set_crm_log_level(unsigned int level); extern unsigned int get_crm_log_level(void); extern char *crm_itoa(int an_int); extern char *crm_strdup_fn(const char *a, const char *file, const char *fn, int line); extern char *generate_hash_key(const char *crm_msg_reference, const char *sys); extern char *generate_hash_value(const char *src_node, const char *src_subsys); extern gboolean decodeNVpair(const char *srcstring, char separator, char **name, char **value); extern int compare_version(const char *version1, const char *version2); extern char *generateReference(const char *custom1, const char *custom2); extern void alter_debug(int nsig); extern void g_hash_destroy_str(gpointer data); extern gboolean crm_is_true(const char * s); extern int crm_str_to_boolean(const char * s, int * ret); extern long crm_get_msec(const char * input); extern const char *op_status2text(op_status_t status); extern char *generate_op_key( const char *rsc_id, const char *op_type, int interval); extern gboolean parse_op_key( const char *key, char **rsc_id, char **op_type, int *interval); extern char *generate_notify_key( const char *rsc_id, const char *notify_type, const char *op_type); extern char *generate_transition_magic_v202( const char *transition_key, int op_status); extern char *generate_transition_magic( const char *transition_key, int op_status, int op_rc); extern gboolean decode_transition_magic( const char *magic, char **uuid, - int *transition_id, int *action_id, int *op_status, int *op_rc); + int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc); -extern char *generate_transition_key(int action, int transition_id, const char *node); +extern char *generate_transition_key(int action, int transition_id, int target_rc, const char *node); extern gboolean decode_transition_key( - const char *key, char **uuid, int *action, int *transition_id); + const char *key, char **uuid, int *action, int *transition_id, int *target_rc); extern char *crm_concat(const char *prefix, const char *suffix, char join); extern gboolean decode_op_key( const char *key, char **rsc_id, char **op_type, int *interval); extern void filter_action_parameters(xmlNode *param_set, const char *version); extern void filter_reload_parameters(xmlNode *param_set, const char *restart_string); #define safe_str_eq(a, b) crm_str_eq(a, b, FALSE) extern gboolean crm_str_eq(const char *a, const char *b, gboolean use_case); extern gboolean safe_str_neq(const char *a, const char *b); extern int crm_parse_int(const char *text, const char *default_text); extern long crm_int_helper(const char *text, char **end_text); #define crm_atoi(text, default_text) crm_parse_int(text, default_text) extern void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork); extern char *generate_series_filename( const char *directory, const char *series, int sequence, gboolean bzip); extern int get_last_sequence(const char *directory, const char *series); extern void write_last_sequence( const char *directory, const char *series, int sequence, int max); extern void crm_make_daemon( const char *name, gboolean daemonize, const char *pidfile); typedef struct pe_cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; gboolean (*is_valid)(const char *); const char *description_short; const char *description_long; } pe_cluster_option; extern const char *cluster_option( GHashTable* options, gboolean(*validate)(const char*), const char *name, const char *old_name, const char *def_value); extern const char *get_cluster_pref( GHashTable *options, pe_cluster_option *option_list, int len, const char *name); extern void config_metadata( const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option *option_list, int len); extern void verify_all_options(GHashTable *options, pe_cluster_option *option_list, int len); extern gboolean check_time(const char *value); extern gboolean check_timer(const char *value); extern gboolean check_boolean(const char *value); extern gboolean check_number(const char *value); extern int char2score(const char *score); extern char *score2char(int score); extern gboolean crm_is_writable( const char *dir, const char *file, const char *user, const char *group, gboolean need_both); extern long long crm_set_bit(const char *function, long long word, long long bit); extern long long crm_clear_bit(const char *function, long long word, long long bit); #define set_bit(word, bit) word = crm_set_bit(__PRETTY_FUNCTION__, word, bit) #define clear_bit(word, bit) word = crm_clear_bit(__PRETTY_FUNCTION__, word, bit) #define set_bit_inplace(word, bit) word |= bit #define clear_bit_inplace(word, bit) word &= ~bit extern gboolean is_set(long long action_list, long long action); extern gboolean is_not_set(long long action_list, long long action); extern gboolean is_set_any(long long action_list, long long action); extern gboolean is_openais_cluster(void); extern gboolean is_heartbeat_cluster(void); #endif diff --git a/lib/crm/common/utils.c b/lib/crm/common/utils.c index c31c3a1a03..eb857b5933 100644 --- a/lib/crm/common/utils.c +++ b/lib/crm/common/utils.c @@ -1,1614 +1,1612 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MAXLINE # define MAXLINE 512 #endif static uint ref_counter = 0; gboolean crm_assert_failed = FALSE; unsigned int crm_log_level = LOG_INFO; gboolean crm_config_error = FALSE; gboolean crm_config_warning = FALSE; const char *crm_system_name = "unknown"; void crm_set_env_options(void); gboolean check_time(const char *value) { if(crm_get_msec(value) < 5000) { return FALSE; } return TRUE; } gboolean check_timer(const char *value) { if(crm_get_msec(value) < 0) { return FALSE; } return TRUE; } gboolean check_boolean(const char *value) { int tmp = FALSE; if(crm_str_to_boolean(value, &tmp) != 1) { return FALSE; } return TRUE; } gboolean check_number(const char *value) { errno = 0; if(value == NULL) { return FALSE; } else if(safe_str_eq(value, MINUS_INFINITY_S)) { } else if(safe_str_eq(value, INFINITY_S)) { } else { crm_int_helper(value, NULL); } if(errno != 0) { return FALSE; } return TRUE; } int char2score(const char *score) { int score_f = 0; if(score == NULL) { } else if(safe_str_eq(score, MINUS_INFINITY_S)) { score_f = -INFINITY; } else if(safe_str_eq(score, INFINITY_S)) { score_f = INFINITY; } else if(safe_str_eq(score, "+"INFINITY_S)) { score_f = INFINITY; } else { score_f = crm_parse_int(score, NULL); if(score_f > 0 && score_f > INFINITY) { score_f = INFINITY; } else if(score_f < 0 && score_f < -INFINITY) { score_f = -INFINITY; } } return score_f; } char * score2char(int score) { if(score >= INFINITY) { return crm_strdup("+"INFINITY_S); } else if(score <= -INFINITY) { return crm_strdup("-"INFINITY_S); } return crm_itoa(score); } const char * cluster_option(GHashTable* options, gboolean(*validate)(const char*), const char *name, const char *old_name, const char *def_value) { const char *value = NULL; CRM_ASSERT(name != NULL); if(options != NULL) { value = g_hash_table_lookup(options, name); } if(value == NULL && old_name && options != NULL) { value = g_hash_table_lookup(options, old_name); if(value != NULL) { crm_config_warn("Using deprecated name '%s' for" " cluster option '%s'", old_name, name); g_hash_table_insert( options, crm_strdup(name), crm_strdup(value)); value = g_hash_table_lookup(options, old_name); } } if(value == NULL) { crm_debug_2("Using default value '%s' for cluster option '%s'", def_value, name); if(options == NULL) { return def_value; } g_hash_table_insert( options, crm_strdup(name), crm_strdup(def_value)); value = g_hash_table_lookup(options, name); } if(validate && validate(value) == FALSE) { crm_config_err("Value '%s' for cluster option '%s' is invalid." " Defaulting to %s", value, name, def_value); g_hash_table_replace(options, crm_strdup(name), crm_strdup(def_value)); value = g_hash_table_lookup(options, name); } return value; } const char * get_cluster_pref(GHashTable *options, pe_cluster_option *option_list, int len, const char *name) { int lpc = 0; const char *value = NULL; gboolean found = FALSE; for(lpc = 0; lpc < len; lpc++) { if(safe_str_eq(name, option_list[lpc].name)) { found = TRUE; value = cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } CRM_CHECK(found, crm_err("No option named: %s", name)); CRM_ASSERT(value != NULL); return value; } void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option *option_list, int len) { int lpc = 0; fprintf(stdout, "" "\n" "\n" " %s\n" " %s\n" " %s\n" " \n", name, version, desc_long, desc_short); for(lpc = 0; lpc < len; lpc++) { if(option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) { continue; } fprintf(stdout, " \n" " %s\n" " \n" " %s%s%s\n" " \n", option_list[lpc].name, option_list[lpc].description_short, option_list[lpc].type, option_list[lpc].default_value, option_list[lpc].description_long?option_list[lpc].description_long:option_list[lpc].description_short, option_list[lpc].values?" Allowed values: ":"", option_list[lpc].values?option_list[lpc].values:""); } fprintf(stdout, " \n\n"); } void verify_all_options(GHashTable *options, pe_cluster_option *option_list, int len) { int lpc = 0; for(lpc = 0; lpc < len; lpc++) { cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } char * generateReference(const char *custom1, const char *custom2) { const char *local_cust1 = custom1; const char *local_cust2 = custom2; int reference_len = 4; char *since_epoch = NULL; reference_len += 20; /* too big */ reference_len += 40; /* too big */ if(local_cust1 == NULL) { local_cust1 = "_empty_"; } reference_len += strlen(local_cust1); if(local_cust2 == NULL) { local_cust2 = "_empty_"; } reference_len += strlen(local_cust2); crm_malloc0(since_epoch, reference_len); if(since_epoch != NULL) { sprintf(since_epoch, "%s-%s-%ld-%u", local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++); } return since_epoch; } gboolean decodeNVpair(const char *srcstring, char separator, char **name, char **value) { int lpc = 0; int len = 0; const char *temp = NULL; CRM_ASSERT(name != NULL && value != NULL); *name = NULL; *value = NULL; crm_debug_4("Attempting to decode: [%s]", srcstring); if (srcstring != NULL) { len = strlen(srcstring); while(lpc <= len) { if (srcstring[lpc] == separator) { crm_malloc0(*name, lpc+1); if(*name == NULL) { break; /* and return FALSE */ } strncpy(*name, srcstring, lpc); (*name)[lpc] = '\0'; /* this sucks but as the strtok manpage says.. * it *is* a bug */ len = len-lpc; len--; if(len <= 0) { *value = NULL; } else { crm_malloc0(*value, len+1); if(*value == NULL) { crm_free(*name); break; /* and return FALSE */ } temp = srcstring+lpc+1; strncpy(*value, temp, len); (*value)[len] = '\0'; } return TRUE; } lpc++; } } if(*name != NULL) { crm_free(*name); } *name = NULL; *value = NULL; return FALSE; } char * crm_concat(const char *prefix, const char *suffix, char join) { int len = 0; char *new_str = NULL; CRM_ASSERT(prefix != NULL); CRM_ASSERT(suffix != NULL); len = strlen(prefix) + strlen(suffix) + 2; crm_malloc0(new_str, (len)); sprintf(new_str, "%s%c%s", prefix, join, suffix); new_str[len-1] = 0; return new_str; } char * generate_hash_key(const char *crm_msg_reference, const char *sys) { char *hash_key = crm_concat(sys?sys:"none", crm_msg_reference, '_'); crm_debug_3("created hash key: (%s)", hash_key); return hash_key; } char * generate_hash_value(const char *src_node, const char *src_subsys) { char *hash_value = NULL; if (src_node == NULL || src_subsys == NULL) { return NULL; } if (strcasecmp(CRM_SYSTEM_DC, src_subsys) == 0) { hash_value = crm_strdup(src_subsys); if (!hash_value) { crm_err("memory allocation failed in " "generate_hash_value()"); } return hash_value; } hash_value = crm_concat(src_node, src_subsys, '_'); crm_info("created hash value: (%s)", hash_value); return hash_value; } char * crm_itoa(int an_int) { int len = 32; char *buffer = NULL; crm_malloc0(buffer, (len+1)); if(buffer != NULL) { snprintf(buffer, len, "%d", an_int); } return buffer; } extern int LogToLoggingDaemon(int priority, const char * buf, int bstrlen, gboolean use_pri_str); gboolean crm_log_init( const char *entity, int level, gboolean coredir, gboolean to_stderr, int argc, char **argv) { /* const char *test = "Testing log daemon connection"; */ /* Redirect messages from glib functions to our handler */ /* cl_malloc_forced_for_glib(); */ g_log_set_handler(NULL, G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG | G_LOG_FLAG_RECURSION | G_LOG_FLAG_FATAL, cl_glib_msg_handler, NULL); /* and for good measure... - this enum is a bit field (!) */ g_log_set_always_fatal((GLogLevelFlags)0); /*value out of range*/ crm_system_name = entity; cl_log_set_entity(entity); cl_log_set_facility(HA_LOG_FACILITY); if(coredir) { cl_set_corerootdir(HA_COREDIR); cl_cdtocoredir(); } set_crm_log_level(level); crm_set_env_options(); cl_log_args(argc, argv); cl_log_enable_stderr(to_stderr); CL_SIGNAL(DEBUG_INC, alter_debug); CL_SIGNAL(DEBUG_DEC, alter_debug); return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; while(crm_log_level < 100 && crm_log_level < level) { alter_debug(DEBUG_INC); } while(crm_log_level > 0 && crm_log_level > level) { alter_debug(DEBUG_DEC); } return old; } unsigned int get_crm_log_level(void) { return crm_log_level; } void crm_log_message_adv(int level, const char *prefix, const HA_Message *msg) { if((int)crm_log_level >= level) { do_crm_log(level, "#========= %s message start ==========#", prefix?prefix:""); if(level > LOG_DEBUG) { cl_log_message(LOG_DEBUG, msg); } else { cl_log_message(level, msg); } } } static int crm_version_helper(const char *text, char **end_text) { int atoi_result = -1; CRM_ASSERT(end_text != NULL); errno = 0; if(text != NULL && text[0] != 0) { atoi_result = (int)strtol(text, end_text, 10); if(errno == EINVAL) { crm_err("Conversion of '%s' %c failed", text, text[0]); atoi_result = -1; } } return atoi_result; } /* * version1 < version2 : -1 * version1 = version2 : 0 * version1 > version2 : 1 */ int compare_version(const char *version1, const char *version2) { int rc = 0; int lpc = 0; char *ver1_copy = NULL, *ver2_copy = NULL; char *rest1 = NULL, *rest2 = NULL; if(version1 == NULL && version2 == NULL) { return 0; } else if(version1 == NULL) { return -1; } else if(version2 == NULL) { return 1; } ver1_copy = crm_strdup(version1); ver2_copy = crm_strdup(version2); rest1 = ver1_copy; rest2 = ver2_copy; while(1) { int digit1 = 0; int digit2 = 0; lpc++; if(rest1 == rest2) { break; } if(rest1 != NULL) { digit1 = crm_version_helper(rest1, &rest1); } if(rest2 != NULL) { digit2 = crm_version_helper(rest2, &rest2); } if(digit1 < digit2){ rc = -1; crm_debug_5("%d < %d", digit1, digit2); break; } else if (digit1 > digit2){ rc = 1; crm_debug_5("%d > %d", digit1, digit2); break; } if(rest1 != NULL && rest1[0] == '.') { rest1++; } if(rest1 != NULL && rest1[0] == 0) { rest1 = NULL; } if(rest2 != NULL && rest2[0] == '.') { rest2++; } if(rest2 != NULL && rest2[0] == 0) { rest2 = NULL; } } crm_free(ver1_copy); crm_free(ver2_copy); if(rc == 0) { crm_debug_3("%s == %s (%d)", version1, version2, lpc); } else if(rc < 0) { crm_debug_3("%s < %s (%d)", version1, version2, lpc); } else if(rc > 0) { crm_debug_3("%s > %s (%d)", version1, version2, lpc); } return rc; } gboolean do_stderr = FALSE; void alter_debug(int nsig) { CL_SIGNAL(DEBUG_INC, alter_debug); CL_SIGNAL(DEBUG_DEC, alter_debug); switch(nsig) { case DEBUG_INC: if (crm_log_level < 100) { crm_log_level++; } break; case DEBUG_DEC: if (crm_log_level > 0) { crm_log_level--; } break; default: fprintf(stderr, "Unknown signal %d\n", nsig); cl_log(LOG_ERR, "Unknown signal %d", nsig); break; } } void g_hash_destroy_str(gpointer data) { crm_free(data); } #include #include #include long crm_int_helper(const char *text, char **end_text) { long atoi_result = -1; char *local_end_text = NULL; errno = 0; if(text != NULL) { if(end_text != NULL) { atoi_result = strtoul(text, end_text, 10); } else { atoi_result = strtoul(text, &local_end_text, 10); } /* CRM_CHECK(errno != EINVAL); */ if(errno == EINVAL) { crm_err("Conversion of %s failed", text); atoi_result = -1; } else { if(errno == ERANGE) { crm_err("Conversion of %s was clipped: %ld", text, atoi_result); } if(end_text == NULL && local_end_text[0] != '\0') { crm_err("Characters left over after parsing " "\"%s\": \"%s\"", text, local_end_text); } } } return atoi_result; } int crm_parse_int(const char *text, const char *default_text) { int atoi_result = -1; if(text != NULL) { atoi_result = crm_int_helper(text, NULL); if(errno == 0) { return atoi_result; } } if(default_text != NULL) { atoi_result = crm_int_helper(default_text, NULL); if(errno == 0) { return atoi_result; } } else { crm_err("No default conversion value supplied"); } return -1; } gboolean crm_str_eq(const char *a, const char *b, gboolean use_case) { if(a == NULL || b == NULL) { /* shouldn't be comparing NULLs */ CRM_CHECK(a != b, return TRUE); return FALSE; } else if(use_case && a[0] != b[0]) { return FALSE; } else if(a == b) { return TRUE; } else if(strcasecmp(a, b) == 0) { return TRUE; } return FALSE; } gboolean safe_str_neq(const char *a, const char *b) { if(a == b) { return FALSE; } else if(a==NULL || b==NULL) { return TRUE; } else if(strcasecmp(a, b) == 0) { return FALSE; } return TRUE; } char * crm_strdup_fn(const char *src, const char *file, const char *fn, int line) { char *dup = NULL; CRM_CHECK(src != NULL, return NULL); crm_malloc0(dup, strlen(src) + 1); return strcpy(dup, src); } #define ENV_PREFIX "HA_" void crm_set_env_options(void) { cl_inherit_logging_environment(500); cl_log_set_logd_channel_source(NULL, NULL); if(debug_level > 0 && (debug_level+LOG_INFO) > (int)crm_log_level) { set_crm_log_level(LOG_INFO + debug_level); } } gboolean crm_is_true(const char * s) { gboolean ret = FALSE; if(s != NULL) { cl_str_to_boolean(s, &ret); } return ret; } int crm_str_to_boolean(const char * s, int * ret) { if(s == NULL) { return -1; } else if (strcasecmp(s, "true") == 0 || strcasecmp(s, "on") == 0 || strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0){ *ret = TRUE; return 1; } else if (strcasecmp(s, "false") == 0 || strcasecmp(s, "off") == 0 || strcasecmp(s, "no") == 0 || strcasecmp(s, "n") == 0 || strcasecmp(s, "0") == 0){ *ret = FALSE; return 1; } return -1; } #ifndef NUMCHARS # define NUMCHARS "0123456789." #endif #ifndef WHITESPACE # define WHITESPACE " \t\n\r\f" #endif long crm_get_msec(const char * input) { const char * cp = input; const char * units; long multiplier = 1000; long divisor = 1; long ret = -1; double dret; if(input == NULL) { return 0; } cp += strspn(cp, WHITESPACE); units = cp + strspn(cp, NUMCHARS); units += strspn(units, WHITESPACE); if (strchr(NUMCHARS, *cp) == NULL) { return ret; } if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) { multiplier = 1; divisor = 1; }else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) { multiplier = 1; divisor = 1000; }else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) { multiplier = 1000; divisor = 1; }else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) { multiplier = 60*1000; divisor = 1; }else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) { multiplier = 60*60*1000; divisor = 1; }else if (*units != EOS && *units != '\n' && *units != '\r') { return ret; } dret = atof(cp); dret *= (double)multiplier; dret /= (double)divisor; dret += 0.5; ret = (long)dret; return(ret); } const char * op_status2text(op_status_t status) { switch(status) { case LRM_OP_PENDING: return "pending"; break; case LRM_OP_DONE: return "complete"; break; case LRM_OP_ERROR: return "Error"; break; case LRM_OP_TIMEOUT: return "Timed Out"; break; case LRM_OP_NOTSUPPORTED: return "NOT SUPPORTED"; break; case LRM_OP_CANCELLED: return "Cancelled"; break; } CRM_CHECK(status >= LRM_OP_PENDING && status <= LRM_OP_CANCELLED, crm_err("Unknown status: %d", status)); return "UNKNOWN!"; } char * generate_op_key(const char *rsc_id, const char *op_type, int interval) { int len = 35; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); len += strlen(op_type); len += strlen(rsc_id); crm_malloc0(op_id, len); CRM_CHECK(op_id != NULL, return NULL); sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval); return op_id; } gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) { char *mutable_key = NULL; char *mutable_key_ptr = NULL; int len = 0, offset = 0, ch = 0; CRM_CHECK(key != NULL, return FALSE); *interval = 0; len = strlen(key); offset = len-1; crm_debug_3("Source: %s", key); while(offset > 0 && isdigit(key[offset])) { int digits = len-offset; ch = key[offset] - '0'; CRM_CHECK(ch < 10, return FALSE); CRM_CHECK(ch >= 0, return FALSE); while(digits > 1) { digits--; ch = ch * 10; } *interval += ch; offset--; } crm_debug_3(" Interval: %d", *interval); CRM_CHECK(key[offset] == '_', return FALSE); mutable_key = crm_strdup(key); mutable_key_ptr = mutable_key_ptr; mutable_key[offset] = 0; offset--; while(offset > 0 && key[offset] != '_') { offset--; } CRM_CHECK(key[offset] == '_', crm_free(mutable_key); return FALSE); mutable_key_ptr = mutable_key+offset+1; crm_debug_3(" Action: %s", mutable_key_ptr); *op_type = crm_strdup(mutable_key_ptr); mutable_key[offset] = 0; offset--; CRM_CHECK(mutable_key != mutable_key_ptr, crm_free(mutable_key); return FALSE); crm_debug_3(" Resource: %s", mutable_key); *rsc_id = crm_strdup(mutable_key); crm_free(mutable_key); return TRUE; } char * generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type) { int len = 12; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); CRM_CHECK(notify_type != NULL, return NULL); len += strlen(op_type); len += strlen(rsc_id); len += strlen(notify_type); crm_malloc0(op_id, len); if(op_id != NULL) { sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type); } return op_id; } char * generate_transition_magic_v202(const char *transition_key, int op_status) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%s", op_status,transition_key); } return fail_state; } char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%d;%s", op_status, op_rc, transition_key); } return fail_state; } gboolean decode_transition_magic( const char *magic, char **uuid, int *transition_id, int *action_id, - int *op_status, int *op_rc) + int *op_status, int *op_rc, int *target_rc) { - char *rc = NULL; - char *key = NULL; - char *magic2 = NULL; - char *status = NULL; - - gboolean result = TRUE; - - if(decodeNVpair(magic, ':', &status, &magic2) == FALSE) { - crm_err("Couldn't find ':' in: %s", magic); - result = FALSE; - goto bail; - } - - if(decodeNVpair(magic2, ';', &rc, &key) == FALSE) { - crm_err("Couldn't find ';' in: %s", magic2); - result = FALSE; - goto bail; - } - - - CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id), - result = FALSE; - goto bail; - ); - - *op_rc = crm_parse_int(rc, NULL); - *op_status = crm_parse_int(status, NULL); + int res = 0; + char *key = NULL; + gboolean result = TRUE; + CRM_CHECK(magic != NULL, return FALSE); + CRM_CHECK(op_rc != NULL, return FALSE); + CRM_CHECK(op_status != NULL, return FALSE); + + crm_malloc0(key, strlen(magic)); + res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); + if(res != 3) { + crm_crit("Only found %d items in: %s", res, magic); + return FALSE; + } + + CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), + result = FALSE; + goto bail; + ); + bail: - crm_free(rc); - crm_free(key); - crm_free(magic2); - crm_free(status); - - return result; + crm_free(key); + return result; } char * -generate_transition_key(int transition_id, int action_id, const char *node) +generate_transition_key(int transition_id, int action_id, int target_rc, const char *node) { int len = 40; char *fail_state = NULL; CRM_CHECK(node != NULL, return NULL); len += strlen(node); crm_malloc0(fail_state, len); if(fail_state != NULL) { - snprintf(fail_state, len, "%d:%d:%s", - action_id, transition_id, node); + snprintf(fail_state, len, "%d:%d:%d:%s", + action_id, transition_id, target_rc, node); } return fail_state; } gboolean decode_transition_key( - const char *key, char **uuid, int *transition_id, int *action_id) + const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc) { - char *tmp = NULL; - char *action = NULL; - char *transition = NULL; + int res = 0; + gboolean done = TRUE; - *uuid = NULL; - *action_id = -1; - *transition_id = -1; + CRM_CHECK(uuid != NULL, return FALSE); + CRM_CHECK(target_rc != NULL, return FALSE); + CRM_CHECK(action_id != NULL, return FALSE); + CRM_CHECK(transition_id != NULL, return FALSE); - if(decodeNVpair(key, ':', &action, &tmp) == FALSE) { - crm_err("Couldn't find ':' in: %s", key); - return FALSE; - } - - *action_id = crm_parse_int(action, NULL); - crm_free(action); - - if(decodeNVpair(tmp, ':', &transition, uuid) == FALSE) { - /* this would be an error but some versions dont - * have the action - */ - *transition_id = *action_id; + crm_malloc0(*uuid, strlen(key)); + res = sscanf(key, "%d:%d:%d:%s", transition_id, action_id, target_rc, *uuid); + switch(res) { + case 4: + /* Post Pacemaker 0.6 */ + break; + case 2: + /* Until Pacemaker 0.6 */ + *target_rc = -1; + res = sscanf(key, "%d:%d:%s", transition_id, action_id, *uuid); + CRM_CHECK(res == 3, done = FALSE); + break; + case 1: + /* Prior to Heartbeat 2.0.8 */ *action_id = -1; - *uuid = tmp; + *target_rc = -1; + res = sscanf(key, "%d:%s", transition_id, *uuid); + CRM_CHECK(res == 2, done = FALSE); + break; + } - } else { - *transition_id = crm_parse_int(transition, NULL); - crm_free(transition); - crm_free(tmp); + if(done == FALSE) { + crm_err("Cannot decode '%s'", key); + + crm_free(*uuid); + *uuid = NULL; + *target_rc = -1; + *action_id = -1; + *transition_id = -1; } - return TRUE; + + return done; } void filter_action_parameters(xmlNode *param_set, const char *version) { char *timeout = NULL; char *interval = NULL; #if CRM_DEPRECATED_SINCE_2_0_5 const char *filter_205[] = { XML_ATTR_TE_TARGET_RC, XML_ATTR_LRM_PROBE, XML_RSC_ATTR_START, XML_RSC_ATTR_NOTIFY, XML_RSC_ATTR_UNIQUE, XML_RSC_ATTR_MANAGED, XML_RSC_ATTR_PRIORITY, XML_RSC_ATTR_MULTIPLE, XML_RSC_ATTR_STICKINESS, XML_RSC_ATTR_FAIL_STICKINESS, XML_RSC_ATTR_TARGET_ROLE, /* ignore clone fields */ XML_RSC_ATTR_INCARNATION, XML_RSC_ATTR_INCARNATION_MAX, XML_RSC_ATTR_INCARNATION_NODEMAX, XML_RSC_ATTR_MASTER_MAX, XML_RSC_ATTR_MASTER_NODEMAX, /* old field names */ "role", "crm_role", "te-target-rc", /* ignore notify fields */ "notify_stop_resource", "notify_stop_uname", "notify_start_resource", "notify_start_uname", "notify_active_resource", "notify_active_uname", "notify_inactive_resource", "notify_inactive_uname", "notify_promote_resource", "notify_promote_uname", "notify_demote_resource", "notify_demote_uname", "notify_master_resource", "notify_master_uname", "notify_slave_resource", "notify_slave_uname" }; #endif const char *attr_filter[] = { XML_ATTR_ID, XML_ATTR_CRM_VERSION, XML_LRM_ATTR_OP_DIGEST, }; gboolean do_delete = FALSE; int lpc = 0; static int meta_len = 0; if(meta_len == 0) { meta_len = strlen(CRM_META); } if(param_set == NULL) { return; } #if CRM_DEPRECATED_SINCE_2_0_5 if(version == NULL || compare_version("1.0.5", version)) { for(lpc = 0; lpc < DIMOF(filter_205); lpc++) { xml_remove_prop(param_set, filter_205[lpc]); } } #endif for(lpc = 0; lpc < DIMOF(attr_filter); lpc++) { xml_remove_prop(param_set, attr_filter[lpc]); } timeout = crm_element_value_copy(param_set, CRM_META"_timeout"); interval = crm_element_value_copy(param_set, CRM_META"_interval"); xml_prop_iter(param_set, prop_name, prop_value, do_delete = FALSE; if(strncasecmp(prop_name, CRM_META, meta_len) == 0) { do_delete = TRUE; } if(do_delete) { xml_remove_prop(param_set, prop_name); } ); if(crm_get_msec(interval) && compare_version(version, "1.0.8")) { /* Re-instate the operation's timeout value */ if(timeout != NULL) { crm_xml_add(param_set, CRM_META"_timeout", timeout); } } crm_free(interval); crm_free(timeout); } void filter_reload_parameters(xmlNode *param_set, const char *restart_string) { int len = 0; char *name = NULL; char *match = NULL; if(param_set == NULL) { return; } xml_prop_iter(param_set, prop_name, prop_value, name = NULL; len = strlen(prop_name) + 3; crm_malloc0(name, len); sprintf(name, " %s ", prop_name); name[len-1] = 0; match = strstr(restart_string, name); if(match == NULL) { crm_debug_3("%s not found in %s", prop_name, restart_string); xml_remove_prop(param_set, prop_name); } crm_free(name); ); } void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; if(do_core == FALSE) { do_crm_log(LOG_ERR, "%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(do_fork) { do_crm_log(LOG_ERR, "%s: Triggered non-fatal assert at %s:%d : %s", function, file, line, assert_condition); pid=fork(); } else { do_crm_log(LOG_ERR, "%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } switch(pid) { case -1: crm_err("Cannot fork!"); return; default: /* Parent */ do_crm_log(LOG_ERR, "%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); do { rc = waitpid(pid, &status, 0); if(rc < 0 && errno != EINTR) { cl_perror("%s: Cannot wait on forked child %d", function, pid); } } while(rc < 0 && errno == EINTR); return; case 0: /* Child */ abort(); break; } } char * generate_series_filename( const char *directory, const char *series, int sequence, gboolean bzip) { int len = 40; char *filename = NULL; const char *ext = "raw"; CRM_CHECK(directory != NULL, return NULL); CRM_CHECK(series != NULL, return NULL); len += strlen(directory); len += strlen(series); crm_malloc0(filename, len); CRM_CHECK(filename != NULL, return NULL); if(bzip) { ext = "bz2"; } sprintf(filename, "%s/%s-%d.%s", directory, series, sequence, ext); return filename; } int get_last_sequence(const char *directory, const char *series) { FILE *file_strm = NULL; int start = 0, length = 0, read_len = 0; char *series_file = NULL; char *buffer = NULL; int seq = 0; int len = 36; CRM_CHECK(directory != NULL, return 0); CRM_CHECK(series != NULL, return 0); len += strlen(directory); len += strlen(series); crm_malloc0(series_file, len); CRM_CHECK(series_file != NULL, return 0); sprintf(series_file, "%s/%s.last", directory, series); file_strm = fopen(series_file, "r"); if(file_strm == NULL) { crm_debug("Series file %s does not exist", series_file); crm_free(series_file); return 0; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, start); CRM_ASSERT(start == ftell(file_strm)); crm_debug_3("Reading %d bytes from file", length); crm_malloc0(buffer, (length+1)); read_len = fread(buffer, 1, length, file_strm); if(read_len != length) { crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len); crm_free(buffer); buffer = NULL; } else if(length <= 0) { crm_info("%s was not valid", series_file); crm_free(buffer); buffer = NULL; } crm_free(series_file); seq = crm_parse_int(buffer, "0"); crm_free(buffer); fclose(file_strm); return seq; } void write_last_sequence( const char *directory, const char *series, int sequence, int max) { int rc = 0; int len = 36; char *buffer = NULL; FILE *file_strm = NULL; char *series_file = NULL; CRM_CHECK(directory != NULL, return); CRM_CHECK(series != NULL, return); if(max == 0) { return; } while(max > 0 && sequence > max) { sequence -= max; } buffer = crm_itoa(sequence); len += strlen(directory); len += strlen(series); crm_malloc0(series_file, len); sprintf(series_file, "%s/%s.last", directory, series); file_strm = fopen(series_file, "w"); if(file_strm == NULL) { crm_err("Cannout open series file %s for writing", series_file); goto bail; } rc = fprintf(file_strm, "%s", buffer); if(rc < 0) { cl_perror("Cannot write to series file %s", series_file); } bail: if(file_strm != NULL) { fflush(file_strm); fclose(file_strm); } crm_free(series_file); crm_free(buffer); } void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) { long pid; const char *devnull = "/dev/null"; if(daemonize == FALSE) { return; } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", name); cl_perror("fork"); exit(LSB_EXIT_GENERIC); } else if (pid > 0) { exit(LSB_EXIT_OK); } if (cl_lock_pidfile(pidfile) < 0 ) { pid = cl_read_pidfile_no_checking(pidfile); crm_warn("%s: already running [pid %ld] (%s).\n", name, pid, pidfile); exit(LSB_EXIT_OK); } umask(022); close(STDIN_FILENO); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ close(STDOUT_FILENO); (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ close(STDERR_FILENO); (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ } gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group, gboolean need_both) { int s_res = -1; struct stat buf; char *full_file = NULL; const char *target = NULL; gboolean pass = TRUE; gboolean readwritable = FALSE; CRM_ASSERT(dir != NULL); if(file != NULL) { full_file = crm_concat(dir, file, '/'); target = full_file; s_res = stat(full_file, &buf); if( s_res == 0 && S_ISREG(buf.st_mode) == FALSE ) { crm_err("%s must be a regular file", target); pass = FALSE; goto out; } } if (s_res != 0) { target = dir; s_res = stat(dir, &buf); if(s_res != 0) { crm_err("%s must exist and be a directory", dir); pass = FALSE; goto out; } else if( S_ISDIR(buf.st_mode) == FALSE ) { crm_err("%s must be a directory", dir); pass = FALSE; } } if(user) { struct passwd *sys_user = NULL; sys_user = getpwnam(user); readwritable = (sys_user != NULL && buf.st_uid == sys_user->pw_uid && (buf.st_mode & (S_IRUSR|S_IWUSR))); if(readwritable == FALSE) { crm_err("%s must be owned and r/w by user %s", target, user); if(need_both) { pass = FALSE; } } } if(group) { struct group *sys_grp = getgrnam(group); readwritable = ( sys_grp != NULL && buf.st_gid == sys_grp->gr_gid && (buf.st_mode & (S_IRGRP|S_IWGRP))); if(readwritable == FALSE) { if(need_both || user == NULL) { pass = FALSE; crm_err("%s must be owned and r/w by group %s", target, group); } else { crm_warn("%s should be owned and r/w by group %s", target, group); } } } out: crm_free(full_file); return pass; } static unsigned long long crm_bit_filter = 0; /* 0x00000002ULL; */ static unsigned int bit_log_level = LOG_DEBUG_5; long long crm_clear_bit(const char *function, long long word, long long bit) { unsigned int level = bit_log_level; if(bit & crm_bit_filter) { level = LOG_ERR; } do_crm_log(level, "Bit 0x%.16llx cleared by %s", bit, function); word &= ~bit; return word; } long long crm_set_bit(const char *function, long long word, long long bit) { unsigned int level = bit_log_level; if(bit & crm_bit_filter) { level = LOG_ERR; } do_crm_log(level, "Bit 0x%.16llx set by %s", bit, function); word |= bit; return word; } gboolean is_not_set(long long word, long long bit) { crm_debug_5("Checking bit\t%.16llx in %.16llx", bit, word); return ((word & bit) == 0); } gboolean is_set(long long word, long long bit) { crm_debug_5("Checking bit\t%.16llx in %.16llx", bit, word); return ((word & bit) == bit); } gboolean is_set_any(long long word, long long bit) { crm_debug_5("Checking bit\t%.16llx in %.16llx", bit, word); return ((word & bit) != 0); } gboolean is_openais_cluster(void) { static const char *cluster_type = NULL; if(cluster_type == NULL) { cluster_type = getenv("HA_cluster_type"); } if(safe_str_eq("openais", cluster_type)) { #if SUPPORT_AIS return TRUE; #else CRM_ASSERT(safe_str_eq("openais", cluster_type) == FALSE); #endif } return FALSE; } gboolean is_heartbeat_cluster(void) { #if SUPPORT_HEARTBEAT return !is_openais_cluster(); #else CRM_ASSERT(is_openais_cluster()); return FALSE; #endif } diff --git a/lib/crm/pengine/utils.c b/lib/crm/pengine/utils.c index bfa26d9ae6..43477999d8 100644 --- a/lib/crm/pengine/utils.c +++ b/lib/crm/pengine/utils.c @@ -1,1278 +1,1278 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set); void pe_free_shallow(GListPtr alist) { pe_free_shallow_adv(alist, TRUE); } void pe_free_shallow_adv(GListPtr alist, gboolean with_data) { GListPtr item; GListPtr item_next = alist; if(with_data == FALSE && alist != NULL) { g_list_free(alist); return; } while(item_next != NULL) { item = item_next; item_next = item_next->next; if(with_data) { /* crm_debug_5("freeing %p", item->data); */ crm_free(item->data); } item->data = NULL; item->next = NULL; g_list_free_1(item); } } node_t * node_copy(node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); crm_malloc0(new_node, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_debug_5("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* are the contents of list1 and list2 equal * nodes with weight < 0 are ignored if filter == TRUE * * slow but linear * */ gboolean node_list_eq(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node; GListPtr lhs = list1; GListPtr rhs = list2; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); lhs = list2; rhs = list1; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); return TRUE; } /* the intersection of list1 and list2 */ GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; unsigned lpc = 0; for(lpc = 0; lpc < g_list_length(list1); lpc++) { node_t *node = (node_t*)g_list_nth_data(list1, lpc); node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(other_node != NULL) { new_node = node_copy(node); } if(new_node != NULL) { crm_debug_4("%s: %d + %d", node->details->uname, other_node->weight, new_node->weight); new_node->weight = merge_weights( new_node->weight, other_node->weight); crm_debug_3("New node weight for %s: %d", new_node->details->uname, new_node->weight); if(filter && new_node->weight < 0) { crm_free(new_node); new_node = NULL; } } if(new_node != NULL) { result = g_list_append(result, new_node); } } return result; } /* list1 - list2 */ GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Minus result len: %d", g_list_length(result)); return result; } /* list1 + list2 - (intersection of list1 and list2) */ GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list2, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); slist_iter( node, node_t, list2, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list1, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Xor result len: %d", g_list_length(result)); return result; } GListPtr node_list_or(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node = NULL; GListPtr result = NULL; gboolean needs_filter = FALSE; result = node_list_dup(list1, FALSE, filter); slist_iter( node, node_t, list2, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id( result, node->details->id); if(other_node != NULL) { crm_debug_4("%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights( other_node->weight, node->weight); if(filter && node->weight < 0) { needs_filter = TRUE; } } else if(filter == FALSE || node->weight >= 0) { node_t *new_node = node_copy(node); result = g_list_append(result, new_node); } ); /* not the neatest way, but the most expedient for now */ if(filter && needs_filter) { GListPtr old_result = result; result = node_list_dup(old_result, FALSE, filter); pe_free_shallow_adv(old_result, TRUE); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; slist_iter( this_node, node_t, list1, lpc, node_t *new_node = NULL; if(filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if(reset) { new_node->weight = 0; } if(new_node != NULL) { result = g_list_append(result, new_node); } ); return result; } void dump_node_scores(int level, resource_t *rsc, const char *comment, GListPtr nodes) { GListPtr list = nodes; if(rsc) { list = rsc->allowed_nodes; } slist_iter( node, node_t, list, lpc, if(rsc) { do_crm_log(level, "%s: %s allocation score on %s: %d", comment, rsc->id, node->details->uname, node->weight); } else { do_crm_log(level, "%s: %s = %d", comment, node->details->uname, node->weight); } ); if(rsc && rsc->children) { slist_iter( child, resource_t, rsc->children, lpc, dump_node_scores(level, child, comment, nodes); ); } } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->sort_index > resource2->sort_index) { return -1; } if(resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->priority > resource2->priority) { return -1; } if(resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if(save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if(possible_matches != NULL) { crm_free(key); if(g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc?rsc->id:"", on_node?on_node->details->uname:"", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); crm_debug_4("Found existing action (%d) %s for %s on %s", action->id, task, rsc?rsc->id:"", on_node?on_node->details->uname:""); g_list_free(possible_matches); } if(action == NULL) { if(save_action) { crm_debug_2("Creating%s action %d: %s for %s on %s", optional?"":" manditory", data_set->action_id, key, rsc?rsc->id:"", on_node?on_node->details->uname:""); } crm_malloc0(action, sizeof(action_t)); if(save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = crm_strdup(task); action->node = on_node; action->uuid = key; action->actions_before = NULL; action->actions_after = NULL; action->failure_is_fatal = TRUE; action->pseudo = FALSE; action->dumped = FALSE; action->runnable = TRUE; action->processed = FALSE; action->optional = optional; action->seen_count = 0; action->extra = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); action->meta = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(save_action) { data_set->actions = g_list_append( data_set->actions, action); } if(rsc != NULL) { action->op_entry = find_rsc_op_entry(rsc, key); unpack_operation( action, action->op_entry, data_set); if(save_action) { rsc->actions = g_list_append( rsc->actions, action); } } if(save_action) { crm_debug_4("Action %d created", action->id); } } if(optional == FALSE && action->optional) { crm_debug_2("Action %d (%s) marked manditory", action->id, action->uuid); action->optional = FALSE; } if(rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_DEBUG_3; if(save_action) { warn_level = LOG_WARNING; } if(action->node != NULL && action->op_entry != NULL) { unpack_instance_attributes( action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if(action->pseudo) { /* leave untouched */ } else if(action->node == NULL) { action->runnable = FALSE; } else if(is_not_set(rsc->flags, pe_rsc_managed)) { do_crm_log(warn_level, "Action %s (unmanaged)", action->uuid); action->optional = TRUE; /* action->runnable = FALSE; */ } else if(action->node->details->online == FALSE) { action->runnable = FALSE; do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if(is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { do_crm_log(warn_level, "Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if(action->needs == rsc_req_nothing) { crm_debug_3("Action %s doesnt require anything", action->uuid); action->runnable = TRUE; #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if(action->needs == rsc_req_stonith) { crm_debug_3("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_stop) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_debug_3("Check resource is already active"); if(rsc->fns->active(rsc, TRUE) == FALSE) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { crm_debug_3("Action %s is runnable", action->uuid); action->runnable = TRUE; } if(save_action) { switch(a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if(action->runnable) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } return action; } void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set) { int value_i = 0; int start_delay = 0; char *value_ms = NULL; const char *class = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); class = g_hash_table_lookup(action->rsc->meta, "class"); if(xml_obj != NULL) { value = crm_element_value(xml_obj, "prereq"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_START)) { value = g_hash_table_lookup(action->rsc->meta, "start_prereq"); } if(value == NULL && safe_str_neq(action->task, CRMD_ACTION_START)) { /* todo: integrate stop as an option? */ action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if(safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if(safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; } else if(data_set->no_quorum_policy == no_quorum_ignore || safe_str_eq(class, "stonith")) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(data_set->no_quorum_policy == no_quorum_freeze && data_set->stonith_enabled) { action->needs = rsc_req_stonith; value = "fencing (default)"; } else { action->needs = rsc_req_quorum; value = "quorum (default)"; } if(safe_str_eq(class, "stonith")) { if(action->needs == rsc_req_stonith) { crm_config_err("Stonith resources (eg. %s) cannot require" " fencing to start", action->rsc->id); } action->needs = rsc_req_nothing; value = "nothing (fencing override)"; } crm_debug_3("\tAction %s requires: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "on_fail"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { value = g_hash_table_lookup( action->rsc->meta, "on_stopfail"); if(value != NULL) { #if CRM_DEPRECATED_SINCE_2_0_2 crm_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2", action->rsc->id); #else crm_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2" " and is now disabled", action->rsc->id); value = NULL; #endif crm_config_err("Please use specify the \"on_fail\"" " attribute on the \"stop\" operation" " instead"); } } if(value == NULL) { } else if(safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if(safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if(data_set->stonith_enabled == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if(safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if(safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if(safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if(safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if(data_set->stonith_enabled) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { action->on_fail = action_migrate_failure; value = "atomic migration recovery (default)"; } else if(value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } crm_debug_3("\t%s failure handling: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "role_after_failure"); } if(value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if(action->fail_role == RSC_ROLE_UNKNOWN) { if(safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } crm_debug_3("\t%s failure results in: %s", action->task, role2text(action->fail_role)); if(xml_obj != NULL) { xml_prop_iter(xml_obj, p_name, p_value, if(p_value != NULL) { g_hash_table_insert(action->meta, crm_strdup(p_name), crm_strdup(p_value)); } ); unpack_instance_attributes(xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); } field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); CRM_CHECK(value_i >= 0, value_i = 0); value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } field = "start_delay"; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } field = "timeout"; value = g_hash_table_lookup(action->meta, field); if(value == NULL) { value = pe_pref( data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } value_i += start_delay; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } xmlNode * find_rsc_op_entry(resource_t *rsc, const char *key) { int number = 0; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { crm_debug_2("%s disabled", ID(operation)); continue; } number = crm_get_msec(interval); if(number < 0) { continue; } match_key = generate_op_key(rsc->id, name, number); if(safe_str_eq(key, match_key)) { op = operation; } crm_free(match_key); if(op != NULL) { return op; } ); crm_debug_3("No match for %s", key); return op; } void print_node(const char *pre_text, node_t *node, gboolean details) { if(node == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", node->details==NULL?"error ":node->details->online?"":"Unavailable/Unclean ", node->details->uname, node->weight, node->fixed?"True":"False"); if(details && node != NULL && node->details != NULL) { char *pe_mutable = crm_strdup("\t\t"); crm_debug_4("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); crm_free(pe_mutable); crm_debug_4("\t\t=== Resources"); slist_iter( rsc, resource_t, node->details->running_rsc, lpc, print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); ); } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_debug_4("%s%s %s ==> %s", user_data==NULL?"":(char*)user_data, user_data==NULL?"":": ", (char*)key, (char*)value); } void print_resource( int log_level, const char *pre_text, resource_t *rsc, gboolean details) { long options = pe_print_log; if(rsc == NULL) { do_crm_log(log_level-1, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t *action) { if(action == NULL) { return; } pe_free_shallow(action->actions_before);/* action_warpper_t* */ pe_free_shallow(action->actions_after); /* action_warpper_t* */ g_hash_table_destroy(action->extra); g_hash_table_destroy(action->meta); crm_free(action->task); crm_free(action->uuid); crm_free(action); } GListPtr find_recurring_actions(GListPtr input, node_t *not_on_node) { const char *value = NULL; GListPtr result = NULL; CRM_CHECK(input != NULL, return NULL); slist_iter( action, action_t, input, lpc, value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if(value == NULL) { /* skip */ } else if(safe_str_eq(value, "0")) { /* skip */ } else if(safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if(not_on_node == NULL) { crm_debug_5("(null) Found: %s", action->uuid); result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ } else if(action->node->details != not_on_node->details) { crm_debug_5("Found: %s", action->uuid); result = g_list_append(result, action); } ); return result; } GListPtr find_actions(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { continue; } else if(on_node == NULL) { result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ crm_debug_2("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = on_node; result = g_list_append(result, action); } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } ); return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { crm_debug_3("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if(on_node == NULL || action->node == NULL) { crm_debug_3("on_node=%p, action->node=%p", on_node, action->node); continue; } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } crm_debug_2("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); ); return result; } void set_id(xmlNode * xml_obj, const char *prefix, int child) { int id_len = 0; gboolean use_prefix = TRUE; gboolean use_child = TRUE; char *new_id = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); id_len = 1 + strlen(id); if(child > 999) { pe_err("Are you insane?!?" " The CRM does not support > 1000 children per resource"); return; } else if(child < 0) { use_child = FALSE; } else { id_len += 4; /* child */ } if(prefix == NULL || safe_str_eq(id, prefix)) { use_prefix = FALSE; } else { id_len += (1 + strlen(prefix)); } crm_malloc0(new_id, id_len); if(use_child) { snprintf(new_id, id_len, "%s%s%s:%d", use_prefix?prefix:"", use_prefix?":":"", id, child); } else { snprintf(new_id, id_len, "%s%s%s", use_prefix?prefix:"", use_prefix?":":"", id); } crm_xml_add(xml_obj, XML_ATTR_ID, new_id); crm_free(new_id); } static void resource_node_score(resource_t *rsc, node_t *node, int score, const char *tag) { node_t *match = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, resource_node_score(child_rsc, node, score, tag); ); } crm_debug_2("Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_find_node_id(rsc->allowed_nodes, node->details->id); if(match == NULL) { match = node_copy(node); match->weight = 0; rsc->allowed_nodes = g_list_append(rsc->allowed_nodes, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set) { if(node != NULL) { resource_node_score(rsc, node, score, tag); } else if(data_set != NULL) { slist_iter( node, node_t, data_set->nodes, lpc, resource_node_score(rsc, node, score, tag); ); } else { slist_iter( node, node_t, rsc->allowed_nodes, lpc, resource_node_score(rsc, node, score, tag); ); } if(node == NULL && score == -INFINITY) { if(rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); crm_free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int) crm_free(a_uuid); crm_free(b_uuid); return an_int gint sort_op_by_callid(gconstpointer a, gconstpointer b) { char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); const char *a_task_id = crm_element_value_const(xml_a, XML_LRM_ATTR_CALLID); const char *b_task_id = crm_element_value_const(xml_b, XML_LRM_ATTR_CALLID); const char *a_key = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_key = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); int dummy = -1; int a_id = -1; int b_id = -1; int a_rc = -1; int b_rc = -1; int a_status = -1; int b_status = -1; int a_call_id = -1; int b_call_id = -1; if(safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0); } CRM_CHECK(a_task_id != NULL && b_task_id != NULL, sort_return(0)); a_call_id = crm_parse_int(a_task_id, NULL); b_call_id = crm_parse_int(b_task_id, NULL); if(a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0); } else if(a_call_id >= 0 && a_call_id < b_call_id) { crm_debug_4("%s (%d) < %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } else if(b_call_id >= 0 && a_call_id > b_call_id) { crm_debug_4("%s (%d) > %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } crm_debug_5("%s (%d) == %s (%d) : continuing", a_xml_id, a_call_id, b_xml_id, b_call_id); /* now process pending ops */ CRM_CHECK(a_key != NULL && b_key != NULL, sort_return(0)); CRM_CHECK(decode_transition_magic( - a_key, &a_uuid, &a_id, &dummy, &a_status, &a_rc), + a_key, &a_uuid, &a_id, &dummy, &a_status, &a_rc, &dummy), sort_return(0)); CRM_CHECK(decode_transition_magic( - b_key, &b_uuid, &b_id, &dummy, &b_status, &b_rc), + b_key, &b_uuid, &b_id, &dummy, &b_status, &b_rc, &dummy), sort_return(0)); /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if(safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ CRM_CHECK(a_call_id == -1 || b_call_id == -1, sort_return(0)); CRM_CHECK(a_call_id >= 0 || b_call_id >= 0, sort_return(0)); if(b_call_id == -1) { crm_debug_2("%s (%d) < %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } if(a_call_id == -1) { crm_debug_2("%s (%d) > %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } } else if((a_id >= 0 && a_id < b_id) || b_id == -1) { crm_debug_3("%s (%d) < %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(-1); } else if((b_id >= 0 && a_id > b_id) || a_id == -1) { crm_debug_3("%s (%d) > %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(1); } /* we should never end up here */ crm_err("%s (%d:%d:%s) ?? %s (%d:%d:%s) : default", a_xml_id, a_call_id, a_id, a_uuid, b_xml_id, b_call_id, b_id, b_uuid); CRM_CHECK(FALSE, sort_return(0)); } time_t get_timet_now(pe_working_set_t *data_set) { time_t now = 0; if(data_set && data_set->now) { now = data_set->now->tm_now; } if(now == 0) { /* eventually we should convert data_set->now into time_tm * for now, its only triggered by PE regression tests */ now = time(NULL); crm_crit("Defaulting to 'now'"); if(data_set && data_set->now) { data_set->now->tm_now = now; } } return now; } int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set) { int last = 0; int fail_count = 0; resource_t *failed = rsc; char *fail_attr = crm_concat("fail-count", rsc->id, '-'); const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if(value != NULL) { fail_count = char2score(value); crm_info("%s has failed %d on %s", rsc->id, fail_count, node->details->uname); } crm_free(fail_attr); fail_attr = crm_concat("last-failure", rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, fail_attr); if(value != NULL && rsc->failure_timeout) { last = crm_parse_int(value, NULL); if(last_failure) { *last_failure = last; } if(last > 0) { time_t now = get_timet_now(data_set); if(now > (last + rsc->failure_timeout)) { crm_notice("Failcount for %s on %s has expired (limit was %ds)", failed->id, node->details->uname, rsc->failure_timeout); fail_count = 0; } } } crm_free(fail_attr); return fail_count; } diff --git a/transitioner/actions.c b/transitioner/actions.c index 4c33b530e8..b9dc40b679 100644 --- a/transitioner/actions.c +++ b/transitioner/actions.c @@ -1,520 +1,532 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; IPC_Channel *crm_ch = NULL; void send_rsc_command(crm_action_t *action); extern crm_action_timer_t *transition_timer; static void te_start_action_timer(crm_action_t *action) { crm_malloc0(action->timer, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action_warn; action->timer->action = action; action->timer->source_id = Gmain_timeout_add( action->timer->timeout, action_timer_callback, (void*)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t *graph, crm_action_t *pseudo) { crm_info("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } #if SUPPORT_HEARTBEAT void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(op->node_name != NULL, return); CRM_CHECK(op->node_uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = te_cib_conn->cmds->update( te_cib_conn, XML_CIB_TAG_STATUS, node_state, NULL, cib_quorum_override|cib_scope_local); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(rc, FALSE, NULL, cib_fencing_updated); } free_xml(node_state); return; } #endif static gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; stonith_ops_t * st_op = NULL; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = g_hash_table_lookup(action->params, crm_meta_name("stonith_action")); CRM_CHECK(id != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(uuid != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(type != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(target != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); te_log_action(LOG_INFO, "Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->transition_timeout / 2); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); crm_malloc0(st_op, sizeof(stonith_ops_t)); if(safe_str_eq(type, "poweroff")) { st_op->optype = POWEROFF; } else { st_op->optype = RESET; } st_op->timeout = transition_graph->transition_timeout / 2; st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); st_op->private_data = generate_transition_key( - transition_graph->id, action->id, te_uuid); + transition_graph->id, action->id, 0, te_uuid); CRM_ASSERT(stonithd_input_IPC_channel() != NULL); if (ST_OK != stonithd_node_fence( st_op )) { crm_err("Cannot fence %s: stonithd_node_fence() call failed ", target); } return TRUE; } #endif return FALSE; } +static int get_target_rc(crm_action_t *action) +{ + const char *target_rc_s = g_hash_table_lookup( + action->params, crm_meta_name(XML_ATTR_TE_TARGET_RC)); + + if(target_rc_s != NULL) { + return crm_parse_int(target_rc_s, "0"); + } + return 0; +} + static gboolean te_crm_command(crm_graph_t *graph, crm_action_t *action) { char *value = NULL; char *counter = NULL; xmlNode *cmd = NULL; const char *id = NULL; const char *task = NULL; const char *on_node = NULL; gboolean ret = TRUE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", crm_str(id), crm_str(task), on_node); cmd = create_request(task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key( - transition_graph->id, action->id, te_uuid); + transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); ret = send_ipc_message(crm_ch, cmd); crm_free(counter); free_xml(cmd); value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(ret == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(crm_is_true(value)) { crm_info("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else if(ret && action->timeout > 0) { crm_debug("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; te_start_action_timer(action); } return TRUE; } static gboolean te_rsc_command(crm_graph_t *graph, crm_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ const char *task = NULL; const char *on_node = NULL; action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); send_rsc_command(action); return TRUE; } gboolean cib_action_update(crm_action_t *action, int status) { char *op_id = NULL; char *code = NULL; char *digest = NULL; xmlNode *tmp = NULL; xmlNode *params = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; enum cib_errors rc = cib_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override|cib_scope_local; crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if(action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); code = crm_itoa(status); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); xml_op = create_xml_node(rsc, XML_LRM_TAG_RSC_OP); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, action->interval); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, XML_ATTR_ORIGIN, __FUNCTION__); crm_free(code); - code = generate_transition_key(transition_graph->id, action->id,te_uuid); + code = generate_transition_key( + transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status, status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); tmp = find_xml_node(action->xml, "attributes", TRUE); params = create_xml_node(NULL, XML_TAG_PARAMS); copy_in_properties(params, tmp); filter_action_parameters(params, CRM_FEATURE_SET); digest = calculate_xml_digest(params, TRUE, FALSE); /* info for now as this area has been problematic to debug */ crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", params); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); free_xml(params); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); rc = te_cib_conn->cmds->update( te_cib_conn, XML_CIB_TAG_STATUS, state, NULL, call_options); crm_debug("Updating CIB with %s action %d: %s on %s (call_id=%d)", op_status2text(status), action->id, task_uuid, target, rc); add_cib_op_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void send_rsc_command(crm_action_t *action) { xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key( - transition_graph->id, action->id, te_uuid); + transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); crm_info("Initiating action %d: %s %s on %s", action->id, task, task_uuid, on_node); crm_free(counter); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); #if 1 send_ipc_message(crm_ch, cmd); #else /* test the TE timer/recovery code */ if((action->id % 11) == 0) { crm_err("Faking lost action %d: %s", action->id, task_uuid); } else { send_ipc_message(crm_ch, cmd); } #endif free_xml(cmd); action->executed = TRUE; value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(crm_is_true(value)) { crm_debug("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else if(action->timeout > 0) { int action_timeout = (2 * action->timeout) + transition_graph->network_delay; crm_debug_3("Setting timer for action %s", task_uuid); if(transition_graph->transition_timeout < action_timeout) { crm_debug("Action %d:" " Increasing transition %d timeout to %d (2*%d + %d)", action->id, transition_graph->id, action_timeout, action->timeout, transition_graph->network_delay); transition_graph->transition_timeout = action_timeout; } te_start_action_timer(action); } } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; extern GMainLoop* mainloop; void notify_crmd(crm_graph_t *graph) { xmlNode *cmd = NULL; int log_level = LOG_DEBUG; const char *op = CRM_OP_TEABORT; int pending_callbacks = num_cib_op_callbacks(); stop_te_timer(transition_timer); if(pending_callbacks != 0) { crm_warn("Delaying completion until all CIB updates complete"); return; } CRM_CHECK(graph->complete, graph->complete = TRUE); switch(graph->completion_action) { case tg_stop: op = CRM_OP_TECOMPLETE; log_level = LOG_INFO; break; case tg_abort: case tg_restart: op = CRM_OP_TEABORT; break; case tg_shutdown: crm_info("Exiting after transition"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); return; } exit(LSB_EXIT_OK); } te_log_action(log_level, "Transition %d status: %s - %s", graph->id, op, crm_str(graph->abort_reason)); print_graph(LOG_DEBUG_3, graph); cmd = create_request( op, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_TENGINE, NULL); if(graph->abort_reason != NULL) { crm_xml_add(cmd, "message", graph->abort_reason); } send_ipc_message(crm_ch, cmd); free_xml(cmd); graph->abort_reason = NULL; graph->completion_action = tg_restart; } diff --git a/transitioner/callbacks.c b/transitioner/callbacks.c index d9c2a8892c..6e8fcaef86 100644 --- a/transitioner/callbacks.c +++ b/transitioner/callbacks.c @@ -1,608 +1,609 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, xmlNode *msg); void te_update_diff(const char *event, xmlNode *msg); xmlNode *need_abort(xmlNode *update); void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; static gboolean start_global_timer(crm_action_timer_t *timer, int timeout) { CRM_ASSERT(timer != NULL); CRM_CHECK(timer > 0, return FALSE); CRM_CHECK(timer->source_id == 0, return FALSE); if(timeout <= 0) { crm_err("Tried to start timer with period: %d", timeout); } else if(timer->source_id == 0) { crm_debug_2("Starting abort timer: %dms", timeout); timer->timeout = timeout; timer->source_id = Gmain_timeout_add( timeout, global_timer_callback, (void*)timer); CRM_ASSERT(timer->source_id != 0); return TRUE; } else { crm_err("Timer is already active with period: %d", timer->timeout); } return FALSE; } void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; xmlNode *diff = NULL; xmlNode *aborted = NULL; const char *set_name = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; if(msg == NULL) { crm_err("NULL update"); return; } crm_element_value_int(msg, F_CIB_RC, &rc); op = crm_element_value(msg, F_CIB_OPERATION); if(rc < cib_ok) { crm_debug_2("Ignoring failed %s operation: %s", op, cib_error2string(rc)); return; } diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL) { xmlNode *section = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { xmlNode *attrs = NULL; xmlNode *status = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); if(aborted == NULL && change_set != NULL) { status = get_object_root(XML_CIB_TAG_STATUS, change_set); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" deletions"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } ); } } if(aborted != NULL) { abort_transition( INFINITY, tg_restart, "Non-status change", NULL); } return; } gboolean process_te_message(xmlNode *msg, xmlNode *xml_data, IPC_Channel *sender) { xmlNode *xml_obj = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_xml(LOG_DEBUG_3, "ipc", msg); if(op == NULL){ /* error */ } else if(strcasecmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(xml_data), XML_TAG_CIB)) { xml_obj = xml_data; } else { xml_obj = find_xml_node(xml_data, XML_TAG_CIB, TRUE); } #else xml_obj = xml_data; CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); #endif CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_info("Processing (N)ACK %s from %s", crm_element_value(msg, XML_ATTR_REFERENCE), from); extract_event(xml_obj); } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { crm_err("Message was a response not a request. Discarding"); return TRUE; } else if(strcasecmp(op, CRM_OP_TRANSITION) == 0) { const char *graph_file = crm_element_value(msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(msg, F_CRM_TGRAPH_INPUT); CRM_CHECK(graph_file != NULL || xml_data != NULL, crm_err("No graph provided"); crm_log_xml(LOG_WARNING, "no graph", msg); return TRUE); if(transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition( INFINITY, tg_restart, "Transition Active", NULL); } else { const char *value = NULL; xmlNode *graph_data = xml_data; crm_debug("Processing graph derived from %s", graph_input); if(graph_file != NULL) { FILE *graph_fd = fopen(graph_file, "r"); CRM_CHECK(graph_fd != NULL, cl_perror("Could not open graph file %s", graph_file); return TRUE); graph_data = file2xml(graph_fd, FALSE); unlink(graph_file); fclose(graph_fd); } destroy_graph(transition_graph); transition_graph = unpack_graph(graph_data); start_global_timer(transition_timer, transition_graph->transition_timeout); value = crm_element_value(graph_data, "failed-stop-offset"); if(value) { failed_stop_offset = crm_strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if(value) { failed_start_offset = crm_strdup(value); } trigger_graph(); print_graph(LOG_DEBUG_2, transition_graph); if(graph_data != xml_data) { free_xml(graph_data); } } } else if(strcasecmp(op, CRM_OP_TE_HALT) == 0) { abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); } else if(strcasecmp(op, CRM_OP_TEABORT) == 0) { abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } #if SUPPORT_HEARTBEAT void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; + int target_rc = -1; int stonith_id = -1; int transition_id = -1; char *uuid = NULL; crm_action_t *stonith_action = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key( - op->private_data, &uuid, &transition_id, &stonith_id), + op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); } stonith_action = get_action(stonith_id, TRUE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); goto bail; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = g_hash_table_lookup( stonith_action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); bail: crm_free(uuid); return; } void tengine_stonith_connection_destroy(gpointer user_data) { crm_err("Fencing daemon has left us"); stonith_src = NULL; if(stonith_src == NULL) { G_main_set_trigger(stonith_reconnect); } /* cbchan will be garbage at this point, arrange for it to be reset */ set_stonithd_input_IPC_channel_NULL(); return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } #endif void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "Failed update"); } } void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( LOG_WARNING,"Action missed its timeout", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT); } return FALSE; } static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; const char *key = NULL; const char *task = NULL; const char *node = NULL; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm_info("Action %s %d unconfirmed from %s", key, action->id, node); if(action->type != action_type_rsc) { continue; } else if(send_updates == FALSE) { continue; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ continue; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ continue; } cib_action_update(action, LRM_OP_PENDING); ); ); if(unconfirmed > 0) { crm_warn("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." " marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } gboolean te_graph_trigger(gpointer user_data) { int timeout = 0; enum transition_status graph_rc = -1; if(transition_graph->complete == FALSE) { graph_rc = run_graph(transition_graph); timeout = transition_graph->transition_timeout; print_graph(LOG_DEBUG_3, transition_graph); if(graph_rc == transition_active) { crm_debug_3("Transition not yet complete"); stop_te_timer(transition_timer); start_global_timer(transition_timer, timeout); return TRUE; } else if(graph_rc == transition_pending) { crm_debug_3("Transition not yet complete - no actions fired"); return TRUE; } if(graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } } transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } diff --git a/transitioner/events.c b/transitioner/events.c index 0242bda5cf..710ccae041 100644 --- a/transitioner/events.c +++ b/transitioner/events.c @@ -1,582 +1,577 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include char *failed_stop_offset = NULL; char *failed_start_offset = NULL; xmlNode *need_abort(xmlNode *update); void process_graph_event(xmlNode *event, const char *event_node); int match_graph_event(int action_id, xmlNode *event, const char *event_node, - int op_status, int op_rc); + int op_status, int op_rc, int target_rc); xmlNode * need_abort(xmlNode *update) { xmlNode *section_xml = NULL; const char *section = NULL; if(update == NULL) { return NULL; } xml_prop_iter(update, name, value, if(safe_str_eq(name, XML_ATTR_HAVE_QUORUM)) { goto do_abort; /* possibly not required */ } else if(safe_str_eq(name, XML_ATTR_NUMPEERS)) { goto do_abort; } else if(safe_str_eq(name, XML_ATTR_GENERATION)) { goto do_abort; } else if(safe_str_eq(name, XML_ATTR_GENERATION_ADMIN)) { goto do_abort; } continue; do_abort: crm_debug("Aborting on change to %s", name); crm_log_xml_debug(update, "Abort: CIB Attrs"); return update; ); section = XML_CIB_TAG_NODES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_RESOURCES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CONSTRAINTS; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CRMCONFIG; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); return NULL; } static gboolean fail_incompletable_actions(crm_graph_t *graph, const char *down_node) { const char *target = NULL; xmlNode *last_action = NULL; slist_iter( synapse, synapse_t, graph->synapses, lpc, if (synapse->confirmed) { continue; } slist_iter( action, crm_action_t, synapse->actions, lpc, if(action->type == action_type_pseudo || action->confirmed) { continue; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if(safe_str_eq(target, down_node)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Action %d (%s) is scheduled for %s (offline)", action->id, ID(action->xml), down_node); } ); ); if(last_action != NULL) { crm_warn("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; } gboolean extract_event(xmlNode *msg) { int shutdown = 0; const char *shutdown_s = NULL; const char *event_node = NULL; /* [cib fragment] ... */ crm_debug_4("Extracting event from %s", crm_element_name(msg)); xml_child_iter_filter( msg, node_state, XML_CIB_TAG_STATE, xmlNode *attrs = NULL; xmlNode *resources = NULL; const char *ccm_state = crm_element_value( node_state, XML_CIB_ATTR_INCCM); const char *crmd_state = crm_element_value( node_state, XML_CIB_ATTR_CRMDSTATE); /* Transient node attribute changes... */ event_node = crm_element_value(node_state, XML_ATTR_ID); crm_debug_2("Processing state update from %s", event_node); crm_log_xml_debug_3(node_state, "Processing"); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes for %s", event_node); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); resources = find_xml_node( resources, XML_LRM_TAG_RESOURCES, FALSE); /* LRM resource update... */ xml_child_iter( resources, rsc, xml_child_iter( rsc, rsc_op, crm_log_xml_debug_3(rsc_op, "Processing resource update"); process_graph_event(rsc_op, event_node); ); ); /* * node state update... possibly from a shutdown we requested */ if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { crm_action_t *shutdown = NULL; shutdown = match_down_event(0, event_node, NULL); if(shutdown != NULL) { update_graph(transition_graph, shutdown); trigger_graph(); } else { crm_info("Stonith/shutdown of %s not matched", event_node); abort_transition(INFINITY, tg_restart, "Node failure", node_state); } fail_incompletable_actions(transition_graph, event_node); } shutdown_s = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); if(shutdown_s) { shutdown = crm_parse_int(shutdown_s, NULL); } if(shutdown_s && shutdown > 0) { crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node); abort_transition(INFINITY, tg_restart, "Shutdown request", node_state); } ); return TRUE; } static void -update_failcount(xmlNode *event, const char *event_node, int rc) +update_failcount(xmlNode *event, const char *event_node, int rc, int target_rc) { int interval = 0; char *task = NULL; char *rsc_id = NULL; char *attr_name = NULL; const char *id = ID(event); const char *on_uuid = event_node; const char *value = NULL; if(rc == 99) { /* this is an internal code for "we're busy, try again" */ return; + + } else if(rc == target_rc) { + return; } if(failed_stop_offset == NULL) { failed_stop_offset = crm_strdup(INFINITY_S); } if(failed_start_offset == NULL) { failed_start_offset = crm_strdup(INFINITY_S); } CRM_CHECK(on_uuid != NULL, return); CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event)); goto bail); CRM_CHECK(task != NULL, goto bail); CRM_CHECK(rsc_id != NULL, goto bail); if(safe_str_eq(task, CRMD_ACTION_START)) { interval = 1; value = failed_start_offset; } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { interval = 1; value = failed_stop_offset; } if(value == NULL || safe_str_neq(value, INFINITY_S)) { value = XML_NVPAIR_ATTR_VALUE"++"; } if(interval > 0) { int call_id = 0; char *now = crm_itoa(time(NULL)); attr_name = crm_concat("fail-count", rsc_id, '-'); crm_warn("Updating failcount for %s on %s after failed %s:" " rc=%d (update=%s, time=%s)", rsc_id, on_uuid, task, rc, value, now); /* don't let notificatios of these updates cause new transitions */ call_id = update_attr(te_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, value, FALSE); add_cib_op_callback(call_id, FALSE, NULL, cib_failcount_updated); crm_free(attr_name); attr_name = crm_concat("last-failure", rsc_id, '-'); /* don't let notificatios of these updates cause new transitions */ call_id = update_attr(te_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, value, FALSE); add_cib_op_callback(call_id, FALSE, NULL, cib_failcount_updated); crm_free(attr_name); crm_free(now); } bail: crm_free(rsc_id); crm_free(task); } static int -status_from_rc(crm_action_t *action, int orig_status, int rc) +status_from_rc(crm_action_t *action, int orig_status, int rc, int target_rc) { - int target_rc = 0; int status = orig_status; - const char *target_rc_s = g_hash_table_lookup( - action->params, crm_meta_name(XML_ATTR_TE_TARGET_RC)); - - if(target_rc_s != NULL) { - crm_debug_2("Target rc: %s vs. %d", target_rc_s, rc); - target_rc = crm_parse_int(target_rc_s, NULL); - } - if(target_rc == rc) { crm_debug_2("Target rc: == %d", rc); if(status != LRM_OP_DONE) { crm_debug_2("Re-mapping op status to" " LRM_OP_DONE for rc=%d", rc); status = LRM_OP_DONE; } } else { crm_debug_2("Target rc: != %d", rc); if(status != LRM_OP_ERROR) { crm_info("Re-mapping op status to" " LRM_OP_ERROR for rc=%d", rc); status = LRM_OP_ERROR; } } /* 99 is the code we use for direct nack's */ if(rc != 99 && status != LRM_OP_DONE) { const char *task, *uname; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); - crm_warn("Action %d (%s) on %s failed (target: %s vs. rc: %d): %s", - action->id, task, uname, crm_str(target_rc_s), rc, op_status2text(status)); + crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s", + action->id, task, uname, target_rc, rc, op_status2text(status)); } return status; } /* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event(int action_id, xmlNode *event, const char *event_node, - int op_status, int op_rc) + int op_status, int op_rc, int target_rc) { const char *target = NULL; const char *allow_fail = NULL; const char *this_event = ID(event); crm_action_t *action = NULL; action = get_action(action_id, FALSE); if(action == NULL) { return -1; } - op_status = status_from_rc(action, op_status, op_rc); + op_status = status_from_rc(action, op_status, op_rc, target_rc); if(op_status != LRM_OP_DONE) { - update_failcount(event, event_node, op_rc); + update_failcount(event, event_node, op_rc, target_rc); } /* Process OP status */ switch(op_status) { case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return action->id; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: action->failed = TRUE; break; case LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Dont know what to do for cancelled ops yet"); break; default: action->failed = TRUE; crm_err("Unsupported action result: %d", op_status); } /* stop this event's timer if it had one */ stop_te_timer(action->timer); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); if(action->failed) { allow_fail = g_hash_table_lookup( action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(crm_is_true(allow_fail)) { action->failed = FALSE; } } if(action->failed) { abort_transition(action->synapse->priority+1, tg_restart, "Event failed", event); } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); te_log_action(LOG_INFO, "Action %s (%d) confirmed on %s (rc=%d)", crm_str(this_event), action->id, crm_str(target), op_status); return action->id; } crm_action_t * get_action(int id, gboolean confirmed) { slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->id == id) { if(confirmed) { stop_te_timer(action->timer); action->confirmed = TRUE; } return action; } ) ); return NULL; } crm_action_t * match_down_event(int id, const char *target, const char *filter) { const char *this_action = NULL; const char *this_node = NULL; crm_action_t *match = NULL; slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(id > 0 && action->id == id) { match = action; break; } this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); if(action->type != action_type_crm) { continue; } else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){ continue; } else if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); if(this_node == NULL) { crm_log_xml_err(action->xml, "No node uuid"); } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } match = action; break; ); if(match != NULL) { /* stop this event's timer if it had one */ break; } ); if(match != NULL) { /* stop this event's timer if it had one */ crm_debug("Match found for action %d: %s on %s", id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); stop_te_timer(match->timer); match->confirmed = TRUE; } else if(id > 0) { crm_err("No match for action %d", id); } else { crm_warn("No match for shutdown action on %s", target); } return match; } void process_graph_event(xmlNode *event, const char *event_node) { int rc = -1; int status = -1; int action = -1; + int target_rc = -1; int transition_num = -1; char *update_te_uuid = NULL; gboolean passed = FALSE; const char *id = NULL; const char *magic = NULL; CRM_ASSERT(event != NULL); id = ID(event); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); if(magic == NULL) { /* non-change */ return; } CRM_CHECK(decode_transition_magic( magic, &update_te_uuid, &transition_num, &action, - &status, &rc), + &status, &rc, &target_rc), crm_err("Invalid event %s detected", id); abort_transition(INFINITY, tg_restart,"Bad event", event); ); if(status == LRM_OP_PENDING) { goto bail; } if(transition_num == -1) { crm_err("Action %s (%s) initiated outside of a transition", id, magic); abort_transition(INFINITY, tg_restart,"Unexpected event",event); } else if(action < 0 || safe_str_neq(update_te_uuid, te_uuid)) { crm_info("Action %s (%s) initiated by a different transitioner", id, magic); abort_transition(INFINITY, tg_restart,"Foreign event", event); } else if(transition_graph->id != transition_num) { crm_info("Detected action %s from a different transition:" " %d vs. %d", id, transition_num, transition_graph->id); abort_transition(INFINITY, tg_restart,"Old event", event); } else if(transition_graph->complete) { crm_info("Action %s arrived after a completed transition", id); abort_transition(INFINITY, tg_restart, "Inactive graph", event); } else if(match_graph_event( - action, event, event_node, status, rc) < 0) { + action, event, event_node, status, rc, target_rc) < 0) { crm_err("Unknown graph action %s", id); abort_transition(INFINITY, tg_restart, "Unknown event", event); } else { passed = TRUE; crm_debug_2("Processed update to %s: %s", id, magic); } if(passed == FALSE && rc != EXECRA_OK) { - update_failcount(event, event_node, rc); + update_failcount(event, event_node, rc, target_rc); } bail: crm_free(update_te_uuid); return; }