diff --git a/lib/crm/pengine/utils.c b/lib/crm/pengine/utils.c index 79122ace36..bfa26d9ae6 100644 --- a/lib/crm/pengine/utils.c +++ b/lib/crm/pengine/utils.c @@ -1,1237 +1,1278 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set); void pe_free_shallow(GListPtr alist) { pe_free_shallow_adv(alist, TRUE); } void pe_free_shallow_adv(GListPtr alist, gboolean with_data) { GListPtr item; GListPtr item_next = alist; if(with_data == FALSE && alist != NULL) { g_list_free(alist); return; } while(item_next != NULL) { item = item_next; item_next = item_next->next; if(with_data) { /* crm_debug_5("freeing %p", item->data); */ crm_free(item->data); } item->data = NULL; item->next = NULL; g_list_free_1(item); } } node_t * node_copy(node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); crm_malloc0(new_node, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_debug_5("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* are the contents of list1 and list2 equal * nodes with weight < 0 are ignored if filter == TRUE * * slow but linear * */ gboolean node_list_eq(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node; GListPtr lhs = list1; GListPtr rhs = list2; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); lhs = list2; rhs = list1; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); return TRUE; } /* the intersection of list1 and list2 */ GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; unsigned lpc = 0; for(lpc = 0; lpc < g_list_length(list1); lpc++) { node_t *node = (node_t*)g_list_nth_data(list1, lpc); node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(other_node != NULL) { new_node = node_copy(node); } if(new_node != NULL) { crm_debug_4("%s: %d + %d", node->details->uname, other_node->weight, new_node->weight); new_node->weight = merge_weights( new_node->weight, other_node->weight); crm_debug_3("New node weight for %s: %d", new_node->details->uname, new_node->weight); if(filter && new_node->weight < 0) { crm_free(new_node); new_node = NULL; } } if(new_node != NULL) { result = g_list_append(result, new_node); } } return result; } /* list1 - list2 */ GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Minus result len: %d", g_list_length(result)); return result; } /* list1 + list2 - (intersection of list1 and list2) */ GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list2, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); slist_iter( node, node_t, list2, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list1, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Xor result len: %d", g_list_length(result)); return result; } GListPtr node_list_or(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node = NULL; GListPtr result = NULL; gboolean needs_filter = FALSE; result = node_list_dup(list1, FALSE, filter); slist_iter( node, node_t, list2, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id( result, node->details->id); if(other_node != NULL) { crm_debug_4("%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights( other_node->weight, node->weight); if(filter && node->weight < 0) { needs_filter = TRUE; } } else if(filter == FALSE || node->weight >= 0) { node_t *new_node = node_copy(node); result = g_list_append(result, new_node); } ); /* not the neatest way, but the most expedient for now */ if(filter && needs_filter) { GListPtr old_result = result; result = node_list_dup(old_result, FALSE, filter); pe_free_shallow_adv(old_result, TRUE); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; slist_iter( this_node, node_t, list1, lpc, node_t *new_node = NULL; if(filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if(reset) { new_node->weight = 0; } if(new_node != NULL) { result = g_list_append(result, new_node); } ); return result; } void dump_node_scores(int level, resource_t *rsc, const char *comment, GListPtr nodes) { GListPtr list = nodes; if(rsc) { list = rsc->allowed_nodes; } slist_iter( node, node_t, list, lpc, if(rsc) { do_crm_log(level, "%s: %s allocation score on %s: %d", comment, rsc->id, node->details->uname, node->weight); } else { do_crm_log(level, "%s: %s = %d", comment, node->details->uname, node->weight); } ); if(rsc && rsc->children) { slist_iter( child, resource_t, rsc->children, lpc, dump_node_scores(level, child, comment, nodes); ); } } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->sort_index > resource2->sort_index) { return -1; } if(resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->priority > resource2->priority) { return -1; } if(resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if(save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if(possible_matches != NULL) { crm_free(key); if(g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc?rsc->id:"", on_node?on_node->details->uname:"", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); crm_debug_4("Found existing action (%d) %s for %s on %s", action->id, task, rsc?rsc->id:"", on_node?on_node->details->uname:""); g_list_free(possible_matches); } if(action == NULL) { if(save_action) { crm_debug_2("Creating%s action %d: %s for %s on %s", optional?"":" manditory", data_set->action_id, key, rsc?rsc->id:"", on_node?on_node->details->uname:""); } crm_malloc0(action, sizeof(action_t)); if(save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = crm_strdup(task); action->node = on_node; action->uuid = key; action->actions_before = NULL; action->actions_after = NULL; action->failure_is_fatal = TRUE; action->pseudo = FALSE; action->dumped = FALSE; action->runnable = TRUE; action->processed = FALSE; action->optional = optional; action->seen_count = 0; action->extra = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); action->meta = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(save_action) { data_set->actions = g_list_append( data_set->actions, action); } if(rsc != NULL) { action->op_entry = find_rsc_op_entry(rsc, key); unpack_operation( action, action->op_entry, data_set); if(save_action) { rsc->actions = g_list_append( rsc->actions, action); } } if(save_action) { crm_debug_4("Action %d created", action->id); } } if(optional == FALSE && action->optional) { crm_debug_2("Action %d (%s) marked manditory", action->id, action->uuid); action->optional = FALSE; } if(rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_DEBUG_3; if(save_action) { warn_level = LOG_WARNING; } if(action->node != NULL && action->op_entry != NULL) { unpack_instance_attributes( action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if(action->pseudo) { /* leave untouched */ } else if(action->node == NULL) { action->runnable = FALSE; } else if(is_not_set(rsc->flags, pe_rsc_managed)) { do_crm_log(warn_level, "Action %s (unmanaged)", action->uuid); action->optional = TRUE; /* action->runnable = FALSE; */ } else if(action->node->details->online == FALSE) { action->runnable = FALSE; do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if(is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { do_crm_log(warn_level, "Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if(action->needs == rsc_req_nothing) { crm_debug_3("Action %s doesnt require anything", action->uuid); action->runnable = TRUE; #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if(action->needs == rsc_req_stonith) { crm_debug_3("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_stop) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_debug_3("Check resource is already active"); if(rsc->fns->active(rsc, TRUE) == FALSE) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { crm_debug_3("Action %s is runnable", action->uuid); action->runnable = TRUE; } if(save_action) { switch(a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if(action->runnable) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } return action; } void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set) { int value_i = 0; int start_delay = 0; char *value_ms = NULL; const char *class = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); class = g_hash_table_lookup(action->rsc->meta, "class"); if(xml_obj != NULL) { value = crm_element_value(xml_obj, "prereq"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_START)) { value = g_hash_table_lookup(action->rsc->meta, "start_prereq"); } if(value == NULL && safe_str_neq(action->task, CRMD_ACTION_START)) { /* todo: integrate stop as an option? */ action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if(safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if(safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; } else if(data_set->no_quorum_policy == no_quorum_ignore || safe_str_eq(class, "stonith")) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(data_set->no_quorum_policy == no_quorum_freeze && data_set->stonith_enabled) { action->needs = rsc_req_stonith; value = "fencing (default)"; } else { action->needs = rsc_req_quorum; value = "quorum (default)"; } if(safe_str_eq(class, "stonith")) { if(action->needs == rsc_req_stonith) { crm_config_err("Stonith resources (eg. %s) cannot require" " fencing to start", action->rsc->id); } action->needs = rsc_req_nothing; value = "nothing (fencing override)"; } crm_debug_3("\tAction %s requires: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "on_fail"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { value = g_hash_table_lookup( action->rsc->meta, "on_stopfail"); if(value != NULL) { #if CRM_DEPRECATED_SINCE_2_0_2 crm_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2", action->rsc->id); #else crm_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2" " and is now disabled", action->rsc->id); value = NULL; #endif crm_config_err("Please use specify the \"on_fail\"" " attribute on the \"stop\" operation" " instead"); } } if(value == NULL) { } else if(safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if(safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if(data_set->stonith_enabled == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if(safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if(safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if(safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if(safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if(data_set->stonith_enabled) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { action->on_fail = action_migrate_failure; value = "atomic migration recovery (default)"; } else if(value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } crm_debug_3("\t%s failure handling: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "role_after_failure"); } if(value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if(action->fail_role == RSC_ROLE_UNKNOWN) { if(safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } crm_debug_3("\t%s failure results in: %s", action->task, role2text(action->fail_role)); if(xml_obj != NULL) { xml_prop_iter(xml_obj, p_name, p_value, if(p_value != NULL) { g_hash_table_insert(action->meta, crm_strdup(p_name), crm_strdup(p_value)); } ); unpack_instance_attributes(xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); } field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); CRM_CHECK(value_i >= 0, value_i = 0); value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } field = "start_delay"; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } field = "timeout"; value = g_hash_table_lookup(action->meta, field); if(value == NULL) { value = pe_pref( data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } value_i += start_delay; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } xmlNode * find_rsc_op_entry(resource_t *rsc, const char *key) { int number = 0; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { crm_debug_2("%s disabled", ID(operation)); continue; } number = crm_get_msec(interval); if(number < 0) { continue; } match_key = generate_op_key(rsc->id, name, number); if(safe_str_eq(key, match_key)) { op = operation; } crm_free(match_key); if(op != NULL) { return op; } ); crm_debug_3("No match for %s", key); return op; } void print_node(const char *pre_text, node_t *node, gboolean details) { if(node == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", node->details==NULL?"error ":node->details->online?"":"Unavailable/Unclean ", node->details->uname, node->weight, node->fixed?"True":"False"); if(details && node != NULL && node->details != NULL) { char *pe_mutable = crm_strdup("\t\t"); crm_debug_4("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); crm_free(pe_mutable); crm_debug_4("\t\t=== Resources"); slist_iter( rsc, resource_t, node->details->running_rsc, lpc, print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); ); } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_debug_4("%s%s %s ==> %s", user_data==NULL?"":(char*)user_data, user_data==NULL?"":": ", (char*)key, (char*)value); } void print_resource( int log_level, const char *pre_text, resource_t *rsc, gboolean details) { long options = pe_print_log; if(rsc == NULL) { do_crm_log(log_level-1, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t *action) { if(action == NULL) { return; } pe_free_shallow(action->actions_before);/* action_warpper_t* */ pe_free_shallow(action->actions_after); /* action_warpper_t* */ g_hash_table_destroy(action->extra); g_hash_table_destroy(action->meta); crm_free(action->task); crm_free(action->uuid); crm_free(action); } GListPtr find_recurring_actions(GListPtr input, node_t *not_on_node) { const char *value = NULL; GListPtr result = NULL; CRM_CHECK(input != NULL, return NULL); slist_iter( action, action_t, input, lpc, value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if(value == NULL) { /* skip */ } else if(safe_str_eq(value, "0")) { /* skip */ } else if(safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if(not_on_node == NULL) { crm_debug_5("(null) Found: %s", action->uuid); result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ } else if(action->node->details != not_on_node->details) { crm_debug_5("Found: %s", action->uuid); result = g_list_append(result, action); } ); return result; } GListPtr find_actions(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { continue; } else if(on_node == NULL) { result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ crm_debug_2("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = on_node; result = g_list_append(result, action); } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } ); return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { crm_debug_3("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if(on_node == NULL || action->node == NULL) { crm_debug_3("on_node=%p, action->node=%p", on_node, action->node); continue; } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } crm_debug_2("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); ); return result; } void set_id(xmlNode * xml_obj, const char *prefix, int child) { int id_len = 0; gboolean use_prefix = TRUE; gboolean use_child = TRUE; char *new_id = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); id_len = 1 + strlen(id); if(child > 999) { pe_err("Are you insane?!?" " The CRM does not support > 1000 children per resource"); return; } else if(child < 0) { use_child = FALSE; } else { id_len += 4; /* child */ } if(prefix == NULL || safe_str_eq(id, prefix)) { use_prefix = FALSE; } else { id_len += (1 + strlen(prefix)); } crm_malloc0(new_id, id_len); if(use_child) { snprintf(new_id, id_len, "%s%s%s:%d", use_prefix?prefix:"", use_prefix?":":"", id, child); } else { snprintf(new_id, id_len, "%s%s%s", use_prefix?prefix:"", use_prefix?":":"", id); } crm_xml_add(xml_obj, XML_ATTR_ID, new_id); crm_free(new_id); } static void resource_node_score(resource_t *rsc, node_t *node, int score, const char *tag) { node_t *match = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, resource_node_score(child_rsc, node, score, tag); ); } crm_debug_2("Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_find_node_id(rsc->allowed_nodes, node->details->id); if(match == NULL) { match = node_copy(node); match->weight = 0; rsc->allowed_nodes = g_list_append(rsc->allowed_nodes, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set) { if(node != NULL) { resource_node_score(rsc, node, score, tag); } else if(data_set != NULL) { slist_iter( node, node_t, data_set->nodes, lpc, resource_node_score(rsc, node, score, tag); ); } else { slist_iter( node, node_t, rsc->allowed_nodes, lpc, resource_node_score(rsc, node, score, tag); ); } if(node == NULL && score == -INFINITY) { if(rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); crm_free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int) crm_free(a_uuid); crm_free(b_uuid); return an_int gint sort_op_by_callid(gconstpointer a, gconstpointer b) { char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); const char *a_task_id = crm_element_value_const(xml_a, XML_LRM_ATTR_CALLID); const char *b_task_id = crm_element_value_const(xml_b, XML_LRM_ATTR_CALLID); const char *a_key = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_key = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); int dummy = -1; int a_id = -1; int b_id = -1; int a_rc = -1; int b_rc = -1; int a_status = -1; int b_status = -1; int a_call_id = -1; int b_call_id = -1; if(safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0); } CRM_CHECK(a_task_id != NULL && b_task_id != NULL, sort_return(0)); a_call_id = crm_parse_int(a_task_id, NULL); b_call_id = crm_parse_int(b_task_id, NULL); if(a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0); } else if(a_call_id >= 0 && a_call_id < b_call_id) { crm_debug_4("%s (%d) < %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } else if(b_call_id >= 0 && a_call_id > b_call_id) { crm_debug_4("%s (%d) > %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } crm_debug_5("%s (%d) == %s (%d) : continuing", a_xml_id, a_call_id, b_xml_id, b_call_id); /* now process pending ops */ CRM_CHECK(a_key != NULL && b_key != NULL, sort_return(0)); CRM_CHECK(decode_transition_magic( a_key, &a_uuid, &a_id, &dummy, &a_status, &a_rc), sort_return(0)); CRM_CHECK(decode_transition_magic( b_key, &b_uuid, &b_id, &dummy, &b_status, &b_rc), sort_return(0)); /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if(safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ CRM_CHECK(a_call_id == -1 || b_call_id == -1, sort_return(0)); CRM_CHECK(a_call_id >= 0 || b_call_id >= 0, sort_return(0)); if(b_call_id == -1) { crm_debug_2("%s (%d) < %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } if(a_call_id == -1) { crm_debug_2("%s (%d) > %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } } else if((a_id >= 0 && a_id < b_id) || b_id == -1) { crm_debug_3("%s (%d) < %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(-1); } else if((b_id >= 0 && a_id > b_id) || a_id == -1) { crm_debug_3("%s (%d) > %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(1); } /* we should never end up here */ crm_err("%s (%d:%d:%s) ?? %s (%d:%d:%s) : default", a_xml_id, a_call_id, a_id, a_uuid, b_xml_id, b_call_id, b_id, b_uuid); CRM_CHECK(FALSE, sort_return(0)); } time_t get_timet_now(pe_working_set_t *data_set) { time_t now = 0; if(data_set && data_set->now) { now = data_set->now->tm_now; } if(now == 0) { /* eventually we should convert data_set->now into time_tm * for now, its only triggered by PE regression tests */ now = time(NULL); crm_crit("Defaulting to 'now'"); if(data_set && data_set->now) { data_set->now->tm_now = now; } } return now; } + + +int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set) +{ + int last = 0; + int fail_count = 0; + resource_t *failed = rsc; + char *fail_attr = crm_concat("fail-count", rsc->id, '-'); + const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); + + if(is_not_set(rsc->flags, pe_rsc_unique)) { + failed = uber_parent(rsc); + } + + if(value != NULL) { + fail_count = char2score(value); + crm_info("%s has failed %d on %s", + rsc->id, fail_count, node->details->uname); + } + crm_free(fail_attr); + + fail_attr = crm_concat("last-failure", rsc->id, '-'); + value = g_hash_table_lookup(node->details->attrs, fail_attr); + if(value != NULL && rsc->failure_timeout) { + last = crm_parse_int(value, NULL); + if(last_failure) { + *last_failure = last; + } + if(last > 0) { + time_t now = get_timet_now(data_set); + if(now > (last + rsc->failure_timeout)) { + crm_notice("Failcount for %s on %s has expired (limit was %ds)", + failed->id, node->details->uname, rsc->failure_timeout); + fail_count = 0; + } + } + } + + crm_free(fail_attr); + return fail_count; +} diff --git a/lib/crm/pengine/utils.h b/lib/crm/pengine/utils.h index 9f242f521b..85ef507e6d 100644 --- a/lib/crm/pengine/utils.h +++ b/lib/crm/pengine/utils.h @@ -1,125 +1,125 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PE_UTILS__H #define PE_UTILS__H #include #include extern node_t *node_copy(node_t *this_node) ; extern time_t get_timet_now(pe_working_set_t *data_set); - +extern int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set); /* Binary like operators for lists of nodes */ extern GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter); extern GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter); extern GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter); extern GListPtr node_list_minus(GListPtr list1,GListPtr list2,gboolean filter); extern gboolean node_list_eq(GListPtr list1, GListPtr list2, gboolean filter); extern GListPtr node_list_or(GListPtr list1, GListPtr list2, gboolean filter); extern void pe_free_shallow(GListPtr alist); extern void pe_free_shallow_adv(GListPtr alist, gboolean with_data); /* For creating the transition graph */ extern xmlNode *action2xml(action_t *action, gboolean as_input); /* Printing functions for debug */ extern void print_node( const char *pre_text, node_t *node, gboolean details); extern void print_resource( int log_level, const char *pre_text, resource_t *rsc, gboolean details); extern void dump_node_scores(int level, resource_t *rsc, const char *comment, GListPtr nodes); /* Sorting functions */ extern gint sort_rsc_priority(gconstpointer a, gconstpointer b); extern gint sort_rsc_index(gconstpointer a, gconstpointer b); extern xmlNode *find_rsc_op_entry(resource_t *rsc, const char *key); extern action_t *custom_action( resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set); #define delete_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DELETE, 0) #define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, data_set); #define stopped_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOPPED, 0) #define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, data_set); #define stop_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOP, 0) #define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, data_set); #define start_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_START, 0) #define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, data_set) #define started_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STARTED, 0) #define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, data_set) #define promote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) #define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, data_set) #define promoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) #define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, data_set) #define demote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) #define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, data_set) #define demoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) #define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, data_set) extern GListPtr find_actions(GListPtr input, const char *key, node_t *on_node); extern GListPtr find_actions_exact( GListPtr input, const char *key, node_t *on_node); extern GListPtr find_recurring_actions(GListPtr input, node_t *not_on_node); extern void set_id(xmlNode *xml_obj, const char *prefix, int child); extern void pe_free_action(action_t *action); extern void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); #endif diff --git a/pengine/allocate.c b/pengine/allocate.c index 6e238d769d..6ead624a46 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1,974 +1,950 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include void set_alloc_actions(pe_working_set_t *data_set); void migrate_reload_madness(pe_working_set_t *data_set); resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, native_color, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_order_lh, native_rsc_order_rh, native_rsc_location, native_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, }, { group_merge_weights, group_color, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_order_lh, group_rsc_order_rh, group_rsc_location, group_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, }, { native_merge_weights, clone_color, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_order_lh, clone_rsc_order_rh, clone_rsc_location, clone_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, }, { native_merge_weights, master_color, master_create_actions, clone_create_probe, master_internal_constraints, clone_rsc_colocation_lh, master_rsc_colocation_rh, clone_rsc_order_lh, clone_rsc_order_rh, clone_rsc_location, clone_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, } }; static gboolean check_rsc_parameters(resource_t *rsc, node_t *node, xmlNode *rsc_entry, pe_working_set_t *data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for(; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if(value == old_value /* ie. NULL */ || crm_str_eq(value, old_value, TRUE)) { continue; } force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } if(force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } return delete_resource; } static gboolean check_action_definition(resource_t *rsc, node_t *active_node, xmlNode *xml_op, pe_working_set_t *data_set) { char *key = NULL; int interval = 0; const char *interval_s = NULL; gboolean did_change = FALSE; gboolean start_op = FALSE; xmlNode *params_all = NULL; xmlNode *params_restart = NULL; GHashTable *local_rsc_params = NULL; char *digest_all_calc = NULL; const char *digest_all = NULL; const char *restart_list = NULL; const char *digest_restart = NULL; char *digest_restart_calc = NULL; action_t *action = NULL; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_CHECK(active_node != NULL, return FALSE); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); if(interval > 0) { xmlNode *op_match = NULL; crm_debug_2("Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if(op_match == NULL && data_set->stop_action_orphans) { /* create a cancel action */ action_t *cancel = NULL; char *cancel_key = crm_strdup(key); const char *call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); crm_info("Orphan action will be stopped: %s on %s", key, active_node->details->uname); /* cancel_key = generate_op_key( */ /* rsc->id, CRMD_ACTION_CANCEL, interval); */ cancel = custom_action( rsc, cancel_key, CRMD_ACTION_CANCEL, active_node, FALSE, TRUE, data_set); crm_free(cancel->task); cancel->task = crm_strdup(CRMD_ACTION_CANCEL); add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s); custom_action_order( rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); crm_free(key); key = NULL; return TRUE; } else if(op_match == NULL) { crm_debug("Orphan action detected: %s on %s", key, active_node->details->uname); crm_free(key); key = NULL; return TRUE; } } action = custom_action(rsc, key, task, active_node, TRUE, FALSE, data_set); local_rsc_params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes( rsc->xml, XML_TAG_ATTR_SETS, active_node->details->attrs, local_rsc_params, NULL, FALSE, data_set->now); params_all = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(action->extra, hash2field, params_all); g_hash_table_foreach(rsc->parameters, hash2field, params_all); g_hash_table_foreach(action->meta, hash2metafield, params_all); g_hash_table_foreach(local_rsc_params, hash2field, params_all); filter_action_parameters(params_all, op_version); digest_all_calc = calculate_xml_digest(params_all, TRUE, FALSE); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); if(crm_str_eq(task, CRMD_ACTION_START, TRUE)) { start_op = TRUE; } if(start_op && digest_restart) { params_restart = copy_xml(params_all); if(restart_list) { filter_reload_parameters(params_restart, restart_list); } digest_restart_calc = calculate_xml_digest(params_restart, TRUE, FALSE); if(safe_str_neq(digest_restart_calc, digest_restart)) { did_change = TRUE; crm_log_xml_info(params_restart, "params:restart"); crm_warn("Parameters to %s on %s changed: recorded %s vs. %s (restart:%s) %s", key, active_node->details->uname, crm_str(digest_restart), digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); key = generate_op_key(rsc->id, task, interval); custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); goto cleanup; } } if(safe_str_neq(digest_all_calc, digest_all)) { action_t *op = NULL; did_change = TRUE; crm_log_xml_info(params_all, "params:all"); crm_warn("Parameters to %s on %s changed: recorded %s vs. %s (all:%s) %s", key, active_node->details->uname, crm_str(digest_all), digest_all_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); key = generate_op_key(rsc->id, task, interval); op = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); if(start_op && digest_restart) { op->allow_reload_conversion = TRUE; } else if(interval > 0) { custom_action_order(rsc, start_key(rsc), NULL, NULL, crm_strdup(op->task), op, pe_order_runnable_left, data_set); } } cleanup: free_xml(params_all); free_xml(params_restart); crm_free(digest_all_calc); crm_free(digest_restart_calc); g_hash_table_destroy(local_rsc_params); pe_free_action(action); return did_change; } extern gboolean DeleteRsc(resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set); static void check_actions_for(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { const char *id = NULL; const char *task = NULL; int interval = 0; const char *interval_s = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; const char *rsc_id = ID(rsc_entry); gboolean is_probe = FALSE; int start_index = 0, stop_index = 0; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); CRM_CHECK(rsc != NULL, return); CRM_CHECK(node != NULL, return); CRM_CHECK(rsc_id != NULL, return); if(is_set(rsc->flags, pe_rsc_orphan)) { crm_debug_2("Skipping param check for %s: orphan", rsc->id); return; } else if(pe_find_node_id(rsc->running_on, node->details->id) == NULL) { crm_debug_2("Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } crm_debug_3("Processing %s on %s", rsc->id, node->details->uname); if(check_rsc_parameters(rsc, node, rsc_entry, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { /* stopped */ continue; } else if(lpc < start_index) { /* action occurred prior to a start */ continue; } id = ID(rsc_op); is_probe = FALSE; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(is_probe || safe_str_eq(task, CRMD_ACTION_START) || interval > 0) { check_action_definition(rsc, node, rsc_op, data_set); } ); g_list_free(sorted_op_list); } static void check_actions(pe_working_set_t *data_set) { const char *id = NULL; node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if(node == NULL) { continue; } else if(can_run_resources(node) == FALSE) { crm_debug_2("Skipping param check for %s: cant run resources", node->details->uname); continue; } crm_debug_2("Processing node %s", node->details->uname); if(node->details->online || data_set->stonith_enabled) { xml_child_iter_filter( lrm_rscs, rsc_entry, XML_LRM_TAG_RESOURCE, if(xml_has_children(rsc_entry)) { check_actions_for(rsc_entry, node, data_set); } ); } ); } static gboolean apply_placement_constraints(pe_working_set_t *data_set) { crm_debug_3("Applying constraints..."); slist_iter( cons, rsc_to_node_t, data_set->placement_constraints, lpc, cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); ); return TRUE; } static void common_apply_stickiness(resource_t *rsc, node_t *node, pe_working_set_t *data_set) { int fail_count = 0; - char *fail_attr = NULL; const char *value = NULL; resource_t *failed = rsc; GHashTable *meta_hash = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, common_apply_stickiness(child_rsc, node, data_set); ); return; } meta_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_meta_attributes(meta_hash, rsc, node, data_set); /* update resource preferences that relate to the current node */ value = g_hash_table_lookup(meta_hash, "resource_stickiness"); if(value != NULL && safe_str_neq("default", value)) { rsc->stickiness = char2score(value); } else { rsc->stickiness = data_set->default_resource_stickiness; } value = g_hash_table_lookup(meta_hash, XML_RSC_ATTR_FAIL_STICKINESS); if(value != NULL && safe_str_neq("default", value)) { rsc->migration_threshold = char2score(value); } else { rsc->migration_threshold = data_set->default_migration_threshold; } - /* process failure stickiness */ - fail_attr = crm_concat("fail-count", rsc->id, '-'); - value = g_hash_table_lookup(node->details->attrs, fail_attr); - if(value != NULL) { - fail_count = char2score(value); - crm_info("%s has failed %d on %s", - rsc->id, fail_count, node->details->uname); - } - crm_free(fail_attr); - if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } - fail_attr = crm_concat("last-failure", rsc->id, '-'); - value = g_hash_table_lookup(node->details->attrs, fail_attr); - if(value != NULL && rsc->failure_timeout) { - int last_failure = crm_parse_int(value, NULL); - if(last_failure > 0) { - time_t now = get_timet_now(data_set); - if(now > (last_failure + rsc->failure_timeout)) { - crm_notice("Failcount for %s on %s has expired (limit was %ds)", - failed->id, node->details->uname, rsc->failure_timeout); - fail_count = 0; - } - } - } - crm_free(fail_attr); + fail_count = get_failcount(node, rsc, NULL, data_set); if(fail_count > 0 && rsc->migration_threshold != 0) { if(rsc->migration_threshold <= fail_count) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_notice("%s can fail %d more times on %s before being forced off", failed->id, rsc->migration_threshold - fail_count, node->details->uname); } } g_hash_table_destroy(meta_hash); } static void complex_set_cmds(resource_t *rsc) { rsc->cmds = &resource_class_alloc_functions[rsc->variant]; slist_iter( child_rsc, resource_t, rsc->children, lpc, complex_set_cmds(child_rsc); ); } void set_alloc_actions(pe_working_set_t *data_set) { slist_iter( rsc, resource_t, data_set->resources, lpc, complex_set_cmds(rsc); ); } gboolean stage0(pe_working_set_t *data_set) { xmlNode * cib_constraints = get_object_root( XML_CIB_TAG_CONSTRAINTS, data_set->input); if(data_set->input == NULL) { return FALSE; } cluster_status(data_set); set_alloc_actions(data_set); slist_iter(node, node_t, data_set->nodes, lpc, slist_iter( rsc, resource_t, data_set->resources, lpc2, common_apply_stickiness(rsc, node, data_set); ); ); unpack_constraints(cib_constraints, data_set); return TRUE; } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t *data_set) { action_t *probe_complete = NULL; action_t *probe_node_complete = NULL; slist_iter( node, node_t, data_set->nodes, lpc, gboolean force_probe = FALSE; const char *probed = g_hash_table_lookup( node->details->attrs, CRM_OP_PROBED); crm_debug_2("%s probed: %s", node->details->uname, probed); if(node->details->online == FALSE) { continue; } else if(node->details->unclean) { continue; } else if(probe_complete == NULL) { probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set); } if(probed != NULL && crm_is_true(probed) == FALSE) { force_probe = TRUE; } probe_node_complete = custom_action( NULL, crm_strdup(CRM_OP_PROBED), CRM_OP_PROBED, node, FALSE, TRUE, data_set); probe_node_complete->optional = crm_is_true(probed); probe_node_complete->priority = INFINITY; add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); custom_action_order(NULL, NULL, probe_node_complete, NULL, NULL, probe_complete, pe_order_optional, data_set); slist_iter( rsc, resource_t, data_set->resources, lpc2, if(rsc->cmds->create_probe( rsc, node, probe_node_complete, force_probe, data_set)) { probe_complete->optional = FALSE; probe_node_complete->optional = FALSE; custom_action_order( NULL, NULL, probe_complete, rsc, start_key(rsc), NULL, pe_order_optional, data_set); } ); ); return TRUE; } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (ie. filter the "allowed_nodes" part of resources */ gboolean stage2(pe_working_set_t *data_set) { crm_debug_3("Applying placement constraints"); slist_iter( node, node_t, data_set->nodes, lpc, if(node == NULL) { /* error */ } else if(node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type == node_member) { data_set->max_valid_nodes++; } ); apply_placement_constraints(data_set); return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t *data_set) { slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->internal_constraints(rsc, data_set); ); return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t *data_set) { check_actions(data_set); return TRUE; } gboolean stage5(pe_working_set_t *data_set) { /* Take (next) highest resource, assign it and create its actions */ slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->color(rsc, data_set); ); probe_resources(data_set); slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->create_actions(rsc, data_set); ); return TRUE; } static gboolean is_managed(const resource_t *rsc) { if(is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } slist_iter( child_rsc, resource_t, rsc->children, lpc, if(is_managed(child_rsc)) { return TRUE; } ); return FALSE; } static gboolean any_managed_resouces(pe_working_set_t *data_set) { slist_iter( rsc, resource_t, data_set->resources, lpc, if(is_managed(rsc)) { return TRUE; } ); return FALSE; } /* * Create dependancies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t *data_set) { action_t *dc_down = NULL; action_t *stonith_op = NULL; action_t *last_stonith = NULL; gboolean integrity_lost = FALSE; action_t *ready = get_pseudo_op(STONITH_UP, data_set); action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); gboolean need_stonith = FALSE; crm_debug_3("Processing fencing and shutdown cases"); if(data_set->stonith_enabled && (data_set->have_quorum || data_set->no_quorum_policy == no_quorum_ignore)) { need_stonith = TRUE; } if(need_stonith && any_managed_resouces(data_set) == FALSE) { crm_crit("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } slist_iter( node, node_t, data_set->nodes, lpc, stonith_op = NULL; if(node->details->unclean && need_stonith) { pe_warn("Scheduling Node %s for STONITH", node->details->uname); stonith_op = custom_action( NULL, crm_strdup(CRM_OP_FENCE), CRM_OP_FENCE, node, FALSE, TRUE, data_set); add_hash_param( stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param( stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param( stonith_op->meta, "stonith_action", data_set->stonith_action); stonith_constraints(node, stonith_op, data_set); order_actions(ready, stonith_op, pe_order_implies_left); order_actions(stonith_op, all_stopped, pe_order_implies_right); if(node->details->is_dc) { dc_down = stonith_op; } else { if(last_stonith) { order_actions(last_stonith, stonith_op, pe_order_implies_left); } last_stonith = stonith_op; } } else if(node->details->online && node->details->shutdown) { action_t *down_op = NULL; crm_info("Scheduling Node %s for shutdown", node->details->uname); down_op = custom_action( NULL, crm_strdup(CRM_OP_SHUTDOWN), CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); shutdown_constraints(node, down_op, data_set); if(node->details->is_dc) { dc_down = down_op; } } if(node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } ); if(integrity_lost) { if(data_set->have_quorum == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no_quorum_policy is set to ignore)"); } else if(data_set->stonith_enabled == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } } if(dc_down != NULL) { GListPtr shutdown_matches = find_actions( data_set->actions, CRM_OP_SHUTDOWN, NULL); crm_debug_2("Ordering shutdowns before %s on %s (DC)", dc_down->task, dc_down->node->details->uname); add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); slist_iter( node_stop, action_t, shutdown_matches, lpc, if(node_stop->node->details->is_dc) { continue; } crm_debug("Ordering shutdown on %s before %s on %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_implies_left); ); if(last_stonith && dc_down != last_stonith) { order_actions(last_stonith, dc_down, pe_order_implies_left); } g_list_free(shutdown_matches); } return TRUE; } /* * Determin the sets of independant actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ gboolean stage7(pe_working_set_t *data_set) { crm_debug_4("Applying ordering constraints"); slist_iter( order, order_constraint_t, data_set->ordering_constraints, lpc, resource_t *rsc = order->lh_rsc; crm_debug_2("Applying ordering constraint: %d", order->id); if(rsc != NULL) { crm_debug_4("rsc_action-to-*"); rsc->cmds->rsc_order_lh(rsc, order, data_set); continue; } rsc = order->rh_rsc; if(rsc != NULL) { crm_debug_4("action-to-rsc_action"); rsc->cmds->rsc_order_rh(order->lh_action, rsc, order); } else { crm_debug_4("action-to-action"); order_actions( order->lh_action, order->rh_action, order->type); } ); update_action_states(data_set->actions); slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->migrate_reload(rsc, data_set); ); return TRUE; } int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the CRMd) */ gboolean stage8(pe_working_set_t *data_set) { const char *value = NULL; char *transition_id_s = NULL; transition_id++; transition_id_s = crm_itoa(transition_id); crm_debug_2("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); value = pe_pref(data_set->config_hash, "start-failure-is-fatal"); if(crm_is_true(value)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add(data_set->graph, "transition_id", transition_id_s); crm_free(transition_id_s); /* errors... slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ slist_iter( rsc, resource_t, data_set->resources, lpc, crm_debug_4("processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); ); crm_log_xml_debug_3( data_set->graph, "created resource-driven action list"); /* catch any non-resource specific actions */ crm_debug_4("processing non-resource actions"); slist_iter( action, action_t, data_set->actions, lpc, graph_element_from_action(action, data_set); ); crm_log_xml_debug_3(data_set->graph, "created generic action list"); crm_debug_2("Created transition graph %d.", transition_id); return TRUE; } void cleanup_alloc_calculations(pe_working_set_t *data_set) { if(data_set == NULL) { return; } crm_debug_3("deleting order cons: %p", data_set->ordering_constraints); pe_free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_debug_3("deleting node cons: %p", data_set->placement_constraints); pe_free_rsc_to_node(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_debug_3("deleting inter-resource cons: %p", data_set->colocation_constraints); pe_free_shallow(data_set->colocation_constraints); data_set->colocation_constraints = NULL; cleanup_calculations(data_set); }