diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index 2e11a70067..04b7dc24f3 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -1,263 +1,264 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef XML_TAGS__H #define XML_TAGS__H #define CIB_OPTIONS_FIRST "cib-bootstrap-options" #define F_CRM_TASK "crm_task" #define F_CRM_HOST_TO "crm_host_to" #define F_CRM_MSG_TYPE F_SUBTYPE #define F_CRM_SYS_TO "crm_sys_to" #define F_CRM_SYS_FROM "crm_sys_from" #define F_CRM_HOST_FROM F_ORIG #define F_CRM_REFERENCE XML_ATTR_REFERENCE #define F_CRM_VERSION XML_ATTR_VERSION #define F_CRM_ORIGIN "origin" #define F_CRM_JOIN_ID "join_id" #define F_CRM_ELECTION_ID "election-id" #define F_CRM_ELECTION_OWNER "election-owner" #define F_CRM_TGRAPH "crm-tgraph" #define F_CRM_TGRAPH_INPUT "crm-tgraph-in" /*---- Common tags/attrs */ #define XML_DIFF_MARKER "__crm_diff_marker__" #define XML_ATTR_TAGNAME F_XML_TAGNAME #define XML_TAG_CIB "cib" #define XML_TAG_FAILED "failed" #define XML_ATTR_CRM_VERSION "crm_feature_set" #define XML_ATTR_DIGEST "digest" #define XML_ATTR_NUMPEERS "num_peers" #define XML_ATTR_HAVE_QUORUM "have_quorum" #define XML_ATTR_CCM_TRANSITION "ccm_transition" #define XML_ATTR_GENERATION "epoch" #define XML_ATTR_GENERATION_ADMIN "admin_epoch" #define XML_ATTR_NUMUPDATES "num_updates" #define XML_ATTR_TIMEOUT "timeout" #define XML_ATTR_ORIGIN "crm-debug-origin" #define XML_ATTR_TSTAMP "crm-timestamp" #define XML_CIB_ATTR_WRITTEN "cib-last-written" #define XML_ATTR_VERSION "version" #define XML_ATTR_DESC "description" #define XML_ATTR_ID "id" #define XML_ATTR_ID_LONG "long-id" #define XML_ATTR_TYPE "type" #define XML_ATTR_FILTER_TYPE "type_filter" #define XML_ATTR_FILTER_ID "id_filter" #define XML_ATTR_FILTER_PRIORITY "priority_filter" #define XML_ATTR_VERBOSE "verbose" #define XML_ATTR_OP "op" #define XML_ATTR_DC "is_dc" #define XML_ATTR_DC_UUID "dc_uuid" #define XML_ATTR_CIB_REVISION "cib_feature_revision" #define XML_ATTR_CIB_REVISION_MAX "cib_feature_revision_max" #define XML_BOOLEAN_TRUE "true" #define XML_BOOLEAN_FALSE "false" #define XML_BOOLEAN_YES XML_BOOLEAN_TRUE #define XML_BOOLEAN_NO XML_BOOLEAN_FALSE #define XML_TAG_OPTIONS "options" /*---- top level tags/attrs */ #define XML_MSG_TAG "crm_message" #define XML_MSG_TAG_DATA "msg_data" #define XML_ATTR_REQUEST "request" #define XML_ATTR_RESPONSE "response" #define XML_ATTR_UNAME "uname" #define XML_ATTR_UUID "id" #define XML_ATTR_REFERENCE "reference" #define XML_FAIL_TAG_RESOURCE "failed_resource" #define XML_FAILRES_ATTR_RESID "resource_id" #define XML_FAILRES_ATTR_REASON "reason" #define XML_FAILRES_ATTR_RESSTATUS "resource_status" #define XML_CRM_TAG_PING "ping_response" #define XML_PING_ATTR_STATUS "result" #define XML_PING_ATTR_SYSFROM "crm_subsystem" #define XML_TAG_FRAGMENT "cib_fragment" #define XML_ATTR_RESULT "result" #define XML_ATTR_SECTION "section" #define XML_FAIL_TAG_CIB "failed_update" #define XML_FAILCIB_ATTR_ID "id" #define XML_FAILCIB_ATTR_OBJTYPE "object_type" #define XML_FAILCIB_ATTR_OP "operation" #define XML_FAILCIB_ATTR_REASON "reason" /*---- CIB specific tags/attrs */ #define XML_CIB_TAG_SECTION_ALL "all" #define XML_CIB_TAG_CONFIGURATION "configuration" #define XML_CIB_TAG_STATUS "status" #define XML_CIB_TAG_RESOURCES "resources" #define XML_CIB_TAG_NODES "nodes" #define XML_CIB_TAG_CONSTRAINTS "constraints" #define XML_CIB_TAG_CRMCONFIG "crm_config" #define XML_CIB_TAG_STATE "node_state" #define XML_CIB_TAG_NODE "node" #define XML_CIB_TAG_CONSTRAINT "constraint" #define XML_CIB_TAG_NVPAIR "nvpair" #define XML_CIB_TAG_PROPSET "cluster_property_set" #define XML_TAG_ATTR_SETS "instance_attributes" #define XML_TAG_META_SETS "meta_attributes" #define XML_TAG_ATTRS "attributes" #define XML_TAG_PARAMS "parameters" #define XML_CIB_TAG_RESOURCE "primitive" #define XML_CIB_TAG_GROUP "group" #define XML_CIB_TAG_INCARNATION "clone" #define XML_CIB_TAG_MASTER "master_slave" #define XML_RSC_ATTR_STOPFAIL "on_stopfail" #define XML_RSC_ATTR_RESTART "restart_type" #define XML_RSC_ATTR_START_TIMEOUT "start_timeout" #define XML_RSC_ATTR_STOP_TIMEOUT "stop_timeout" #define XML_RSC_ATTR_ORDERED "ordered" #define XML_RSC_ATTR_INTERLEAVE "interleave" #define XML_RSC_ATTR_INCARNATION "clone" #define XML_RSC_ATTR_INCARNATION_MAX "clone_max" #define XML_RSC_ATTR_INCARNATION_NODEMAX "clone_node_max" #define XML_RSC_ATTR_MASTER_MAX "master_max" #define XML_RSC_ATTR_MASTER_NODEMAX "master_node_max" #define XML_RSC_ATTR_STATE "clone_state" #define XML_RSC_ATTR_MANAGED "is_managed" #define XML_RSC_ATTR_TARGET_ROLE "target_role" #define XML_RSC_ATTR_UNIQUE "globally_unique" #define XML_RSC_ATTR_NOTIFY "notify" #define XML_RSC_ATTR_STICKINESS "resource_stickiness" #define XML_RSC_ATTR_FAIL_STICKINESS "migration-threshold" +#define XML_RSC_ATTR_FAIL_TIMEOUT "failure-timeout" #define XML_RSC_ATTR_MULTIPLE "multiple_active" #define XML_RSC_ATTR_START "start_prereq" #define XML_RSC_ATTR_PRIORITY "priority" #define XML_CIB_TAG_LRM "lrm" #define XML_LRM_TAG_RESOURCES "lrm_resources" #define XML_LRM_TAG_RESOURCE "lrm_resource" #define XML_LRM_TAG_AGENTS "lrm_agents" #define XML_LRM_TAG_AGENT "lrm_agent" #define XML_LRM_TAG_RSC_OP "lrm_rsc_op" #define XML_AGENT_ATTR_CLASS "class" #define XML_AGENT_ATTR_PROVIDER "provider" #define XML_LRM_TAG_ATTRIBUTES "attributes" #define XML_CIB_ATTR_REPLACE "replace" #define XML_CIB_ATTR_SOURCE "source" #define XML_CIB_ATTR_HEALTH "health" #define XML_CIB_ATTR_WEIGHT "weight" #define XML_CIB_ATTR_PRIORITY "priority" #define XML_CIB_ATTR_CLEAR "clear_on" #define XML_CIB_ATTR_SOURCE "source" #define XML_CIB_ATTR_JOINSTATE "join" #define XML_CIB_ATTR_EXPSTATE "expected" #define XML_CIB_ATTR_INCCM "in_ccm" #define XML_CIB_ATTR_CRMDSTATE "crmd" #define XML_CIB_ATTR_HASTATE "ha" #define XML_CIB_ATTR_SHUTDOWN "shutdown" #define XML_CIB_ATTR_STONITH "stonith" #define XML_LRM_ATTR_INTERVAL "interval" #define XML_LRM_ATTR_TASK "operation" #define XML_LRM_ATTR_TASK_KEY "operation_key" #define XML_LRM_ATTR_TARGET "on_node" #define XML_LRM_ATTR_TARGET_UUID "on_node_uuid" #define XML_LRM_ATTR_RSCID "rsc_id" #define XML_LRM_ATTR_OPSTATUS "op_status" #define XML_LRM_ATTR_RC "rc_code" #define XML_LRM_ATTR_CALLID "call_id" #define XML_LRM_ATTR_OP_DIGEST "op_digest" #define XML_LRM_ATTR_OP_RESTART "op_force_restart" #define XML_LRM_ATTR_RESTART_DIGEST "op_restart_digest" #define XML_TAG_GRAPH "transition_graph" #define XML_GRAPH_TAG_RSC_OP "rsc_op" #define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event" #define XML_GRAPH_TAG_CRM_EVENT "crm_event" #define XML_TAG_RULE "rule" #define XML_RULE_ATTR_SCORE "score" #define XML_RULE_ATTR_SCORE_ATTRIBUTE "score_attribute" #define XML_RULE_ATTR_SCORE_MANGLED "score_attribute_mangled" #define XML_RULE_ATTR_ROLE "role" #define XML_RULE_ATTR_RESULT "result" #define XML_RULE_ATTR_BOOLEAN_OP "boolean_op" #define XML_RULE_ATTR_FROMSTATE "from_role" #define XML_RULE_ATTR_TOSTATE "to_role" #define XML_TAG_EXPRESSION "expression" #define XML_EXPR_ATTR_ATTRIBUTE "attribute" #define XML_EXPR_ATTR_OPERATION "operation" #define XML_EXPR_ATTR_VALUE "value" #define XML_EXPR_ATTR_TYPE "type" #define XML_CONS_TAG_RSC_DEPEND "rsc_colocation" #define XML_CONS_TAG_RSC_ORDER "rsc_order" #define XML_CONS_TAG_RSC_LOCATION "rsc_location" #define XML_CONS_ATTR_FROM "from" #define XML_CONS_ATTR_TO "to" #define XML_CONS_ATTR_ACTION "action" #define XML_CONS_ATTR_TOACTION "to_action" #define XML_CONS_ATTR_SYMMETRICAL "symmetrical" #define XML_NVPAIR_ATTR_NAME "name" #define XML_NVPAIR_ATTR_VALUE "value" #define XML_STRENGTH_VAL_MUST "must" #define XML_STRENGTH_VAL_SHOULD "should" #define XML_STRENGTH_VAL_SHOULDNOT "should_not" #define XML_STRENGTH_VAL_MUSTNOT "must_not" #define XML_NODE_ATTR_STATE "state" #define XML_CONFIG_ATTR_DC_DEADTIME "dc_deadtime" #define XML_CONFIG_ATTR_ELECTION_FAIL "election_timeout" #define XML_CONFIG_ATTR_FORCE_QUIT "shutdown_escalation" #define XML_CONFIG_ATTR_REANNOUNCE "join_reannouce" #define XML_CONFIG_ATTR_RECHECK "cluster_recheck_interval" #define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple" #define XML_ATTR_TRANSITION_MAGIC "transition_magic" #define XML_ATTR_TRANSITION_KEY "transition_key" #define XML_ATTR_TE_NOWAIT "op_no_wait" #define XML_ATTR_TE_TARGET_RC "op_target_rc" #define XML_ATTR_TE_ALLOWFAIL "op_allow_fail" #define XML_ATTR_LRM_PROBE "lrm-is-probe" #define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes" #include #define ID(x) crm_element_value(x, XML_ATTR_ID) #define INSTANCE(x) crm_element_value(x, XML_CIB_ATTR_INSTANCE) #define TSTAMP(x) crm_element_value(x, XML_ATTR_TSTAMP) #define TYPE(x) crm_element_name(x) #endif diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index decf241872..d0759d93b2 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,232 +1,234 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PENGINE_STATUS__H #define PENGINE_STATUS__H #include #include #include typedef struct node_s node_t; typedef struct action_s action_t; typedef struct resource_s resource_t; typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, no_quorum_ignore } no_quorum_policy_t; enum node_type { node_ping, node_member }; enum pe_restart { pe_restart_restart, pe_restart_ignore }; typedef struct pe_working_set_s { xmlNode *input; ha_time_t *now; /* options extracted from the input */ char *transition_idle_timeout; char *dc_uuid; node_t *dc_node; gboolean have_quorum; gboolean stonith_enabled; const char *stonith_action; gboolean symmetric_cluster; gboolean is_managed_default; gboolean start_failure_fatal; gboolean remove_after_stop; gboolean stop_rsc_orphans; gboolean stop_action_orphans; gboolean stop_everything; + int default_failure_timeout; int default_migration_threshold; int default_resource_stickiness; no_quorum_policy_t no_quorum_policy; GHashTable *config_hash; GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr colocation_constraints; GListPtr actions; xmlNode *failed; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; /* final output */ xmlNode *graph; } pe_working_set_t; struct node_shared_s { const char *id; const char *uname; gboolean online; gboolean standby; gboolean unclean; gboolean shutdown; gboolean expected_up; gboolean is_dc; int num_resources; GListPtr running_rsc; /* resource_t* */ GListPtr allocated_rsc; /* resource_t* */ GHashTable *attrs; /* char* => char* */ enum node_type type; }; struct node_s { int weight; gboolean fixed; int count; struct node_shared_s *details; }; #include #define pe_rsc_orphan 0x00000001ULL #define pe_rsc_managed 0x00000002ULL #define pe_rsc_notify 0x00000010ULL #define pe_rsc_unique 0x00000020ULL #define pe_rsc_can_migrate 0x00000040ULL #define pe_rsc_provisional 0x00000100ULL #define pe_rsc_allocating 0x00000200ULL #define pe_rsc_merging 0x00000400ULL #define pe_rsc_failed 0x00010000ULL #define pe_rsc_shutdown 0x00020000ULL #define pe_rsc_runnable 0x00040000ULL #define pe_rsc_start_pending 0x00080000ULL #define pe_rsc_starting 0x00100000ULL #define pe_rsc_stopping 0x00200000ULL struct resource_s { char *id; char *clone_name; char *long_name; xmlNode *xml; xmlNode *ops_xml; resource_t *parent; void *variant_opaque; enum pe_obj_types variant; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; int priority; int stickiness; int sort_index; + int failure_timeout; int effective_priority; int migration_threshold; unsigned long long flags; GListPtr rsc_cons_lhs; /* rsc_colocation_t* */ GListPtr rsc_cons; /* rsc_colocation_t* */ GListPtr rsc_location; /* rsc_to_node_t* */ GListPtr actions; /* action_t* */ node_t *allocated_to; GListPtr running_on; /* node_t* */ GListPtr known_on; /* node_t* */ GListPtr allowed_nodes; /* node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; GListPtr children; /* resource_t* */ }; struct action_s { int id; int priority; resource_t *rsc; void *rsc_opaque; node_t *node; char *task; char *uuid; xmlNode *op_entry; gboolean pseudo; gboolean runnable; gboolean optional; gboolean print_always; gboolean failure_is_fatal; gboolean implied_by_stonith; gboolean allow_reload_conversion; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; gboolean dumped; gboolean processed; action_t *pre_notify; action_t *pre_notified; action_t *post_notify; action_t *post_notified; int seen_count; GHashTable *meta; GHashTable *extra; GHashTable *notify_keys; /* do NOT free */ GListPtr actions_before; /* action_warpper_t* */ GListPtr actions_after; /* action_warpper_t* */ }; gboolean cluster_status(pe_working_set_t *data_set); extern void set_working_set_defaults(pe_working_set_t *data_set); extern void cleanup_calculations(pe_working_set_t *data_set); extern resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh); extern node_t *pe_find_node(GListPtr node_list, const char *uname); extern node_t *pe_find_node_id(GListPtr node_list, const char *id); extern GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set); #endif diff --git a/lib/crm/pengine/complex.c b/lib/crm/pengine/complex.c index 89203bb0cd..0017280dea 100644 --- a/lib/crm/pengine/complex.c +++ b/lib/crm/pengine/complex.c @@ -1,439 +1,446 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include void populate_hash(xmlNode *nvpair_list, GHashTable *hash, const char **attrs, int attrs_length); resource_object_functions_t resource_class_functions[] = { { native_unpack, native_find_child, native_children, native_parameter, native_print, native_active, native_resource_state, native_location, native_free }, { group_unpack, native_find_child, native_children, native_parameter, group_print, group_active, group_resource_state, native_location, group_free }, { clone_unpack, native_find_child, native_children, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free }, { master_unpack, native_find_child, native_children, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free } }; int get_resource_type(const char *name) { if(safe_str_eq(name, XML_CIB_TAG_RESOURCE)) { return pe_native; } else if(safe_str_eq(name, XML_CIB_TAG_GROUP)) { return pe_group; } else if(safe_str_eq(name, XML_CIB_TAG_INCARNATION)) { return pe_clone; } else if(safe_str_eq(name, XML_CIB_TAG_MASTER)) { return pe_master; } return pe_unknown; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } void get_meta_attributes(GHashTable *meta_hash, resource_t *rsc, node_t *node, pe_working_set_t *data_set) { GHashTable *node_hash = NULL; if(node) { node_hash = node->details->attrs; } xml_prop_iter(rsc->xml, prop_name, prop_value, add_hash_param(meta_hash, prop_name, prop_value); ); unpack_instance_attributes(rsc->xml, XML_TAG_META_SETS, node_hash, meta_hash, NULL, data_set->now); /* populate from the regular attributes until the GUI can create * meta attributes */ unpack_instance_attributes(rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, data_set->now); /* set anything else based on the parent */ if(rsc->parent != NULL) { g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash); } } gboolean common_unpack(xmlNode * xml_obj, resource_t **rsc, resource_t *parent, pe_working_set_t *data_set) { const char *value = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); crm_log_xml_debug_3(xml_obj, "Processing resource input..."); if(id == NULL) { pe_err("Must specify id tag in "); return FALSE; } else if(rsc == NULL) { pe_err("Nowhere to unpack resource into"); return FALSE; } crm_malloc0(*rsc, sizeof(resource_t)); if(*rsc == NULL) { return FALSE; } (*rsc)->xml = xml_obj; (*rsc)->parent = parent; (*rsc)->ops_xml = find_xml_node(xml_obj, "operations", FALSE); (*rsc)->variant = get_resource_type(crm_element_name(xml_obj)); if((*rsc)->variant == pe_unknown) { pe_err("Unknown resource type: %s", crm_element_name(xml_obj)); crm_free(*rsc); return FALSE; } (*rsc)->parameters = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); (*rsc)->meta = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); value = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION); if(value) { (*rsc)->id = crm_concat(id, value, ':'); add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value); } else { (*rsc)->id = crm_strdup(id); } if(parent) { (*rsc)->long_name = crm_concat(parent->long_name, (*rsc)->id, ':'); } else { (*rsc)->long_name = crm_strdup((*rsc)->id); } (*rsc)->fns = &resource_class_functions[(*rsc)->variant]; crm_debug_3("Unpacking resource..."); get_meta_attributes((*rsc)->meta, *rsc, NULL, data_set); if(parent != NULL) { g_hash_table_foreach( parent->parameters, dup_attr, (*rsc)->parameters); } (*rsc)->flags = 0; set_bit((*rsc)->flags, pe_rsc_runnable); set_bit((*rsc)->flags, pe_rsc_provisional); if(data_set->is_managed_default) { set_bit((*rsc)->flags, pe_rsc_managed); } (*rsc)->rsc_cons = NULL; (*rsc)->actions = NULL; (*rsc)->role = RSC_ROLE_STOPPED; (*rsc)->next_role = RSC_ROLE_UNKNOWN; (*rsc)->recovery_type = recovery_stop_start; (*rsc)->stickiness = data_set->default_resource_stickiness; (*rsc)->migration_threshold = data_set->default_migration_threshold; + (*rsc)->failure_timeout = data_set->default_failure_timeout; value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY); (*rsc)->priority = crm_parse_int(value, "0"); (*rsc)->effective_priority = (*rsc)->priority; value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY); if(crm_is_true(value)) { set_bit((*rsc)->flags, pe_rsc_notify); } value = g_hash_table_lookup((*rsc)->meta, "is_managed"); if(value != NULL && safe_str_neq("default", value)) { gboolean bool_value = TRUE; cl_str_to_boolean(value, &bool_value); if(bool_value == FALSE) { clear_bit((*rsc)->flags, pe_rsc_managed); } else { set_bit((*rsc)->flags, pe_rsc_managed); } } crm_debug_2("Options for %s", (*rsc)->id); value = g_hash_table_lookup((*rsc)->meta, "globally_unique"); if(value == NULL || crm_is_true(value)) { set_bit((*rsc)->flags, pe_rsc_unique); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART); if(safe_str_eq(value, "restart")) { (*rsc)->restart_type = pe_restart_restart; crm_debug_2("\tDependancy restart handling: restart"); } else { (*rsc)->restart_type = pe_restart_ignore; crm_debug_2("\tDependancy restart handling: ignore"); } value = g_hash_table_lookup((*rsc)->meta, "multiple_active"); if(safe_str_eq(value, "stop_only")) { (*rsc)->recovery_type = recovery_stop_only; crm_debug_2("\tMultiple running resource recovery: stop only"); } else if(safe_str_eq(value, "block")) { (*rsc)->recovery_type = recovery_block; crm_debug_2("\tMultiple running resource recovery: block"); } else { (*rsc)->recovery_type = recovery_stop_start; crm_debug_2("\tMultiple running resource recovery: stop/start"); } value = g_hash_table_lookup((*rsc)->meta, "resource_stickiness"); if(value != NULL && safe_str_neq("default", value)) { (*rsc)->stickiness = char2score(value); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_STICKINESS); if(value != NULL) { (*rsc)->migration_threshold = char2score(value); } + value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_TIMEOUT); + if(value != NULL) { + /* call crm_get_msec() and convert back to seconds */ + (*rsc)->failure_timeout = (crm_get_msec(value) / 1000); + } + value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_TARGET_ROLE); if(data_set->stop_everything) { (*rsc)->next_role = RSC_ROLE_STOPPED; } else if(value != NULL && safe_str_neq("default", value)) { (*rsc)->next_role = text2role(value); if((*rsc)->next_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for " XML_RSC_ATTR_TARGET_ROLE": %s", (*rsc)->id, value); } } crm_debug_2("\tDesired next state: %s", (*rsc)->next_role!=RSC_ROLE_UNKNOWN?role2text((*rsc)->next_role):"default"); if((*rsc)->fns->unpack(*rsc, data_set) == FALSE) { return FALSE; } if(is_not_set((*rsc)->flags, pe_rsc_managed)) { crm_warn("Resource %s is currently not managed", (*rsc)->id); } else if(data_set->symmetric_cluster) { resource_location(*rsc, NULL, 0, "symmetric_default", data_set); } crm_debug_2("\tAction notification: %s", is_set((*rsc)->flags, pe_rsc_notify)?"required":"not required"); /* data_set->resources = g_list_append(data_set->resources, (*rsc)); */ return TRUE; } void common_update_score(resource_t *rsc, const char *id, int score) { node_t *node = NULL; node = pe_find_node_id(rsc->allowed_nodes, id); if(node != NULL) { crm_debug_2("Updating score for %s on %s: %d + %d", rsc->id, id, node->weight, score); node->weight = merge_weights(node->weight, score); } if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, common_update_score(child_rsc, id, score); ); } } resource_t *uber_parent(resource_t *rsc) { resource_t *parent = rsc; while(parent != NULL && parent->parent != NULL) { parent = parent->parent; } return parent; } void common_apply_stickiness(resource_t *rsc, node_t *node, pe_working_set_t *data_set) { int fail_count = 0; char *fail_attr = NULL; const char *value = NULL; GHashTable *meta_hash = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, common_apply_stickiness(child_rsc, node, data_set); ); return; } meta_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_meta_attributes(meta_hash, rsc, node, data_set); /* update resource preferences that relate to the current node */ value = g_hash_table_lookup(meta_hash, "resource_stickiness"); if(value != NULL && safe_str_neq("default", value)) { rsc->stickiness = char2score(value); } else { rsc->stickiness = data_set->default_resource_stickiness; } value = g_hash_table_lookup(meta_hash, XML_RSC_ATTR_FAIL_STICKINESS); if(value != NULL && safe_str_neq("default", value)) { rsc->migration_threshold = char2score(value); } else { rsc->migration_threshold = data_set->default_migration_threshold; } /* process failure stickiness */ fail_attr = crm_concat("fail-count", rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, fail_attr); if(value != NULL) { fail_count = char2score(value); crm_info("%s has failed %d on %s", rsc->id, fail_count, node->details->uname); } crm_free(fail_attr); if(fail_count > 0 && rsc->migration_threshold != 0) { resource_t *failed = rsc; if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if(rsc->migration_threshold <= fail_count) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_notice("%s can fail %d more times on %s before being forced off", failed->id, rsc->migration_threshold - fail_count, node->details->uname); } } g_hash_table_destroy(meta_hash); } void common_free(resource_t *rsc) { if(rsc == NULL) { return; } crm_debug_5("Freeing %s %d", rsc->id, rsc->variant); g_list_free(rsc->rsc_cons); g_list_free(rsc->rsc_cons_lhs); if(rsc->parameters != NULL) { g_hash_table_destroy(rsc->parameters); } if(rsc->meta != NULL) { g_hash_table_destroy(rsc->meta); } if(is_set(rsc->flags, pe_rsc_orphan)) { free_xml(rsc->xml); } if(rsc->running_on) { g_list_free(rsc->running_on); rsc->running_on = NULL; } if(rsc->known_on) { g_list_free(rsc->known_on); rsc->known_on = NULL; } if(rsc->actions) { g_list_free(rsc->actions); rsc->actions = NULL; } pe_free_shallow_adv(rsc->rsc_location, FALSE); pe_free_shallow_adv(rsc->allowed_nodes, TRUE); crm_free(rsc->id); crm_free(rsc->long_name); crm_free(rsc->clone_name); crm_free(rsc->allocated_to); crm_free(rsc->variant_opaque); crm_free(rsc); crm_debug_5("Resource freed"); } diff --git a/lib/crm/pengine/status.c b/lib/crm/pengine/status.c index e50ad60a26..e3e587e324 100644 --- a/lib/crm/pengine/status.c +++ b/lib/crm/pengine/status.c @@ -1,308 +1,309 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include xmlNode * do_calculations( pe_working_set_t *data_set, xmlNode *xml_input, ha_time_t *now); extern xmlNode*get_object_root( const char *object_type, xmlNode *the_root); #define PE_WORKING_DIR HA_VARLIBDIR"/heartbeat/pengine" #define MEMCHECK_STAGE_0 0 #define check_and_exit(stage) cleanup_calculations(data_set); \ crm_mem_stats(NULL); \ crm_err("Exiting: stage %d", stage); \ exit(1); /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pe_working_set_t *data_set) { xmlNode * config = get_object_root( XML_CIB_TAG_CRMCONFIG, data_set->input); xmlNode * cib_nodes = get_object_root( XML_CIB_TAG_NODES, data_set->input); xmlNode * cib_resources = get_object_root( XML_CIB_TAG_RESOURCES, data_set->input); xmlNode * cib_status = get_object_root( XML_CIB_TAG_STATUS, data_set->input); const char *value = crm_element_value( data_set->input, XML_ATTR_HAVE_QUORUM); crm_debug_3("Beginning unpack"); /* reset remaining global variables */ if(data_set->input == NULL) { return FALSE; } if(data_set->now == NULL) { data_set->now = new_ha_date(TRUE); } if(data_set->input != NULL && crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { /* this should always be present */ data_set->dc_uuid = crm_element_value_copy( data_set->input, XML_ATTR_DC_UUID); } unpack_config(config, data_set); if(value != NULL) { cl_str_to_boolean(value, &data_set->have_quorum); } if(data_set->have_quorum == FALSE && data_set->no_quorum_policy != no_quorum_ignore) { crm_warn("We do not have quorum" " - fencing and resource management disabled"); } unpack_nodes(cib_nodes, data_set); unpack_resources(cib_resources, data_set); unpack_status(cib_status, data_set); return TRUE; } static void pe_free_resources(GListPtr resources) { resource_t *rsc = NULL; GListPtr iterator = resources; while(iterator != NULL) { iterator = iterator; rsc = (resource_t *)iterator->data; iterator = iterator->next; rsc->fns->free(rsc); } if(resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GListPtr actions) { GListPtr iterator = actions; while(iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if(actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GListPtr nodes) { GListPtr iterator = nodes; while(iterator != NULL) { node_t *node = (node_t*)iterator->data; struct node_shared_s *details = node->details; iterator = iterator->next; crm_debug_5("deleting node"); crm_debug_5("%s is being deleted", details->uname); print_node("delete", node, FALSE); if(details != NULL) { if(details->attrs != NULL) { g_hash_table_destroy(details->attrs); } pe_free_shallow_adv(details->running_rsc, FALSE); pe_free_shallow_adv(details->allocated_rsc, FALSE); crm_free(details); } crm_free(node); } if(nodes != NULL) { g_list_free(nodes); } } void cleanup_calculations(pe_working_set_t *data_set) { if(data_set == NULL) { return; } if(data_set->config_hash != NULL) { g_hash_table_destroy(data_set->config_hash); } crm_free(data_set->dc_uuid); crm_free(data_set->transition_idle_timeout); crm_debug_3("deleting resources"); pe_free_resources(data_set->resources); crm_debug_3("deleting actions"); pe_free_actions(data_set->actions); crm_debug_3("deleting nodes"); pe_free_nodes(data_set->nodes); free_xml(data_set->graph); free_ha_date(data_set->now); free_xml(data_set->input); free_xml(data_set->failed); data_set->stonith_action = NULL; CRM_CHECK(data_set->ordering_constraints == NULL, ;); CRM_CHECK(data_set->placement_constraints == NULL, ;); } void set_working_set_defaults(pe_working_set_t *data_set) { data_set->input = NULL; data_set->now = NULL; data_set->graph = NULL; data_set->failed = create_xml_node(NULL, "failed-ops"); data_set->transition_idle_timeout = NULL; data_set->dc_uuid = NULL; data_set->dc_node = NULL; data_set->have_quorum = FALSE; data_set->stonith_enabled = FALSE; data_set->stonith_action = NULL; data_set->symmetric_cluster = TRUE; data_set->is_managed_default = TRUE; data_set->no_quorum_policy = no_quorum_freeze; data_set->remove_after_stop = FALSE; data_set->stop_action_orphans = TRUE; data_set->stop_rsc_orphans = TRUE; data_set->config_hash = NULL; data_set->nodes = NULL; data_set->resources = NULL; data_set->ordering_constraints = NULL; data_set->placement_constraints = NULL; data_set->colocation_constraints = NULL; data_set->actions = NULL; data_set->num_synapse = 0; data_set->max_valid_nodes = 0; data_set->order_id = 1; data_set->action_id = 1; - data_set->default_migration_threshold = 0; + data_set->default_failure_timeout = 0; + data_set->default_migration_threshold = 0; data_set->default_resource_stickiness = 0; } resource_t * pe_find_resource(GListPtr rsc_list, const char *id) { unsigned lpc = 0; resource_t *rsc = NULL; resource_t *child_rsc = NULL; if(id == NULL) { return NULL; } crm_debug_4("Looking for %s in %d objects", id, g_list_length(rsc_list)); for(lpc = 0; lpc < g_list_length(rsc_list); lpc++) { rsc = g_list_nth_data(rsc_list, lpc); if(rsc == NULL) { } else if(rsc->id && strcmp(rsc->id, id) == 0){ crm_debug_4("Found a match for %s", id); return rsc; } else if(rsc->long_name && strcmp(rsc->long_name, id) == 0) { crm_debug_4("Found a match for %s", id); return rsc; } else if(rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { crm_debug_4("Found a match for %s", id); return rsc; } } for(lpc = 0; lpc < g_list_length(rsc_list); lpc++) { rsc = g_list_nth_data(rsc_list, lpc); child_rsc = rsc->fns->find_child(rsc, id); if(child_rsc != NULL) { return child_rsc; } } crm_debug_2("No match for %s", id); return NULL; } node_t * pe_find_node_id(GListPtr nodes, const char *id) { slist_iter(node, node_t, nodes, lpc, if(node && safe_str_eq(node->details->id, id)) { return node; } ); /* error */ return NULL; } node_t * pe_find_node(GListPtr nodes, const char *uname) { slist_iter(node, node_t, nodes, lpc, if(node && safe_str_eq(node->details->uname, uname)) { return node; } ); /* error */ return NULL; } diff --git a/lib/crm/pengine/unpack.c b/lib/crm/pengine/unpack.c index ff1c94e093..22f158edfc 100644 --- a/lib/crm/pengine/unpack.c +++ b/lib/crm/pengine/unpack.c @@ -1,1438 +1,1465 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include /* for ONLINESTATUS */ #include #include #include #include gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set) { const char *name = NULL; const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, data_set->now); #if CRM_DEPRECATED_SINCE_2_0_1 xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); value = crm_element_value(a_child, XML_NVPAIR_ATTR_VALUE); if(g_hash_table_lookup(config_hash, name) == NULL) { g_hash_table_insert( config_hash,crm_strdup(name),crm_strdup(value)); } crm_config_err("Creating directly" "beneath has been depreciated since" " 2.0.1", ID(a_child), name); ); #else xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); crm_config_err("Creating directly" "beneath has been depreciated since" " 2.0.1 and is now disabled", ID(a_child), name); ); #endif verify_pe_options(data_set->config_hash); value = pe_pref(data_set->config_hash, "default-action-timeout"); data_set->transition_idle_timeout = crm_strdup(value); crm_debug("Default action timeout: %s", data_set->transition_idle_timeout); value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "stop-all-resources"); data_set->stop_everything = crm_is_true(value); crm_debug("Stop all active resources: %s", data_set->stop_everything?"true":"false"); value = pe_pref(data_set->config_hash, "default-migration-threshold"); data_set->default_migration_threshold = char2score(value); crm_debug("Default migration threshold: %d", data_set->default_migration_threshold); value = pe_pref(data_set->config_hash, "stonith-enabled"); cl_str_to_boolean(value, &data_set->stonith_enabled); crm_debug("STONITH of failed nodes is %s", data_set->stonith_enabled?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); value = pe_pref(data_set->config_hash, "symmetric-cluster"); cl_str_to_boolean(value, &data_set->symmetric_cluster); if(data_set->symmetric_cluster) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } value = pe_pref(data_set->config_hash, "stop-orphan-resources"); cl_str_to_boolean(value, &data_set->stop_rsc_orphans); crm_debug_2("Orphan resources are %s", data_set->stop_rsc_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "stop-orphan-actions"); cl_str_to_boolean(value, &data_set->stop_action_orphans); crm_debug_2("Orphan resource actions are %s", data_set->stop_action_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "remove-after-stop"); cl_str_to_boolean(value, &data_set->remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", data_set->remove_after_stop?"true":"false"); value = pe_pref(data_set->config_hash, "is-managed-default"); cl_str_to_boolean(value, &data_set->is_managed_default); crm_debug_2("By default resources are %smanaged", data_set->is_managed_default?"":"not "); value = pe_pref(data_set->config_hash, "start-failure-is-fatal"); cl_str_to_boolean(value, &data_set->start_failure_fatal); crm_debug_2("Start failures are %s", data_set->start_failure_fatal?"always fatal":"handled by failcount"); return TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } if(pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(data_set->stonith_enabled == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, data_set); if(crm_is_true(g_hash_table_lookup( new_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", new_node->details->uname); new_node->weight = -INFINITY; new_node->details->standby = TRUE; } data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Begining unpack... %s", xml_obj?crm_element_name(xml_obj):""); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; xmlNode * lrm_rsc = NULL; xmlNode * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; crm_debug_3("Adding runtime node attrs"); add_node_attrs(attrs, this_node, data_set); crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || data_set->stonith_enabled) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { online = TRUE; if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); } else if(this_node->details->expected_up) { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is comming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( xmlNode * node_state, node_t *this_node, pe_working_set_t *data_set) { gboolean online = FALSE; const char *shutdown = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(this_node == NULL) { crm_config_err("No node to check"); return online; } shutdown = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); this_node->details->expected_up = FALSE; if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } this_node->details->shutdown = FALSE; if(shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; this_node->details->expected_up = FALSE; } if(data_set->stonith_enabled == FALSE) { online = determine_online_status_no_fencing( node_state, this_node); } else { online = determine_online_status_fencing( node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->standby?"standby":"online"); } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len-1; while(complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; crm_malloc0(last_rsc_id, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len+1] = 0; complete = TRUE; crm_free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); break; } } return last_rsc_id; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_info(xml_rsc, "Orphan resource"); common_unpack(xml_rsc, &rsc, NULL, data_set); set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, xmlNode *rsc_entry) { resource_t *rsc = NULL; gboolean is_duped_clone = FALSE; char *alt_rsc_id = crm_strdup(rsc_id); while(rsc == NULL) { crm_debug_3("looking for: %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { crm_debug_2("%s not found: %d", alt_rsc_id, is_duped_clone); if(is_duped_clone) { /* create one */ rsc = create_fake_resource(alt_rsc_id, rsc_entry, data_set); crm_info("Making sure orphan %s/%s is stopped on %s", rsc_id, rsc->id, node->details->uname); resource_location(rsc, NULL, -INFINITY, "__orphan_clone_dont_run__", data_set); } break; /* not running anywhere else */ } else if(rsc->running_on == NULL) { crm_debug_3("not active yet"); break; /* always unique */ } else if(is_set(rsc->flags, pe_rsc_unique)) { crm_debug_3("unique"); break; /* running somewhere already but we dont care * find another clone instead */ } else { crm_debug_3("find another one"); rsc = NULL; is_duped_clone = TRUE; alt_rsc_id = increment_clone(alt_rsc_id); } } crm_free(alt_rsc_id); if(rsc != NULL) { crm_free(rsc->clone_name); rsc->clone_name = NULL; if(is_duped_clone) { crm_info("Internally renamed %s on %s to %s", rsc_id, node->details->uname, rsc->id); rsc->clone_name = crm_strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_log_xml_info(rsc_entry, "Orphan resource"); crm_config_warn("Nothing known about resource %s running on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(data_set->stop_rsc_orphans == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { crm_info("Making sure orphan %s is stopped", rsc_id); print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, xmlNode *migrate_op, pe_working_set_t *data_set) { if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = NULL; uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); from = pe_find_node_id(data_set->nodes, uuid); process_rsc_state(rsc, from, action_fail_recover,NULL,data_set); on_fail = action_fail_recover; } crm_debug_2("Resource %s is %s on %s", rsc->id, role2text(rsc->role), node->details->uname); /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if(on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); crm_debug_2("Force stop"); } native_add_running(rsc, node, data_set); if(is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0) { resource_location(rsc, node, rsc->stickiness, "stickiness", data_set); crm_debug_2("Resource %s: preferring current location" " (node=%s, weight=%d)", rsc->id, node->details->uname, rsc->stickiness); } if(on_fail == action_fail_ignore) { /* nothing to do */ } else if(node->details->unclean) { stop_action(rsc, node, FALSE); } else if(on_fail == action_fail_fence) { /* treat it as if it is still running * but also mark the node as unclean */ node->details->unclean = TRUE; stop_action(rsc, node, FALSE); } else if(on_fail == action_fail_block) { /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); } else if(on_fail == action_fail_migrate) { stop_action(rsc, node, FALSE); /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__",data_set); } else { stop_action(rsc, node, FALSE); } } else if(rsc->clone_name) { crm_debug_2("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); crm_free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); slist_iter(stop, action_t, possible_matches, lpc, stop->optional = TRUE; ); crm_free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t *node, resource_t *rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t *data_set) { const char *task = NULL; const char *status = NULL; crm_debug_3("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; if(node->details->online == FALSE) { crm_debug_4("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; } else if(start_index < stop_index) { crm_debug_4("Skipping %s/%s: not active", rsc->id, node->details->uname); break; } else if(lpc <= start_index) { crm_debug_4("Skipping %s/%s: old", id, node->details->uname); continue; } interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0) { crm_debug_4("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(status, "-1")) { crm_debug_4("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_debug_3("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); ); } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = lpc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { *start_index = lpc; } else if(*start_index <= *stop_index && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { *start_index = lpc; } } ); } static void unpack_lrm_rsc_state( node_t *node, xmlNode * rsc_entry, pe_working_set_t *data_set) { int stop_index = -1; int start_index = -1; int max_call_id = -1; const char *task = NULL; const char *value = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_debug_3("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if(rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ max_call_id = -1; saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &max_call_id, &on_fail, data_set); ); /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if(value != NULL && safe_str_neq("default", value)) { enum rsc_role_e req_role = text2role(value); if(req_role != RSC_ROLE_UNKNOWN && req_role != rsc->next_role){ if(rsc->next_role != RSC_ROLE_UNKNOWN) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } rsc->next_role = req_role; } } if(saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t *node, xmlNode * lrm_rsc_list, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return FALSE); crm_debug_3("Unpacking resources on %s", node->details->uname); slist_iter( rsc, resource_t, data_set->resources, lpc, common_apply_stickiness(rsc, node, data_set); ); xml_child_iter_filter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, unpack_lrm_rsc_state(node, rsc_entry, data_set); ); return TRUE; } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, int *max_call_id, enum action_fail_response *on_fail, pe_working_set_t *data_set) { const char *id = NULL; const char *task = NULL; + const char *magic = NULL; const char *task_id = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_digest = NULL; const char *op_version = NULL; int interval = 0; int task_id_i = -1; int task_status_i = -2; int actual_rc_i = 0; action_t *action = NULL; node_t *effective_node = NULL; + gboolean expired = FALSE; gboolean is_probe = FALSE; gboolean is_stop_action = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_digest = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); + magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE); if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } + if(rsc->failure_timeout > 0) { + time_t now = time(NULL); + int last_run = now; + int rc = crm_element_value_int(xml_op, "last_run", &last_run); + +/* int last_change = crm_element_value_int(xml_op, "last_rc_change"); */ + if(rc == 0 && now > (last_run + rsc->failure_timeout)) { + expired = TRUE; + } + } + crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(safe_str_eq(task, CRMD_ACTION_STOP)) { is_stop_action = TRUE; } if(task_status_i != LRM_OP_PENDING) { task_id_i = crm_parse_int(task_id, "-1"); CRM_CHECK(task_id != NULL, return FALSE); CRM_CHECK(task_id_i >= 0, return FALSE); CRM_CHECK(task_id_i > *max_call_id, return FALSE); } if(*max_call_id < task_id_i) { *max_call_id = task_id_i; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." " Further action depends on the value of %s", node->details->uname, rsc->id, XML_RSC_ATTR_STOPFAIL); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if(task_status_i == LRM_OP_NOTSUPPORTED) { actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE; } + + if(expired + && actual_rc_i != EXECRA_NOT_RUNNING + && actual_rc_i != EXECRA_RUNNING_MASTER + && actual_rc_i != EXECRA_OK) { + crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", + id, actual_rc_i, magic, node->details->uname); + goto done; + } switch(actual_rc_i) { case EXECRA_NOT_RUNNING: if(is_probe) { /* treat these like stops */ is_stop_action = TRUE; } if(is_stop_action) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; } break; case EXECRA_RUNNING_MASTER: if(is_probe || (rsc->role == RSC_ROLE_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS))) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; if(rsc->role != RSC_ROLE_MASTER) { /* this wil happen normally if the PE is invoked after * a resource is demoted and re-promoted but before the * 'master' monitor has been re-initiated * * The monitor will occur before the promote and appear * to be an error (the error status is be cleared by a * successful demote) */ crm_warn("%s reported %s in master mode on %s", - id, rsc->id, - node->details->uname); + id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = LRM_OP_ERROR; break; case EXECRA_UNIMPLEMENT_FEATURE: if(interval > 0) { task_status_i = LRM_OP_ERROR; break; } /* else: fall through */ case EXECRA_INSUFFICIENT_PRIV: case EXECRA_NOT_INSTALLED: effective_node = node; /* fall through */ case EXECRA_NOT_CONFIGURED: case EXECRA_INVALID_PARAM: crm_err("Hard error: %s failed with rc=%d.", id, actual_rc_i); if(effective_node) { crm_err(" Preventing %s from re-starting on %s", rsc->id, effective_node->details->uname); } else { crm_err(" Preventing %s from re-starting anywhere in the cluster", rsc->id); } resource_location(rsc, effective_node, -INFINITY, "hard-error", data_set); if(is_probe) { /* treat these like stops */ is_stop_action = TRUE; task_status_i = LRM_OP_DONE; actual_rc_i = EXECRA_NOT_RUNNING; } else { task_status_i = LRM_OP_ERROR; } break; case EXECRA_OK: if(interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = LRM_OP_ERROR; } break; default: if(task_status_i == LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_ERROR; } } if(task_status_i == LRM_OP_ERROR || task_status_i == LRM_OP_TIMEOUT || task_status_i == LRM_OP_NOTSUPPORTED) { - action = custom_action(rsc, crm_strdup(id), task, NULL, - TRUE, FALSE, data_set); - if(action->on_fail == action_fail_ignore) { - crm_warn("Remapping %s (rc=%d) on %s to DONE", - id, actual_rc_i, node->details->uname); - task_status_i = LRM_OP_DONE; - } + action = custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set); + if(expired) { + crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", + id, actual_rc_i, magic, node->details->uname); + goto done; + + } else if(action->on_fail == action_fail_ignore) { + crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore", + id, actual_rc_i, node->details->uname); + task_status_i = LRM_OP_DONE; + } } switch(task_status_i) { case LRM_OP_PENDING: if(safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); rsc->role = RSC_ROLE_STARTED; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(is_stop_action) { rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); rsc->role = RSC_ROLE_STARTED; } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s", id, node->details->uname, op_status2text(task_status_i)); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); if(*on_fail < action->on_fail) { *on_fail = action->on_fail; } if(is_stop_action) { resource_location( rsc, node, -INFINITY, "__stop_fail__", data_set); } else if((data_set->start_failure_fatal || compare_version("2.0", op_version) > 0) && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatability handling for failed op %s on %s", id, node->details->uname); resource_location( rsc, node, -INFINITY, "__legacy_start__", data_set); } if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { rsc->role = RSC_ROLE_STARTED; } crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean?"true":"false", fail2text(action->on_fail), role2text(action->fail_role)); if(action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if(action->fail_role == RSC_ROLE_STOPPED) { crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } pe_free_action(action); action = NULL; break; case LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; } + done: crm_debug_3("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter) { int stop_index = -1; int start_index = -1; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if(active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { crm_debug_4("Skipping %s: not active", ID(rsc_entry)); break; } else if(lpc < start_index) { crm_debug_4("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); ); g_list_free(sorted_op_list); return op_list; } GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); const char *uname = NULL; node_t *this_node = NULL; xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, uname = crm_element_value(node_state, XML_ATTR_UNAME); if(node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || data_set->stonith_enabled) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( tmp, lrm_rsc, XML_LRM_TAG_RESOURCE, const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if(rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); ); } ); return output; }