diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h index 1575f38df5..aa2e32d197 100644 --- a/include/crm/pengine/common.h +++ b/include/crm/pengine/common.h @@ -1,123 +1,124 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PE_COMMON__H #define PE_COMMON__H #include extern gboolean was_processing_error; extern gboolean was_processing_warning; /* order is significant here * items listed in order of accending severeness * more severe actions take precedent over lower ones */ enum action_fail_response { action_fail_ignore, action_fail_recover, action_migrate_failure, /* recovery from a failed atomic migration */ action_fail_migrate, /* recover by moving it somewhere else */ action_fail_block, action_fail_stop, + action_fail_standby, action_fail_fence }; /* the "done" action must be the "pre" action +1 */ enum action_tasks { no_action, monitor_rsc, stop_rsc, stopped_rsc, start_rsc, started_rsc, action_notify, action_notified, action_promote, action_promoted, action_demote, action_demoted, shutdown_crm, stonith_node }; enum rsc_recovery_type { recovery_stop_start, recovery_stop_only, recovery_block }; enum rsc_start_requirement { rsc_req_nothing, rsc_req_quorum, rsc_req_stonith }; enum rsc_role_e { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }; #define RSC_ROLE_MAX RSC_ROLE_MASTER+1 #define RSC_ROLE_UNKNOWN_S "Unknown" #define RSC_ROLE_STOPPED_S "Stopped" #define RSC_ROLE_STARTED_S "Started" #define RSC_ROLE_SLAVE_S "Slave" #define RSC_ROLE_MASTER_S "Master" enum pe_print_options { pe_print_log = 0x0001, pe_print_html = 0x0002, pe_print_ncurses = 0x0004, pe_print_printf = 0x0008, pe_print_dev = 0x0010, pe_print_details = 0x0020, pe_print_max_details = 0x0040, pe_print_rsconly = 0x0080, pe_print_ops = 0x0100, pe_print_suppres_nl = 0x0200, }; extern int merge_weights(int w1, int w2); extern const char *task2text(enum action_tasks task); extern enum action_tasks text2task(const char *task); extern enum rsc_role_e text2role(const char *role); extern const char *role2text(enum rsc_role_e role); extern const char *fail2text(enum action_fail_response fail); extern void add_hash_param(GHashTable *hash, const char *name, const char *value); extern void pe_metadata(void); extern void verify_pe_options(GHashTable *options); extern const char *pe_pref(GHashTable *options, const char *name); extern void calculate_active_ops(GList* sorted_op_list, int *start_index, int *stop_index); /* Helper macros to avoid NULL pointers */ #define safe_val3(def, t,u,v) (t?t->u?t->u->v:def:def) #define safe_val5(def, t,u,v,w,x) (t?t->u?t->u->v?t->u->v->w?t->u->v->w->x:def:def:def:def) #define pe_err(fmt...) { was_processing_error = TRUE; crm_config_error = TRUE; crm_err(fmt); } #define pe_warn(fmt...) { was_processing_warning = TRUE; crm_config_warning = TRUE; crm_warn(fmt); } #define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } #define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } #endif diff --git a/lib/pengine/common.c b/lib/pengine/common.c index 1d2777798d..a148150420 100644 --- a/lib/pengine/common.c +++ b/lib/pengine/common.c @@ -1,361 +1,364 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include gboolean was_processing_error = FALSE; gboolean was_processing_warning = FALSE; static gboolean check_quorum(const char *value) { if(safe_str_eq(value, "stop")) { return TRUE; } else if(safe_str_eq(value, "freeze")) { return TRUE; } else if(safe_str_eq(value, "ignore")) { return TRUE; } else if(safe_str_eq(value, "suicide")) { return TRUE; } return FALSE; } static gboolean check_stonith_action(const char *value) { if(safe_str_eq(value, "reboot")) { return TRUE; } else if(safe_str_eq(value, "poweroff")) { return TRUE; } return FALSE; } pe_cluster_option pe_opts[] = { /* name, old-name, validate, default, description */ { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, "What to do when the cluster does not have quorum", NULL }, { "symmetric-cluster", "symmetric_cluster", "boolean", NULL, "true", &check_boolean, "All resources can run anywhere by default", NULL }, { "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean, "Failed nodes are STONITH'd", NULL }, { "stonith-action", "stonith_action", "enum", "reboot, poweroff", "reboot", &check_stonith_action, "Action to send to STONITH device", NULL }, { "stonith-timeout", NULL, "integer", NULL, "60s", &check_timer, "How long to wait for the STONITH action to complete", NULL }, { "startup-fencing", "startup_fencing", "boolean", NULL, "true", &check_boolean, "STONITH unseen nodes", "Advanced Use Only! Not using the default is very unsafe!" }, { "default-resource-stickiness", "default_resource_stickiness", "integer", NULL, "0", &check_number, "", NULL }, { "is-managed-default", "is_managed_default", "boolean", NULL, "true", &check_boolean, "Should the cluster start/stop resources as required", NULL }, { "maintenance-mode", NULL, "boolean", NULL, "false", &check_boolean, "Should the cluster ...", NULL }, { "cluster-delay", "transition_idle_timeout", "time", NULL, "60s", &check_time, "Round trip delay over the network (excluding action execution)", "The \"correct\" value will depend on the speed and load of your network and cluster nodes." }, { "batch-limit", NULL, "integer", NULL, "30", &check_number, "The number of jobs that the TE is allowed to execute in parallel", "The \"correct\" value will depend on the speed and load of your network and cluster nodes." }, { "stop-all-resources", NULL, "boolean", NULL, "false", &check_boolean, "Should the cluster stop all active resources", NULL }, { "default-action-timeout", "default_action_timeout", "time", NULL, "20s", &check_time, "How long to wait for actions to complete", NULL }, { "stop-orphan-resources", "stop_orphan_resources", "boolean", NULL, "true", &check_boolean, "Should deleted resources be stopped", NULL }, { "stop-orphan-actions", "stop_orphan_actions", "boolean", NULL, "true", &check_boolean, "Should deleted actions be cancelled", NULL }, { "remove-after-stop", "remove_after_stop", "boolean", NULL, "false", &check_boolean, "Remove resources from the LRM after they are stopped", "Always set this to false. Other values are, at best, poorly tested and potentially dangerous." }, /* { "", "", , "0", "", NULL }, */ { "pe-error-series-max", NULL, "integer", NULL, "-1", &check_number, "The number of PE inputs resulting in ERRORs to save", "Zero to disable, -1 to store unlimited." }, { "pe-warn-series-max", NULL, "integer", NULL, "-1", &check_number, "The number of PE inputs resulting in WARNINGs to save", "Zero to disable, -1 to store unlimited." }, { "pe-input-series-max", NULL, "integer", NULL, "-1", &check_number, "The number of other PE inputs to save", "Zero to disable, -1 to store unlimited." }, { "start-failure-is-fatal", NULL, "boolean", NULL, "true", &check_boolean, "Always treat start failures as fatal", "This was the old default. However when set to FALSE, the cluster will instead use the resource's failcount and value for resource-failure-stickiness" } }; void pe_metadata(void) { config_metadata("Policy Engine", "1.0", "Policy Engine Options", "This is a fake resource that details the options that can be configured for the Policy Engine.", pe_opts, DIMOF(pe_opts)); } void verify_pe_options(GHashTable *options) { verify_all_options(options, pe_opts, DIMOF(pe_opts)); } const char * pe_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, pe_opts, DIMOF(pe_opts), name); } const char * fail2text(enum action_fail_response fail) { const char *result = ""; switch(fail) { case action_fail_ignore: result = "ignore"; break; case action_fail_block: result = "block"; break; case action_fail_recover: result = "recover"; break; case action_fail_migrate: result = "migrate"; break; case action_fail_stop: result = "stop"; break; case action_migrate_failure: result = "atomic migration recovery"; break; case action_fail_fence: result = "fence"; break; + case action_fail_standby: + result = "standby"; + break; } return result; } enum action_tasks text2task(const char *task) { if(safe_str_eq(task, CRMD_ACTION_STOP)) { return stop_rsc; } else if(safe_str_eq(task, CRMD_ACTION_STOPPED)) { return stopped_rsc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { return start_rsc; } else if(safe_str_eq(task, CRMD_ACTION_STARTED)) { return started_rsc; } else if(safe_str_eq(task, CRM_OP_SHUTDOWN)) { return shutdown_crm; } else if(safe_str_eq(task, CRM_OP_FENCE)) { return stonith_node; } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) { return monitor_rsc; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { return action_notify; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFIED)) { return action_notified; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { return action_promote; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { return action_demote; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTED)) { return action_promoted; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTED)) { return action_demoted; } else if(safe_str_eq(task, CRMD_ACTION_CANCEL)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_DELETE)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) { return no_action; } else if(safe_str_eq(task, CRM_OP_PROBED)) { return no_action; } else if(safe_str_eq(task, CRM_OP_LRM_REFRESH)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_MIGRATE)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { return no_action; } else if(safe_str_eq(task, "fail")) { return no_action; } else if(safe_str_eq(task, "stonith_up")) { return no_action; } else if(safe_str_eq(task, "all_stopped")) { return no_action; } crm_debug("Unsupported action: %s", task); return no_action; } const char * task2text(enum action_tasks task) { const char *result = ""; switch(task) { case no_action: result = "no_action"; break; case stop_rsc: result = CRMD_ACTION_STOP; break; case stopped_rsc: result = CRMD_ACTION_STOPPED; break; case start_rsc: result = CRMD_ACTION_START; break; case started_rsc: result = CRMD_ACTION_STARTED; break; case shutdown_crm: result = CRM_OP_SHUTDOWN; break; case stonith_node: result = CRM_OP_FENCE; break; case monitor_rsc: result = CRMD_ACTION_STATUS; break; case action_notify: result = CRMD_ACTION_NOTIFY; break; case action_notified: result = CRMD_ACTION_NOTIFIED; break; case action_promote: result = CRMD_ACTION_PROMOTE; break; case action_promoted: result = CRMD_ACTION_PROMOTED; break; case action_demote: result = CRMD_ACTION_DEMOTE; break; case action_demoted: result = CRMD_ACTION_DEMOTED; break; } return result; } const char * role2text(enum rsc_role_e role) { CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S); CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S); switch(role) { case RSC_ROLE_UNKNOWN: return RSC_ROLE_UNKNOWN_S; case RSC_ROLE_STOPPED: return RSC_ROLE_STOPPED_S; case RSC_ROLE_STARTED: return RSC_ROLE_STARTED_S; case RSC_ROLE_SLAVE: return RSC_ROLE_SLAVE_S; case RSC_ROLE_MASTER: return RSC_ROLE_MASTER_S; } return RSC_ROLE_UNKNOWN_S; } enum rsc_role_e text2role(const char *role) { if(safe_str_eq(role, RSC_ROLE_STOPPED_S)) { return RSC_ROLE_STOPPED; } else if(safe_str_eq(role, RSC_ROLE_STARTED_S)) { return RSC_ROLE_STARTED; } else if(safe_str_eq(role, RSC_ROLE_SLAVE_S)) { return RSC_ROLE_SLAVE; } else if(safe_str_eq(role, RSC_ROLE_MASTER_S)) { return RSC_ROLE_MASTER; } else if(safe_str_eq(role, RSC_ROLE_UNKNOWN_S)) { return RSC_ROLE_UNKNOWN; } crm_err("Unknown role: %s", role); return RSC_ROLE_UNKNOWN; } int merge_weights(int w1, int w2) { int result = w1 + w2; if(w1 <= -INFINITY || w2 <= -INFINITY) { if(w1 >= INFINITY || w2 >= INFINITY) { crm_debug_2("-INFINITY + INFINITY == -INFINITY"); } return -INFINITY; } else if(w1 >= INFINITY || w2 >= INFINITY) { return INFINITY; } /* detect wrap-around */ if(result > 0) { if(w1 <= 0 && w2 < 0) { result = -INFINITY; } } else if(w1 > 0 && w2 > 0) { result = INFINITY; } /* detect +/- INFINITY */ if(result >= INFINITY) { result = INFINITY; } else if(result <= -INFINITY) { result = -INFINITY; } crm_debug_5("%d + %d = %d", w1, w2, result); return result; } void add_hash_param(GHashTable *hash, const char *name, const char *value) { CRM_CHECK(hash != NULL, return); crm_debug_3("adding: name=%s value=%s", crm_str(name), crm_str(value)); if(name == NULL || value == NULL) { return; } else if(safe_str_eq(value, "#default")) { return; } else if(g_hash_table_lookup(hash, name) == NULL) { g_hash_table_insert(hash, crm_strdup(name), crm_strdup(value)); } } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 9c0417bf93..4b6e9e7656 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,1553 +1,1556 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include /* for ONLINESTATUS */ #include #include #include #include #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit_inplace(data_set->flags, flag); \ } else { \ clear_bit_inplace(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); value = pe_pref(data_set->config_hash, "default-action-timeout"); data_set->transition_idle_timeout = crm_strdup(value); crm_debug("Default action timeout: %s", data_set->transition_idle_timeout); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled)?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything)?"true":"false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if(is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if(safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE){ crm_config_err("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if(do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_debug_2("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans)?"stopped":"ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_debug_2("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans)?"stopped":"ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop)?"true":"false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_debug_2("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode)?"true":"false"); if(is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_debug_2("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default)?"":"not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_debug_2("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal)?"always fatal":"handled by failcount"); return TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } if(pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, FALSE, data_set); data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Begining unpack... %s", xml_obj?crm_element_name(xml_obj):""); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); if(is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_warn("No STONITH resources have been defined"); } return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; const char *shutdown = NULL; xmlNode * lrm_rsc = NULL; xmlNode * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; crm_debug_3("Adding runtime node attrs"); shutdown = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); if(shutdown != NULL) { g_hash_table_insert(this_node->details->attrs, crm_strdup(XML_CIB_ATTR_SHUTDOWN), crm_strdup(shutdown)); } add_node_attrs(attrs, this_node, TRUE, data_set); if(crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ crm_notice("Marking node %s STONITH: The cluster does not have quorum", this_node->details->uname); this_node->details->unclean = TRUE; } if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else if(safe_str_eq(join_state, CRMD_JOINSTATE_PENDING)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else if(safe_str_eq(join_state, CRMD_JOINSTATE_NACK)) { crm_warn("Node %s is not part of the cluster", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else { crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); this_node->details->unclean = TRUE; } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); } else if(this_node->details->expected_up) { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is comming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( xmlNode * node_state, node_t *this_node, pe_working_set_t *data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->expected_up = FALSE; if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } this_node->details->shutdown = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if(shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; this_node->details->expected_up = FALSE; } if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing( node_state, this_node); } else { online = determine_online_status_fencing( node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); if(crm_is_true(terminate)) { crm_notice("Forcing node %s to be terminated", this_node->details->uname); this_node->details->unclean = TRUE; } else { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->pending?"pending": this_node->details->standby?"standby":"online"); } } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len-1; while(complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; crm_malloc0(last_rsc_id, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len+1] = 0; complete = TRUE; crm_free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); break; } } return last_rsc_id; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_info(xml_rsc, "Orphan resource"); common_unpack(xml_rsc, &rsc, NULL, data_set); set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern gboolean create_child_clone(resource_t *rsc, int sub_id, pe_working_set_t *data_set); static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, xmlNode *rsc_entry) { resource_t *rsc = NULL; resource_t *clone_parent = NULL; gboolean is_duped_clone = FALSE; char *alt_rsc_id = crm_strdup(rsc_id); while(rsc == NULL) { crm_debug_3("looking for: %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { crm_debug_2("%s not found: %d", alt_rsc_id, is_duped_clone); if(is_duped_clone == FALSE) { break; } /* create one */ #if 1 create_child_clone(clone_parent, -1, data_set); crm_debug("Looking again for %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); CRM_CHECK(rsc != NULL, crm_err("%s stil not found", alt_rsc_id); continue); #else rsc = create_fake_resource(alt_rsc_id, rsc_entry, data_set); crm_info("Making sure orphan %s/%s of %s is stopped on %s", rsc_id, rsc->id, clone_parent->id, node->details->uname); resource_location(rsc, NULL, -INFINITY, "__orphan_clone_dont_run__", data_set); break; #endif /* not running anywhere else */ } else if(rsc->running_on == NULL) { crm_debug_3("not active yet"); break; /* always unique */ } else if(is_set(rsc->flags, pe_rsc_unique)) { crm_debug_3("unique"); break; /* running somewhere already but we dont care * find another clone instead */ } else { crm_debug_3("find another one"); clone_parent = uber_parent(rsc); rsc = NULL; is_duped_clone = TRUE; alt_rsc_id = increment_clone(alt_rsc_id); } } crm_free(alt_rsc_id); if(rsc != NULL) { crm_free(rsc->clone_name); rsc->clone_name = NULL; if(is_duped_clone) { crm_info("Internally renamed %s on %s to %s", rsc_id, node->details->uname, rsc->id); rsc->clone_name = crm_strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_log_xml_info(rsc_entry, "Orphan resource"); crm_config_warn("Nothing known about resource %s running on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { crm_info("Making sure orphan %s is stopped", rsc_id); print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, xmlNode *migrate_op, pe_working_set_t *data_set) { if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = NULL; uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); from = pe_find_node_id(data_set->nodes, uuid); process_rsc_state(rsc, from, action_fail_recover,NULL,data_set); on_fail = action_fail_recover; } crm_debug_2("Resource %s is %s on %s", rsc->id, role2text(rsc->role), node->details->uname); /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if(on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); crm_debug_2("Force stop"); } native_add_running(rsc, node, data_set); if(on_fail == action_fail_ignore) { /* nothing to do */ } else if(node->details->unclean) { stop_action(rsc, node, FALSE); } else if(on_fail == action_fail_fence) { /* treat it as if it is still running * but also mark the node as unclean */ node->details->unclean = TRUE; stop_action(rsc, node, FALSE); + } else if(on_fail == action_fail_standby) { + node->details->standby = TRUE; + } else if(on_fail == action_fail_block) { /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); } else if(on_fail == action_fail_migrate) { stop_action(rsc, node, FALSE); /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__",data_set); } else { stop_action(rsc, node, FALSE); } } else if(rsc->clone_name) { crm_debug_2("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); crm_free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); slist_iter(stop, action_t, possible_matches, lpc, stop->optional = TRUE; ); crm_free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t *node, resource_t *rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t *data_set) { const char *task = NULL; const char *status = NULL; crm_debug_3("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; if(node->details->online == FALSE) { crm_debug_4("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; } else if(start_index < stop_index) { crm_debug_4("Skipping %s/%s: not active", rsc->id, node->details->uname); break; } else if(lpc <= start_index) { crm_debug_4("Skipping %s/%s: old", id, node->details->uname); continue; } interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0) { crm_debug_4("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(status, "-1")) { crm_debug_4("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_debug_3("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); ); } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = lpc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { *start_index = lpc; } else if(*start_index <= *stop_index && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { *start_index = lpc; } } ); } static void unpack_lrm_rsc_state( node_t *node, xmlNode * rsc_entry, pe_working_set_t *data_set) { int stop_index = -1; int start_index = -1; int max_call_id = -1; const char *task = NULL; const char *value = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_debug_3("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if(rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ max_call_id = -1; saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &max_call_id, &on_fail, data_set); ); /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if(value != NULL && safe_str_neq("default", value)) { enum rsc_role_e req_role = text2role(value); if(req_role != RSC_ROLE_UNKNOWN && req_role != rsc->next_role){ if(rsc->next_role != RSC_ROLE_UNKNOWN) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } rsc->next_role = req_role; } } if(saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t *node, xmlNode * lrm_rsc_list, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return FALSE); crm_debug_3("Unpacking resources on %s", node->details->uname); xml_child_iter_filter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, unpack_lrm_rsc_state(node, rsc_entry, data_set); ); return TRUE; } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, int *max_call_id, enum action_fail_response *on_fail, pe_working_set_t *data_set) { const char *id = NULL; const char *key = NULL; const char *task = NULL; const char *magic = NULL; const char *task_id = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_digest = NULL; const char *op_version = NULL; int interval = 0; int task_id_i = -1; int task_status_i = -2; int actual_rc_i = 0; int target_rc = -1; action_t *action = NULL; node_t *effective_node = NULL; resource_t *failed = NULL; gboolean expired = FALSE; gboolean is_probe = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_digest = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE); if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if(rsc->failure_timeout > 0) { int last_run = 0; if(crm_element_value_int(xml_op, "last-run", &last_run) == 0) { /* int last_change = crm_element_value_int(xml_op, "last_rc_change"); */ time_t now = get_timet_now(data_set); if(now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(task_status_i != LRM_OP_PENDING) { task_id_i = crm_parse_int(task_id, "-1"); CRM_CHECK(task_id != NULL, return FALSE); CRM_CHECK(task_id_i >= 0, return FALSE); CRM_CHECK(task_id_i > *max_call_id, return FALSE); } if(*max_call_id < task_id_i) { *max_call_id = task_id_i; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if(key) { int dummy = 0; char *dummy_string = NULL; decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); crm_free(dummy_string); } if(task_status_i == LRM_OP_DONE && target_rc >= 0) { if(target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; crm_info("Remapping %s (rc=%d) on %s to an ERROR (expected %d)", id, actual_rc_i, node->details->uname, target_rc); } } else if(task_status_i == LRM_OP_ERROR) { /* let us decide that */ crm_debug("Remapping %s (rc=%d, status=%d) on %s to DONE", id, actual_rc_i, task_status_i, node->details->uname); task_status_i = LRM_OP_DONE; } if(task_status_i == LRM_OP_NOTSUPPORTED) { actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE; } if(expired && actual_rc_i != EXECRA_NOT_RUNNING && actual_rc_i != EXECRA_RUNNING_MASTER && actual_rc_i != EXECRA_OK) { crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch(actual_rc_i) { case EXECRA_NOT_RUNNING: if(is_probe || target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_neq(task, CRMD_ACTION_STOP)) { task_status_i = LRM_OP_ERROR; } break; case EXECRA_RUNNING_MASTER: if(is_probe) { task_status_i = LRM_OP_DONE; crm_warn("%s found active %s in master mode on %s", id, rsc->id, node->details->uname); } else if(target_rc == actual_rc_i) { /* nothing to do */ } else if(target_rc >= 0) { task_status_i = LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if(safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { task_status_i = LRM_OP_ERROR; if(rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = LRM_OP_ERROR; break; case EXECRA_UNIMPLEMENT_FEATURE: if(interval > 0) { task_status_i = LRM_OP_NOTSUPPORTED; break; } /* else: fall through */ case EXECRA_INSUFFICIENT_PRIV: case EXECRA_NOT_INSTALLED: case EXECRA_INVALID_PARAM: effective_node = node; /* fall through */ case EXECRA_NOT_CONFIGURED: failed = rsc; if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(failed); } crm_err("Hard error - %s failed with rc=%d: Preventing %s from re-starting %s %s", id, actual_rc_i, failed->id, effective_node?"on":"anywhere", effective_node?effective_node->details->uname:"in the cluster"); resource_location(failed, effective_node, -INFINITY, "hard-error", data_set); if(is_probe) { /* treat these like stops */ task = CRMD_ACTION_STOP; task_status_i = LRM_OP_DONE; } break; case EXECRA_OK: if(is_probe && target_rc == 7) { task_status_i = LRM_OP_DONE; crm_warn("%s found active %s on %s", id, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if(target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = LRM_OP_ERROR; } break; default: if(task_status_i == LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_ERROR; } } if(task_status_i == LRM_OP_ERROR || task_status_i == LRM_OP_TIMEOUT || task_status_i == LRM_OP_NOTSUPPORTED) { action = custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set); if(expired) { crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } else if(action->on_fail == action_fail_ignore) { crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_DONE; } } switch(task_status_i) { case LRM_OP_PENDING: if(safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); rsc->role = RSC_ROLE_STARTED; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(actual_rc_i == EXECRA_NOT_RUNNING) { /* nothing to do */ } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); rsc->role = RSC_ROLE_STARTED; } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s", id, node->details->uname, op_status2text(task_status_i)); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); if(*on_fail < action->on_fail) { *on_fail = action->on_fail; } if(safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location( rsc, node, -INFINITY, "__stop_fail__", data_set); } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if((is_set(data_set->flags, pe_flag_start_failure_fatal) || compare_version("2.0", op_version) > 0) && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatability handling for failed op %s on %s", id, node->details->uname); resource_location( rsc, node, -INFINITY, "__legacy_start__", data_set); } if(rsc->role < RSC_ROLE_STARTED) { rsc->role = RSC_ROLE_STARTED; } crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean?"true":"false", fail2text(action->on_fail), role2text(action->fail_role)); if(action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if(action->fail_role == RSC_ROLE_STOPPED) { crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } pe_free_action(action); action = NULL; break; case LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; } done: crm_debug_3("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter) { int stop_index = -1; int start_index = -1; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if(active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { crm_debug_4("Skipping %s: not active", ID(rsc_entry)); break; } else if(lpc < start_index) { crm_debug_4("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); ); g_list_free(sorted_op_list); return op_list; } GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); const char *uname = NULL; node_t *this_node = NULL; xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, uname = crm_element_value(node_state, XML_ATTR_UNAME); if(node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( tmp, lrm_rsc, XML_LRM_TAG_RESOURCE, const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if(rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); ); } ); return output; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 7c201599c2..c1d75fbb56 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,1349 +1,1359 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include extern xmlNode *get_object_root(const char *object_type,xmlNode *the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set); void pe_free_shallow(GListPtr alist) { pe_free_shallow_adv(alist, TRUE); } void pe_free_shallow_adv(GListPtr alist, gboolean with_data) { GListPtr item; GListPtr item_next = alist; if(with_data == FALSE && alist != NULL) { g_list_free(alist); return; } while(item_next != NULL) { item = item_next; item_next = item_next->next; if(with_data) { /* crm_debug_5("freeing %p", item->data); */ crm_free(item->data); } item->data = NULL; item->next = NULL; g_list_free_1(item); } } node_t * node_copy(node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); crm_malloc0(new_node, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_debug_5("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* are the contents of list1 and list2 equal * nodes with weight < 0 are ignored if filter == TRUE * * slow but linear * */ gboolean node_list_eq(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node; GListPtr lhs = list1; GListPtr rhs = list2; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); lhs = list2; rhs = list1; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); return TRUE; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ GListPtr node_list_exclude(GListPtr list1, GListPtr list2) { node_t *other_node = NULL; GListPtr result = NULL; result = node_list_dup(list1, FALSE, FALSE); slist_iter( node, node_t, result, lpc, other_node = pe_find_node_id(list2, node->details->id); if(other_node == NULL) { node->weight = -INFINITY; } else { node->weight = merge_weights(node->weight, other_node->weight); } ); slist_iter( node, node_t, list2, lpc, other_node = pe_find_node_id(result, node->details->id); if(other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; result = g_list_append(result, new_node); } ); return result; } /* the intersection of list1 and list2 */ GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; unsigned lpc = 0; for(lpc = 0; lpc < g_list_length(list1); lpc++) { node_t *node = (node_t*)g_list_nth_data(list1, lpc); node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(other_node != NULL) { new_node = node_copy(node); } if(new_node != NULL) { crm_debug_4("%s: %d + %d", node->details->uname, other_node->weight, new_node->weight); new_node->weight = merge_weights( new_node->weight, other_node->weight); crm_debug_3("New node weight for %s: %d", new_node->details->uname, new_node->weight); if(filter && new_node->weight < 0) { crm_free(new_node); new_node = NULL; } } if(new_node != NULL) { result = g_list_append(result, new_node); } } return result; } /* list1 - list2 */ GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Minus result len: %d", g_list_length(result)); return result; } /* list1 + list2 - (intersection of list1 and list2) */ GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list2, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); slist_iter( node, node_t, list2, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list1, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Xor result len: %d", g_list_length(result)); return result; } GListPtr node_list_or(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node = NULL; GListPtr result = NULL; gboolean needs_filter = FALSE; result = node_list_dup(list1, FALSE, filter); slist_iter( node, node_t, list2, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id( result, node->details->id); if(other_node != NULL) { crm_debug_4("%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights( other_node->weight, node->weight); if(filter && node->weight < 0) { needs_filter = TRUE; } } else if(filter == FALSE || node->weight >= 0) { node_t *new_node = node_copy(node); result = g_list_append(result, new_node); } ); /* not the neatest way, but the most expedient for now */ if(filter && needs_filter) { GListPtr old_result = result; result = node_list_dup(old_result, FALSE, filter); pe_free_shallow_adv(old_result, TRUE); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; slist_iter( this_node, node_t, list1, lpc, node_t *new_node = NULL; if(filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if(reset) { new_node->weight = 0; } if(new_node != NULL) { result = g_list_append(result, new_node); } ); return result; } void dump_node_scores(int level, resource_t *rsc, const char *comment, GListPtr nodes) { GListPtr list = nodes; if(rsc) { list = rsc->allowed_nodes; } slist_iter( node, node_t, list, lpc, if(level == 0) { if(rsc) { fprintf(stdout, "%s: %s allocation score on %s: %d\n", comment, rsc->id, node->details->uname, node->weight); } else { fprintf(stdout, "%s: %s = %d\n", comment, node->details->uname, node->weight); } } else { if(rsc) { do_crm_log(level, "%s: %s allocation score on %s: %d", comment, rsc->id, node->details->uname, node->weight); } else { do_crm_log(level, "%s: %s = %d", comment, node->details->uname, node->weight); } } ); if(rsc && rsc->children) { slist_iter( child, resource_t, rsc->children, lpc, dump_node_scores(level, child, comment, nodes); ); } } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->sort_index > resource2->sort_index) { return -1; } if(resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->priority > resource2->priority) { return -1; } if(resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if(save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if(possible_matches != NULL) { crm_free(key); if(g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc?rsc->id:"", on_node?on_node->details->uname:"", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); crm_debug_4("Found existing action (%d) %s for %s on %s", action->id, task, rsc?rsc->id:"", on_node?on_node->details->uname:""); g_list_free(possible_matches); } if(action == NULL) { if(save_action) { crm_debug_4("Creating%s action %d: %s for %s on %s", optional?"":" manditory", data_set->action_id, key, rsc?rsc->id:"", on_node?on_node->details->uname:""); } crm_malloc0(action, sizeof(action_t)); if(save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = crm_strdup(task); action->node = on_node; action->uuid = key; action->actions_before = NULL; action->actions_after = NULL; action->failure_is_fatal = TRUE; action->pseudo = FALSE; action->dumped = FALSE; action->runnable = TRUE; action->processed = FALSE; action->optional = optional; action->seen_count = 0; action->extra = g_hash_table_new_full( g_str_hash, g_str_equal, free, free); action->meta = g_hash_table_new_full( g_str_hash, g_str_equal, free, free); if(save_action) { data_set->actions = g_list_append( data_set->actions, action); } if(rsc != NULL) { action->op_entry = find_rsc_op_entry(rsc, key); unpack_operation( action, action->op_entry, data_set); if(save_action) { rsc->actions = g_list_append( rsc->actions, action); } } if(save_action) { crm_debug_4("Action %d created", action->id); } } if(optional == FALSE && action->optional) { crm_debug_2("Action %d (%s) marked manditory", action->id, action->uuid); action->optional = FALSE; } if(rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_DEBUG_3; if(save_action) { warn_level = LOG_WARNING; } if(action->node != NULL && action->op_entry != NULL) { unpack_instance_attributes( action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if(action->pseudo) { /* leave untouched */ } else if(action->node == NULL) { action->runnable = FALSE; } else if(g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL && is_not_set(rsc->flags, pe_rsc_managed)) { do_crm_log(LOG_DEBUG, "Action %s (unmanaged)", action->uuid); action->optional = TRUE; /* action->runnable = FALSE; */ } else if(action->node->details->online == FALSE) { action->runnable = FALSE; do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if(is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { do_crm_log(warn_level, "Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if(action->node->details->pending) { action->runnable = FALSE; do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if(action->needs == rsc_req_nothing) { crm_debug_3("Action %s doesnt require anything", action->uuid); action->runnable = TRUE; #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if(action->needs == rsc_req_stonith) { crm_debug_3("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_debug_3("Check resource is already active"); if(rsc->fns->active(rsc, TRUE) == FALSE) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { crm_debug_3("Action %s is runnable", action->uuid); action->runnable = TRUE; } if(save_action) { switch(a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if(action->runnable) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } return action; } void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set) { int value_i = 0; unsigned long long interval = 0; unsigned long long start_delay = 0; char *value_ms = NULL; const char *class = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); unpack_instance_attributes(data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); xml_prop_iter(xml_obj, name, value, if(value != NULL && g_hash_table_lookup(action->meta, name) == NULL) { g_hash_table_insert(action->meta, crm_strdup(name), crm_strdup(value)); } ); unpack_instance_attributes(xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); g_hash_table_remove(action->meta, "id"); class = g_hash_table_lookup(action->rsc->meta, "class"); value = g_hash_table_lookup(action->meta, "prereq"); if(value == NULL && safe_str_neq(action->task, CRMD_ACTION_START)) { /* todo: integrate stop as an option? */ action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if(safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if(safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; } else if(data_set->no_quorum_policy == no_quorum_ignore || safe_str_eq(class, "stonith")) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(data_set->no_quorum_policy == no_quorum_freeze && is_set(data_set->flags, pe_flag_stonith_enabled)) { action->needs = rsc_req_stonith; value = "fencing (default)"; } else { action->needs = rsc_req_quorum; value = "quorum (default)"; } if(safe_str_eq(class, "stonith")) { if(action->needs == rsc_req_stonith) { crm_config_err("Stonith resources (eg. %s) cannot require" " fencing to start", action->rsc->id); } action->needs = rsc_req_nothing; value = "nothing (fencing override)"; } crm_debug_3("\tAction %s requires: %s", action->task, value); value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); + if(safe_str_eq(action->task, CRMD_ACTION_STOP) + && safe_str_eq(value, "standby")) { + crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); + value = NULL; + } + if(value == NULL) { } else if(safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if(safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } + } else if(safe_str_eq(value, "standby")) { + action->on_fail = action_fail_standby; + value = "node standby"; + } else if(safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if(safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if(safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if(safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if(is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { action->on_fail = action_migrate_failure; value = "atomic migration recovery (default)"; } else if(value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } crm_debug_3("\t%s failure handling: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); } if(value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if(action->fail_role == RSC_ROLE_UNKNOWN) { if(safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } crm_debug_3("\t%s failure results in: %s", action->task, role2text(action->fail_role)); field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { interval = crm_get_interval(value); if(interval > 0) { value_ms = crm_itoa(interval); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } else if(interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) { char *date_str = NULL; char *date_str_mutable = NULL; ha_time_t *origin = NULL; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); date_str = crm_strdup(value); date_str_mutable = date_str; origin = parse_date(&date_str_mutable); crm_free(date_str); if(origin == NULL) { crm_config_err("Operation %s contained an invalid "XML_OP_ATTR_ORIGIN": %s", ID(xml_obj), value); } else { ha_time_t *delay = NULL; int rc = compare_date(origin, data_set->now); unsigned long long delay_s = 0; while(rc < 0) { add_seconds(origin, interval/1000); rc = compare_date(origin, data_set->now); } delay = subtract_time(origin, data_set->now); delay_s = date_in_seconds(delay); /* log_date(LOG_DEBUG_5, "delay", delay, ha_log_date|ha_log_time|ha_log_local); */ crm_info("Calculated a start delay of %llus for %s", delay_s, ID(xml_obj)); g_hash_table_replace(action->meta, crm_strdup(XML_OP_ATTR_START_DELAY), crm_itoa(delay_s * 1000)); start_delay = delay_s * 1000; free_ha_date(origin); free_ha_date(delay); } } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if(value == NULL) { value = pe_pref( data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } value_i += start_delay; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } xmlNode * find_rsc_op_entry(resource_t *rsc, const char *key) { int number = 0; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { crm_debug_2("%s disabled", ID(operation)); continue; } number = crm_get_interval(interval); if(number < 0) { continue; } match_key = generate_op_key(rsc->id, name, number); if(safe_str_eq(key, match_key)) { op = operation; } crm_free(match_key); if(op != NULL) { return op; } ); crm_debug_3("No match for %s", key); return op; } void print_node(const char *pre_text, node_t *node, gboolean details) { if(node == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", node->details==NULL?"error ":node->details->online?"":"Unavailable/Unclean ", node->details->uname, node->weight, node->fixed?"True":"False"); if(details && node != NULL && node->details != NULL) { char *pe_mutable = crm_strdup("\t\t"); crm_debug_4("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); crm_free(pe_mutable); crm_debug_4("\t\t=== Resources"); slist_iter( rsc, resource_t, node->details->running_rsc, lpc, print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); ); } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_debug_4("%s%s %s ==> %s", user_data==NULL?"":(char*)user_data, user_data==NULL?"":": ", (char*)key, (char*)value); } void print_resource( int log_level, const char *pre_text, resource_t *rsc, gboolean details) { long options = pe_print_log; if(rsc == NULL) { do_crm_log(log_level-1, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t *action) { if(action == NULL) { return; } pe_free_shallow(action->actions_before);/* action_warpper_t* */ pe_free_shallow(action->actions_after); /* action_warpper_t* */ g_hash_table_destroy(action->extra); g_hash_table_destroy(action->meta); crm_free(action->task); crm_free(action->uuid); crm_free(action); } GListPtr find_recurring_actions(GListPtr input, node_t *not_on_node) { const char *value = NULL; GListPtr result = NULL; CRM_CHECK(input != NULL, return NULL); slist_iter( action, action_t, input, lpc, value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if(value == NULL) { /* skip */ } else if(safe_str_eq(value, "0")) { /* skip */ } else if(safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if(not_on_node == NULL) { crm_debug_5("(null) Found: %s", action->uuid); result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ } else if(action->node->details != not_on_node->details) { crm_debug_5("Found: %s", action->uuid); result = g_list_append(result, action); } ); return result; } GListPtr find_actions(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { continue; } else if(on_node == NULL) { result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ crm_debug_2("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = on_node; result = g_list_append(result, action); } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } ); return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { crm_debug_3("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if(on_node == NULL || action->node == NULL) { crm_debug_3("on_node=%p, action->node=%p", on_node, action->node); continue; } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } crm_debug_2("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); ); return result; } void set_id(xmlNode * xml_obj, const char *prefix, int child) { int id_len = 0; gboolean use_prefix = TRUE; gboolean use_child = TRUE; char *new_id = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); id_len = 1 + strlen(id); if(child > 999) { pe_err("Are you insane?!?" " The CRM does not support > 1000 children per resource"); return; } else if(child < 0) { use_child = FALSE; } else { id_len += 4; /* child */ } if(prefix == NULL || safe_str_eq(id, prefix)) { use_prefix = FALSE; } else { id_len += (1 + strlen(prefix)); } crm_malloc0(new_id, id_len); if(use_child) { snprintf(new_id, id_len, "%s%s%s:%d", use_prefix?prefix:"", use_prefix?":":"", id, child); } else { snprintf(new_id, id_len, "%s%s%s", use_prefix?prefix:"", use_prefix?":":"", id); } crm_xml_add(xml_obj, XML_ATTR_ID, new_id); crm_free(new_id); } static void resource_node_score(resource_t *rsc, node_t *node, int score, const char *tag) { node_t *match = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, resource_node_score(child_rsc, node, score, tag); ); } crm_debug_2("Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_find_node_id(rsc->allowed_nodes, node->details->id); if(match == NULL) { match = node_copy(node); match->weight = 0; rsc->allowed_nodes = g_list_append(rsc->allowed_nodes, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set) { if(node != NULL) { resource_node_score(rsc, node, score, tag); } else if(data_set != NULL) { slist_iter( node, node_t, data_set->nodes, lpc, resource_node_score(rsc, node, score, tag); ); } else { slist_iter( node, node_t, rsc->allowed_nodes, lpc, resource_node_score(rsc, node, score, tag); ); } if(node == NULL && score == -INFINITY) { if(rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); crm_free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int) crm_free(a_uuid); crm_free(b_uuid); return an_int gint sort_op_by_callid(gconstpointer a, gconstpointer b) { char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); const char *a_task_id = crm_element_value_const(xml_a, XML_LRM_ATTR_CALLID); const char *b_task_id = crm_element_value_const(xml_b, XML_LRM_ATTR_CALLID); const char *a_key = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_key = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); int dummy = -1; int a_id = -1; int b_id = -1; int a_rc = -1; int b_rc = -1; int a_status = -1; int b_status = -1; int a_call_id = -1; int b_call_id = -1; if(safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0); } CRM_CHECK(a_task_id != NULL && b_task_id != NULL, crm_err("a: %s, b: %s", crm_str(a_xml_id), crm_str(b_xml_id)); sort_return(0)); a_call_id = crm_parse_int(a_task_id, NULL); b_call_id = crm_parse_int(b_task_id, NULL); if(a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0); } else if(a_call_id >= 0 && a_call_id < b_call_id) { crm_debug_4("%s (%d) < %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } else if(b_call_id >= 0 && a_call_id > b_call_id) { crm_debug_4("%s (%d) > %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } crm_debug_5("%s (%d) == %s (%d) : continuing", a_xml_id, a_call_id, b_xml_id, b_call_id); /* now process pending ops */ CRM_CHECK(a_key != NULL && b_key != NULL, sort_return(0)); CRM_CHECK(decode_transition_magic( a_key, &a_uuid, &a_id, &dummy, &a_status, &a_rc, &dummy), sort_return(0)); CRM_CHECK(decode_transition_magic( b_key, &b_uuid, &b_id, &dummy, &b_status, &b_rc, &dummy), sort_return(0)); /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if(safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ CRM_CHECK(a_call_id == -1 || b_call_id == -1, crm_err("a: %s=%d, b: %s=%d", crm_str(a_xml_id), a_call_id, crm_str(b_xml_id), b_call_id); sort_return(0)); CRM_CHECK(a_call_id >= 0 || b_call_id >= 0, sort_return(0)); if(b_call_id == -1) { crm_debug_2("%s (%d) < %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } if(a_call_id == -1) { crm_debug_2("%s (%d) > %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } } else if((a_id >= 0 && a_id < b_id) || b_id == -1) { crm_debug_3("%s (%d) < %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(-1); } else if((b_id >= 0 && a_id > b_id) || a_id == -1) { crm_debug_3("%s (%d) > %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(1); } /* we should never end up here */ crm_err("%s (%d:%d:%s) ?? %s (%d:%d:%s) : default", a_xml_id, a_call_id, a_id, a_uuid, b_xml_id, b_call_id, b_id, b_uuid); CRM_CHECK(FALSE, sort_return(0)); } time_t get_timet_now(pe_working_set_t *data_set) { time_t now = 0; if(data_set && data_set->now) { now = data_set->now->tm_now; } if(now == 0) { /* eventually we should convert data_set->now into time_tm * for now, its only triggered by PE regression tests */ now = time(NULL); crm_crit("Defaulting to 'now'"); if(data_set && data_set->now) { data_set->now->tm_now = now; } } return now; } int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set) { int last = 0; int fail_count = 0; resource_t *failed = rsc; char *fail_attr = crm_concat("fail-count", rsc->id, '-'); const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if(value != NULL) { fail_count = char2score(value); crm_info("%s has failed %d times on %s", rsc->id, fail_count, node->details->uname); } crm_free(fail_attr); fail_attr = crm_concat("last-failure", rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, fail_attr); if(value != NULL && rsc->failure_timeout) { last = crm_parse_int(value, NULL); if(last_failure) { *last_failure = last; } if(last > 0) { time_t now = get_timet_now(data_set); if(now > (last + rsc->failure_timeout)) { crm_notice("Failcount for %s on %s has expired (limit was %ds)", failed->id, node->details->uname, rsc->failure_timeout); fail_count = 0; } } } crm_free(fail_attr); return fail_count; } diff --git a/xml/resources.rng.in b/xml/resources.rng.in index 136b81e9f3..3bb05287e7 100644 --- a/xml/resources.rng.in +++ b/xml/resources.rng.in @@ -1,174 +1,175 @@ ocf lsb heartbeat stonith Started Slave Master nothing quorum fencing ignore block stop restart + standby fence